@midscene/core 1.2.2-beta-20260116064919.0 → 1.2.2-beta-20260116083700.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. package/dist/es/agent/agent.mjs +7 -4
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +4 -2
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/auto-glm/actions.mjs +224 -0
  7. package/dist/es/ai-model/auto-glm/actions.mjs.map +1 -0
  8. package/dist/es/ai-model/auto-glm/index.mjs +6 -0
  9. package/dist/es/ai-model/auto-glm/parser.mjs +239 -0
  10. package/dist/es/ai-model/auto-glm/parser.mjs.map +1 -0
  11. package/dist/es/ai-model/auto-glm/planning.mjs +63 -0
  12. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -0
  13. package/dist/es/ai-model/auto-glm/prompt.mjs +222 -0
  14. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -0
  15. package/dist/es/ai-model/auto-glm/util.mjs +6 -0
  16. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -0
  17. package/dist/es/ai-model/index.mjs +2 -1
  18. package/dist/es/ai-model/inspect.mjs +71 -6
  19. package/dist/es/ai-model/inspect.mjs.map +1 -1
  20. package/dist/es/ai-model/llm-planning.mjs +1 -1
  21. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  22. package/dist/es/ai-model/service-caller/index.mjs +5 -0
  23. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  24. package/dist/es/ai-model/ui-tars-planning.mjs +25 -22
  25. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  26. package/dist/es/common.mjs +15 -2
  27. package/dist/es/common.mjs.map +1 -1
  28. package/dist/es/index.mjs.map +1 -1
  29. package/dist/es/screenshot-item.mjs +9 -4
  30. package/dist/es/screenshot-item.mjs.map +1 -1
  31. package/dist/es/service/index.mjs +6 -1
  32. package/dist/es/service/index.mjs.map +1 -1
  33. package/dist/es/utils.mjs +2 -2
  34. package/dist/es/yaml/player.mjs +4 -3
  35. package/dist/es/yaml/player.mjs.map +1 -1
  36. package/dist/lib/agent/agent.js +7 -4
  37. package/dist/lib/agent/agent.js.map +1 -1
  38. package/dist/lib/agent/tasks.js +3 -1
  39. package/dist/lib/agent/tasks.js.map +1 -1
  40. package/dist/lib/agent/utils.js +1 -1
  41. package/dist/lib/ai-model/auto-glm/actions.js +258 -0
  42. package/dist/lib/ai-model/auto-glm/actions.js.map +1 -0
  43. package/dist/lib/ai-model/auto-glm/index.js +63 -0
  44. package/dist/lib/ai-model/auto-glm/index.js.map +1 -0
  45. package/dist/lib/ai-model/auto-glm/parser.js +282 -0
  46. package/dist/lib/ai-model/auto-glm/parser.js.map +1 -0
  47. package/dist/lib/ai-model/auto-glm/planning.js +97 -0
  48. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -0
  49. package/dist/lib/ai-model/auto-glm/prompt.js +259 -0
  50. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -0
  51. package/dist/lib/ai-model/auto-glm/util.js +40 -0
  52. package/dist/lib/ai-model/auto-glm/util.js.map +1 -0
  53. package/dist/lib/ai-model/index.js +15 -11
  54. package/dist/lib/ai-model/inspect.js +70 -5
  55. package/dist/lib/ai-model/inspect.js.map +1 -1
  56. package/dist/lib/ai-model/llm-planning.js +1 -1
  57. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  58. package/dist/lib/ai-model/service-caller/index.js +5 -0
  59. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  60. package/dist/lib/ai-model/ui-tars-planning.js +25 -22
  61. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  62. package/dist/lib/common.js +18 -2
  63. package/dist/lib/common.js.map +1 -1
  64. package/dist/lib/index.js.map +1 -1
  65. package/dist/lib/screenshot-item.js +9 -4
  66. package/dist/lib/screenshot-item.js.map +1 -1
  67. package/dist/lib/service/index.js +6 -1
  68. package/dist/lib/service/index.js.map +1 -1
  69. package/dist/lib/utils.js +2 -2
  70. package/dist/lib/yaml/player.js +4 -3
  71. package/dist/lib/yaml/player.js.map +1 -1
  72. package/dist/types/ai-model/auto-glm/actions.d.ts +77 -0
  73. package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
  74. package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
  75. package/dist/types/ai-model/auto-glm/planning.d.ts +9 -0
  76. package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
  77. package/dist/types/ai-model/auto-glm/util.d.ts +7 -0
  78. package/dist/types/ai-model/index.d.ts +1 -0
  79. package/dist/types/common.d.ts +10 -0
  80. package/dist/types/index.d.ts +1 -0
  81. package/dist/types/screenshot-item.d.ts +18 -8
  82. package/dist/types/yaml.d.ts +5 -1
  83. package/package.json +2 -2
@@ -34,6 +34,9 @@ const img_namespaceObject = require("@midscene/shared/img");
34
34
  const logger_namespaceObject = require("@midscene/shared/logger");
35
35
  const utils_namespaceObject = require("@midscene/shared/utils");
36
36
  const external_common_js_namespaceObject = require("../common.js");
37
+ const parser_js_namespaceObject = require("./auto-glm/parser.js");
38
+ const prompt_js_namespaceObject = require("./auto-glm/prompt.js");
39
+ const util_js_namespaceObject = require("./auto-glm/util.js");
37
40
  const extraction_js_namespaceObject = require("./prompt/extraction.js");
38
41
  const llm_locator_js_namespaceObject = require("./prompt/llm-locator.js");
39
42
  const llm_section_locator_js_namespaceObject = require("./prompt/llm-section-locator.js");
@@ -87,11 +90,11 @@ const promptsToChatParam = async (multimodalPrompt)=>{
87
90
  async function AiLocateElement(options) {
88
91
  const { context, targetElementDescription, callAIFn, modelConfig } = options;
89
92
  const { vlMode } = modelConfig;
90
- const screenshotBase64 = context.screenshot.getData();
93
+ const screenshotBase64 = context.screenshot.base64;
91
94
  (0, utils_namespaceObject.assert)(targetElementDescription, "cannot find the target element description");
92
95
  const targetElementDescriptionText = extraTextFromUserPrompt(targetElementDescription);
93
96
  const userInstructionPrompt = (0, llm_locator_js_namespaceObject.findElementPrompt)(targetElementDescriptionText);
94
- const systemPrompt = (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)(vlMode);
97
+ const systemPrompt = (0, util_js_namespaceObject.isAutoGLM)(vlMode) ? (0, prompt_js_namespaceObject.getAutoGLMLocatePrompt)(vlMode) : (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)(vlMode);
95
98
  let imagePayload = screenshotBase64;
96
99
  let imageWidth = context.size.width;
97
100
  let imageHeight = context.size.height;
@@ -128,7 +131,7 @@ async function AiLocateElement(options) {
128
131
  },
129
132
  {
130
133
  type: 'text',
131
- text: userInstructionPrompt
134
+ text: (0, util_js_namespaceObject.isAutoGLM)(vlMode) ? `Tap: ${userInstructionPrompt}` : userInstructionPrompt
132
135
  }
133
136
  ]
134
137
  }
@@ -140,6 +143,68 @@ async function AiLocateElement(options) {
140
143
  });
141
144
  msgs.push(...addOns);
142
145
  }
146
+ if ((0, util_js_namespaceObject.isAutoGLM)(vlMode)) {
147
+ const { content: rawResponseContent, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, modelConfig);
148
+ debugInspect('auto-glm rawResponse:', rawResponseContent);
149
+ const parsed = (0, parser_js_namespaceObject.parseAutoGLMLocateResponse)(rawResponseContent);
150
+ debugInspect('auto-glm thinking:', parsed.think);
151
+ debugInspect('auto-glm coordinates:', parsed.coordinates);
152
+ let resRect;
153
+ let matchedElements = [];
154
+ let errors = [];
155
+ if (parsed.error || !parsed.coordinates) {
156
+ errors = [
157
+ parsed.error || 'Failed to parse auto-glm response'
158
+ ];
159
+ debugInspect('auto-glm parse error:', errors[0]);
160
+ } else {
161
+ const { x, y } = parsed.coordinates;
162
+ debugInspect('auto-glm coordinates [0-999]:', {
163
+ x,
164
+ y
165
+ });
166
+ const pixelX = Math.round(x * imageWidth / 1000);
167
+ const pixelY = Math.round(y * imageHeight / 1000);
168
+ debugInspect('auto-glm pixel coordinates:', {
169
+ pixelX,
170
+ pixelY
171
+ });
172
+ const bboxSize = 10;
173
+ const x1 = Math.max(pixelX - bboxSize / 2, 0);
174
+ const y1 = Math.max(pixelY - bboxSize / 2, 0);
175
+ const x2 = Math.min(pixelX + bboxSize / 2, imageWidth);
176
+ const y2 = Math.min(pixelY + bboxSize / 2, imageHeight);
177
+ resRect = {
178
+ left: x1,
179
+ top: y1,
180
+ width: x2 - x1,
181
+ height: y2 - y1
182
+ };
183
+ if (options.searchConfig?.rect) {
184
+ resRect.left += options.searchConfig.rect.left;
185
+ resRect.top += options.searchConfig.rect.top;
186
+ }
187
+ debugInspect('auto-glm resRect:', resRect);
188
+ const rectCenter = {
189
+ x: resRect.left + resRect.width / 2,
190
+ y: resRect.top + resRect.height / 2
191
+ };
192
+ const element = (0, dom_util_namespaceObject.generateElementByPosition)(rectCenter, targetElementDescriptionText);
193
+ if (element) matchedElements = [
194
+ element
195
+ ];
196
+ }
197
+ return {
198
+ rect: resRect,
199
+ parseResult: {
200
+ elements: matchedElements,
201
+ errors
202
+ },
203
+ rawResponse: rawResponseContent,
204
+ usage,
205
+ reasoning_content: parsed.think
206
+ };
207
+ }
143
208
  const res = await callAIFn(msgs, modelConfig);
144
209
  const rawResponse = JSON.stringify(res.content);
145
210
  let resRect;
@@ -180,7 +245,7 @@ async function AiLocateElement(options) {
180
245
  async function AiLocateSection(options) {
181
246
  const { context, sectionDescription, modelConfig } = options;
182
247
  const { vlMode } = modelConfig;
183
- const screenshotBase64 = context.screenshot.getData();
248
+ const screenshotBase64 = context.screenshot.base64;
184
249
  const systemPrompt = (0, llm_section_locator_js_namespaceObject.systemPromptToLocateSection)(vlMode);
185
250
  const sectionLocatorInstructionText = (0, llm_section_locator_js_namespaceObject.sectionLocatorInstruction)(extraTextFromUserPrompt(sectionDescription));
186
251
  const msgs = [
@@ -248,7 +313,7 @@ async function AiLocateSection(options) {
248
313
  async function AiExtractElementInfo(options) {
249
314
  const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } = options;
250
315
  const systemPrompt = (0, extraction_js_namespaceObject.systemPromptToExtract)();
251
- const screenshotBase64 = context.screenshot.getData();
316
+ const screenshotBase64 = context.screenshot.base64;
252
317
  const extractDataPromptText = (0, extraction_js_namespaceObject.extractDataQueryPrompt)(options.pageDescription || '', dataQuery);
253
318
  const userContent = [];
254
319
  if (extractOption?.screenshotIncluded !== false) userContent.push({
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const screenshotBase64 = context.screenshot.getData();\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const screenshotBase64 = context.screenshot.getData();\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.getData();\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n reasoning_content: result.reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","res","rawResponse","JSON","resRect","matchedElements","errors","Array","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;AC2CA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,OAAO;IAEnDM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;QACxBO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAad,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCe,cAAcf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCgB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,iBAAXA,QAAyB;QAClC,MAAMa,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMvB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMkB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMC,MAAM,MAAMlB,SAASR,MAAMS;IAEjC,MAAMkB,cAAcC,KAAK,SAAS,CAACF,IAAI,OAAO;IAE9C,IAAIG;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBM,MAAM,OAAO,CAACN,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAG,UAAUI,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRP,IAAI,OAAO,CAAC,IAAI,EAChBP,YACAC,aACAf,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BgB,oBACAC,qBACAZ;YAGFjB,aAAa,WAAWoC;YAExB,MAAMK,aAAa;gBACjB,GAAGL,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMM,UAA+BC,AAAAA,IAAAA,yBAAAA,yBAAAA,AAAAA,EACnCF,YACArB;YAEFkB,SAAS,EAAE;YAEX,IAAII,SACFL,kBAAkB;gBAACK;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACN,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEO,IAAI,CAAC,CAAC;aAFtBP,SAAS;YAACO;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMT;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOD,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAec,gBAAgBnC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEmC,kBAAkB,EAAEhC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,OAAO;IAEnD,MAAMU,eAAe0B,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BhC;IACjD,MAAMiC,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpChD,wBAAwB6C;IAE1B,MAAMzC,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMgC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMhB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQ2C,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAzC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMoB,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB9C,MACAS;IAGF,IAAIsC;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBe,aACA1C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BsD;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DlD,aAAa,wBAAwBuD;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASpB,MAAM,OAAO,CAACoB,OAC/B,GAAG,CAAC,CAACA,OACGnB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLmB,MACA9C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqBwD;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DxD,aAAa,iBAAiB0D;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAY/C,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BoD;IAC1C;IAEA,IAAIS,cAAc7C;IAClB,IAAIoC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1B/C,kBACAoC,aACArC,AAAW,iBAAXA;QAEF8C,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAajB,KAAK,SAAS,CAACiB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBtD,OAO7C;IACC,MAAM,EAAEuD,SAAS,EAAEtD,OAAO,EAAEuD,aAAa,EAAE9D,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe8C,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IACrB,MAAMnD,mBAAmBL,QAAQ,UAAU,CAAC,OAAO;IAEnD,MAAMyD,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B3D,QAAQ,eAAe,IAAI,IAC3BuD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKtD;YACL,QAAQ;QACV;IACF;IAGFsD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM/D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASiD;QACX;KACD;IAED,IAAIlE,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMoB,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB9C,MACAS;IAEF,OAAO;QACL,aAAaoC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;QACnB,mBAAmBA,OAAO,iBAAiB;IAC7C;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB3D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeoD,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMnE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASqD;QACX;KACD;IAED,MAAMxB,SAAS,MAAMrC,SAASR,MAAMS;IAEpC,OAAO;QACL,kBAAkBoC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(vlMode)\n ? getAutoGLMLocatePrompt(vlMode)\n : systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(vlMode)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(vlMode)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Create a small bbox around the point\n const bboxSize = 10;\n const x1 = Math.max(pixelX - bboxSize / 2, 0);\n const y1 = Math.max(pixelY - bboxSize / 2, 0);\n const x2 = Math.min(pixelX + bboxSize / 2, imageWidth);\n const y2 = Math.min(pixelY + bboxSize / 2, imageHeight);\n\n // Convert to Rect format\n resRect = {\n left: x1,\n top: y1,\n width: x2 - x1,\n height: y2 - y1,\n };\n\n // Apply offset if searching in a cropped area\n if (options.searchConfig?.rect) {\n resRect.left += options.searchConfig.rect.left;\n resRect.top += options.searchConfig.rect.top;\n }\n\n debugInspect('auto-glm resRect:', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n const res = await callAIFn(msgs, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n reasoning_content: result.reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","bboxSize","x1","y1","x2","y2","rectCenter","element","generateElementByPosition","res","rawResponse","JSON","Array","adaptBboxToRect","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;ACiDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElDM,IAAAA,sBAAAA,MAAAA,AAAAA,EACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,AAAAA,IAAAA,+BAAAA,iBAAAA,AAAAA,EAAkBF;IAChD,MAAMG,eAAeC,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,UAC3BQ,AAAAA,IAAAA,0BAAAA,sBAAAA,AAAAA,EAAuBR,UACvBS,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAaf,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIgB,cAAchB,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIiB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIjB,QAAQ,YAAY,EAAE;QACxBO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFe,eAAef,QAAQ,YAAY,CAAC,WAAW;QAC/CgB,aAAahB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCiB,cAAcjB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCkB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAW,iBAAXA,QAAyB;QAClC,MAAMe,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMzB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,UACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMoB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI2B;IACf;IAEA,IAAIV,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUP,SAAS;QACrB,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAAyB9B,MAAMS;QAEvChB,aAAa,yBAAyBmC;QAEtC,MAAMG,SAASC,AAAAA,IAAAA,0BAAAA,0BAAAA,AAAAA,EAA2BJ;QAE1CnC,aAAa,sBAAsBsC,OAAO,KAAK;QAC/CtC,aAAa,yBAAyBsC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DtC,aAAa,yBAAyB0C,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCtC,aAAa,iCAAiC;gBAAE2C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C7B,aAAa,+BAA+B;gBAAE6C;gBAAQE;YAAO;YAG7D,MAAMC,WAAW;YACjB,MAAMC,KAAKH,KAAK,GAAG,CAACD,SAASG,WAAW,GAAG;YAC3C,MAAME,KAAKJ,KAAK,GAAG,CAACC,SAASC,WAAW,GAAG;YAC3C,MAAMG,KAAKL,KAAK,GAAG,CAACD,SAASG,WAAW,GAAGpB;YAC3C,MAAMwB,KAAKN,KAAK,GAAG,CAACC,SAASC,WAAW,GAAGnB;YAG3CW,UAAU;gBACR,MAAMS;gBACN,KAAKC;gBACL,OAAOC,KAAKF;gBACZ,QAAQG,KAAKF;YACf;YAGA,IAAItC,QAAQ,YAAY,EAAE,MAAM;gBAC9B4B,QAAQ,IAAI,IAAI5B,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBAC9C4B,QAAQ,GAAG,IAAI5B,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YAC9C;YAEAZ,aAAa,qBAAqBwC;YAElC,MAAMa,aAAa;gBACjB,GAAGb,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMc,UAA+BC,AAAAA,IAAAA,yBAAAA,yBAAAA,AAAAA,EACnCF,YACAjC;YAGF,IAAIkC,SACFb,kBAAkB;gBAACa;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMd;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,MAAMkB,MAAM,MAAMzC,SAASR,MAAMS;IAEjC,MAAMyC,cAAcC,KAAK,SAAS,CAACF,IAAI,OAAO;IAE9C,IAAIhB;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYc,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBG,MAAM,OAAO,CAACH,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAhB,UAAUoB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRJ,IAAI,OAAO,CAAC,IAAI,EAChB5B,YACAC,aACAjB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BkB,oBACAC,qBACAd;YAGFjB,aAAa,WAAWwC;YAExB,MAAMa,aAAa;gBACjB,GAAGb,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMc,UAA+BC,AAAAA,IAAAA,yBAAAA,yBAAAA,AAAAA,EACnCF,YACAjC;YAEFsB,SAAS,EAAE;YAEX,IAAIY,SACFb,kBAAkB;gBAACa;aAAQ;QAE/B;IACF,EAAE,OAAOO,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACnB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEoB,IAAI,CAAC,CAAC;aAFtBpB,SAAS;YAACoB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMtB;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAe;QACA,OAAOD,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAeQ,gBAAgBpD,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEoD,kBAAkB,EAAEjD,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMU,eAAe2C,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BjD;IACjD,MAAMkD,gCAAgCC,AAAAA,IAAAA,uCAAAA,yBAAAA,AAAAA,EACpCjE,wBAAwB8D;IAE1B,MAAM1D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM/B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQ4D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA1D,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMmC,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB/D,MACAS;IAGF,IAAIuD;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAab,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBY,aACA3D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BuE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DnE,aAAa,wBAAwBwE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASjB,MAAM,OAAO,CAACiB,OAC/B,GAAG,CAAC,CAACA,OACGhB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLgB,MACA/D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqByE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DzE,aAAa,iBAAiB2E;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYhE,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BqE;IAC1C;IAEA,IAAIS,cAAc9D;IAClB,IAAIqD,aAAa;QACf,MAAMU,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BhE,kBACAqD,aACAtD,AAAW,iBAAXA;QAEF+D,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBvE,OAO7C;IACC,MAAM,EAAEwE,SAAS,EAAEvE,OAAO,EAAEwE,aAAa,EAAE/E,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe+D,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IACrB,MAAMpE,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM0E,wBAAwBC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAC5B5E,QAAQ,eAAe,IAAI,IAC3BwE;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKvE;YACL,QAAQ;QACV;IACF;IAGFuE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASkE;QACX;KACD;IAED,IAAInF,kBAAkB;QACpB,MAAM4B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMmC,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB/D,MACAS;IAEF,OAAO;QACL,aAAaqD,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;QACnB,mBAAmBA,OAAO,iBAAiB;IAC7C;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB5E,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeqE,AAAAA,IAAAA,yCAAAA,iCAAAA,AAAAA;IACrB,MAAMC,aAAaC,AAAAA,IAAAA,yCAAAA,yBAAAA,AAAAA,EAA0BH;IAE7C,MAAMpF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASsE;QACX;KACD;IAED,MAAMxB,SAAS,MAAMtD,SAASR,MAAMS;IAEpC,OAAO;QACL,kBAAkBqD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -36,7 +36,7 @@ const debug = (0, logger_namespaceObject.getDebug)('planning');
36
36
  async function plan(userInstruction, opts) {
37
37
  const { context, modelConfig, conversationHistory } = opts;
38
38
  const { size } = context;
39
- const screenshotBase64 = context.screenshot.getData();
39
+ const screenshotBase64 = context.screenshot.base64;
40
40
  const { vlMode } = modelConfig;
41
41
  const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
42
42
  actionSpace: opts.actionSpace,
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n finalizeActionName,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.getData();\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n includeThought: opts.deepThink !== true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n if (actions[0]?.type === finalizeActionName) {\n debug('finalize action planned, stop planning');\n shouldContinuePlanning = false;\n }\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","undefined","actions","shouldContinuePlanning","finalizeActionName","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACiBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,OAAO;IAEnD,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;QAC7B,gBAAgBA,AAAmB,SAAnBA,KAAK,SAAS;IAChC;IAEA,IAAIS,eAAeJ;IACnB,IAAIK,aAAaN,KAAK,KAAK;IAC3B,IAAIO,cAAcP,KAAK,MAAM;IAC7B,MAAMQ,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACRL,MACAlB,aACA;QACE,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAe0B,SAAY1B,KAAK,SAAS;IACpE;IAGF,MAAM2B,UAAUN,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,IAAIO,yBAAyB;IAC7B,IAAID,OAAO,CAAC,EAAE,EAAE,SAASE,mCAAAA,kBAAkBA,EAAE;QAC3CjC,MAAM;QACNgC,yBAAyB;IAC3B;IACA,MAAME,cAAkC;QACtC,GAAGT,UAAU;QACbM;QACAL;QACAC;QACAC;QACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBJ,SAAS3B,KAAK,WAAW;QAC1D4B;IACF;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOX,YAAY;IAEnBM,QAAQ,OAAO,CAAC,CAACM;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACiC,SAAWA,OAAO,IAAI,KAAKC;QAG9BtC,MAAM,+BAA+BuC;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENvC,MAAM,gBAAgBwC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgBjC,AAAWoB,WAAXpB,QAElB2B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA7B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEAH,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOQ;AACT"}
1
+ {"version":3,"file":"ai-model/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n finalizeActionName,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n includeThought: opts.deepThink !== true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n if (actions[0]?.type === finalizeActionName) {\n debug('finalize action planned, stop planning');\n shouldContinuePlanning = false;\n }\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","undefined","actions","shouldContinuePlanning","finalizeActionName","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACiBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;QAC7B,gBAAgBA,AAAmB,SAAnBA,KAAK,SAAS;IAChC;IAEA,IAAIS,eAAeJ;IACnB,IAAIK,aAAaN,KAAK,KAAK;IAC3B,IAAIO,cAAcP,KAAK,MAAM;IAC7B,MAAMQ,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACRL,MACAlB,aACA;QACE,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAe0B,SAAY1B,KAAK,SAAS;IACpE;IAGF,MAAM2B,UAAUN,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,IAAIO,yBAAyB;IAC7B,IAAID,OAAO,CAAC,EAAE,EAAE,SAASE,mCAAAA,kBAAkBA,EAAE;QAC3CjC,MAAM;QACNgC,yBAAyB;IAC3B;IACA,MAAME,cAAkC;QACtC,GAAGT,UAAU;QACbM;QACAL;QACAC;QACAC;QACA,UAAUO,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBJ,SAAS3B,KAAK,WAAW;QAC1D4B;IACF;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOX,YAAY;IAEnBM,QAAQ,OAAO,CAAC,CAACM;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACiC,SAAWA,OAAO,IAAI,KAAKC;QAG9BtC,MAAM,+BAA+BuC;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENvC,MAAM,gBAAgBwC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgBjC,AAAWoB,WAAXpB,QAElB2B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA7B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEAH,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOQ;AACT"}
@@ -47,6 +47,7 @@ const utils_namespaceObject = require("@midscene/shared/utils");
47
47
  const external_jsonrepair_namespaceObject = require("jsonrepair");
48
48
  const external_openai_namespaceObject = require("openai");
49
49
  var external_openai_default = /*#__PURE__*/ __webpack_require__.n(external_openai_namespaceObject);
50
+ const util_js_namespaceObject = require("../auto-glm/util.js");
50
51
  async function createChatClient({ modelConfig }) {
51
52
  const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, modelFamily, createOpenAIClient, timeout } = modelConfig;
52
53
  let proxyAgent;
@@ -185,6 +186,10 @@ async function callAI(messages, modelConfig, options) {
185
186
  vl_high_resolution_images: true
186
187
  } : {}
187
188
  };
189
+ if ((0, util_js_namespaceObject.isAutoGLM)(vlMode)) {
190
+ commonConfig.top_p = 0.85;
191
+ commonConfig.frequency_penalty = 0.2;
192
+ }
188
193
  const { config: deepThinkConfig, debugMessage, warningMessage } = resolveDeepThinkConfig({
189
194
  deepThink: options?.deepThink,
190
195
  modelFamily
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/service-caller/index.js","sources":["webpack/runtime/compat_get_default_export","webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import {\n AIResponseFormat,\n type AIUsageInfo,\n type DeepThinkOption,\n} from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TModelFamily,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n modelFamily,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n debugCall(warningMessage);\n console.warn(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n if (!content) {\n throw new Error('empty content from AI model');\n }\n\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n if (attempt < maxAttempts) {\n console.warn(\n `[Midscene] AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n assert(\n typeof jsonContent === 'object',\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n );\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n modelFamily,\n}: {\n deepThink?: DeepThinkOption;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (modelFamily === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (modelFamily === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n if (modelFamily === 'glm-v') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`,\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${modelFamily ?? 'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${modelFamily ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","modelFamily","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","result","JSON","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","deepThink","normalizedDeepThink","normalizeJsonObject","Array","item","normalized","value","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","jsonString","String"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;ACoBA,eAAeI,iBAAiB,EAC9BC,WAAW,EAGZ;IAQC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGZ;IAEJ,IAAIa;IACJ,MAAMC,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIf,WAAW;QACbY,WAAW,oBAAoBE,iBAAiBd;QAChD,IAAIkB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBa,WAAW,qBAAqBE,iBAAiBf;QACjD,IAAImB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIlB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAEzB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAMgC,gBAAgB;QACpB,SAAS7B;QACT,QAAQC;QAGR,GAAIQ,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGP,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOM,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,CAAAA,yBAAAA,EAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACC,oBAAAA,wBAAwBA,GAClE;QACA,IAAIlB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACI,oBAAAA,uBAAuBA,GACjE;QACA,IAAIrB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnCjC;QACAI;QACAC;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC9C,WAAyB,EACzB+C,OAIC;IAOD,MAAM,EACJC,UAAU,EACV7C,SAAS,EACTI,gBAAgB,EAChBC,aAAa,EACbC,MAAM,EACNC,WAAW,EACZ,GAAG,MAAMX,iBAAiB;QACzBC;IACF;IAEA,MAAMiD,YACJZ,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACa,oBAAAA,yBAAyBA,KACvEb,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACc,oBAAAA,iBAAiBA;IACjE,MAAMC,YAAYrC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAC3B,MAAMsC,oBAAoBtC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IACnC,MAAMuC,qBAAqBvC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAczD,YAAY,WAAW,IAAI;IAE/C,MAAM0D,cAAcX,SAAS,UAAUA,SAAS;IAChD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY5D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMmE,eAAe;QACnBV;QACA,QAAQ,CAAC,CAACC;QACV,YAAYT;QACZ,GAAIxC,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IACA,MAAM,EACJ,QAAQ2D,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAWxB,SAAS;QACpBrC;IACF;IACA,IAAI2D,cACFjB,UAAUiB;IAEZ,IAAIC,gBAAgB;QAClBlB,UAAUkB;QACVjD,QAAQ,IAAI,CAACiD;IACf;IAEA,IAAI;QACFlB,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAEvD,WAAW;QAGrE,IAAIuD,aAAa;YACf,MAAMc,SAAU,MAAMxB,WAAW,MAAM,CACrC;gBACE,OAAO7C;gBACP2C;gBACA,GAAGqB,YAAY;gBACf,GAAGC,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMK,SAASD,OAAQ;gBAChC,MAAMb,UAAUc,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbX,QAAQW,MAAM,KAAK;gBAGrB,IAAId,WAAWe,mBAAmB;oBAChCd,eAAeD;oBACfE,wBAAwBa;oBACxB,MAAMC,YAAiC;wBACrChB;wBACAe;wBACAd;wBACA,YAAY;wBACZ,OAAOgB;oBACT;oBACA7B,QAAQ,OAAO,CAAE4B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCV,WAAWP,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACO,OAAO;wBAEV,MAAMe,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAClB,YAAY,MAAM,GAAG;wBAElCE,QAAQ;4BACN,eAAee;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTnB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAEgC;oBACjB;gBACF;YACF;YACApB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAElD,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEsD,SAAS,eAAe,EAAEN,eAAe,IAAI;QAE1H,OAAO;YAEL,MAAMuB,aAAahF,YAAY,UAAU,IAAI;YAC7C,MAAMiF,gBAAgBjF,YAAY,aAAa,IAAI;YACnD,MAAMkF,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAC5C,IAAI;gBACF,MAAMC,SAAS,MAAMrC,WAAW,MAAM,CAAC;oBACrC,OAAO7C;oBACP2C;oBACA,GAAGqB,YAAY;oBACf,GAAGC,eAAe;gBACpB;gBAEAL,WAAWP,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAElD,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAE6E,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEtB,SAAS,aAAa,EAAEsB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAE5B,eAAe,IAAI;gBAG9VH,mBACE,CAAC,oBAAoB,EAAEgC,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI3D,MACR,CAAC,mCAAmC,EAAE4D,KAAK,SAAS,CAACD,SAAS;gBAIlE1B,UAAU0B,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3C,IAAI,CAAC1B,SACH,MAAM,IAAIjC,MAAM;gBAGlBmC,uBACGwB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;gBAC3DvB,QAAQuB,OAAO,KAAK;gBACpB;YACF,EAAE,OAAOrD,OAAO;gBACdmD,YAAYnD;gBACZ,IAAIoD,UAAUF,aAAa;oBACzB7D,QAAQ,IAAI,CACV,CAAC,mCAAmC,EAAE+D,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;oBAEhI,MAAM,IAAII,QAAQ,CAACC,UAAYC,WAAWD,SAASP;gBACrD;YACF;YAGF,IAAI,CAACtB,SACH,MAAMwB;QAEV;QAEA/B,UAAU,CAAC,4BAA4B,EAAES,sBAAsB;QAC/DT,UAAU,CAAC,kBAAkB,EAAEO,SAAS;QAGxC,IAAID,eAAe,CAACI,OAAO;YAEzB,MAAMe,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEnB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCG,QAAQ;gBACN,eAAee;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASlB,WAAW;YACpB,mBAAmBE,wBAAwBe;YAC3C,OAAOZ,eAAeF;YACtB,YAAY,CAAC,CAACJ;QAChB;IACF,EAAE,OAAOgC,GAAQ;QACfrE,QAAQ,KAAK,CAAC,kBAAkBqE;QAChC,MAAMC,WAAW,IAAIjE,MACnB,CAAC,eAAe,EAAEgC,cAAc,eAAe,GAAG,kBAAkB,EAAEvD,UAAU,GAAG,EAAEuF,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpB9C,QAAsC,EACtC9C,WAAyB,EACzB+C,OAEC;IAOD,MAAM8C,WAAW,MAAMhD,OAAOC,UAAU9C,aAAa;QACnD,WAAW+C,SAAS;IACtB;IACA+C,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOD,UAAU;IACjB,MAAMpF,SAAST,YAAY,MAAM;IACjC,MAAM+F,cAAcC,cAAcH,SAAS,OAAO,EAAEpF;IACpDqF,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAuB,YAAvB,OAAOC,aACP,CAAC,0CAA0C,EAAE/F,YAAY,SAAS,CAAC,GAAG,EAAE6F,SAAS,OAAO,EAAE;IAE5F,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZlG,WAAyB;IAEzB,MAAM,EAAE2D,OAAO,EAAEG,KAAK,EAAE,GAAG,MAAMjB,OAAOqD,MAAMlG;IAC9C,OAAO;QAAE2D;QAASG;IAAM;AAC1B;AAEO,SAASqC,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASjC,uBAAuB,EACrCkC,SAAS,EACT/F,WAAW,EAIZ;IAKC,MAAMgG,sBAAsBD,AAAc,YAAdA,YAAwB7B,SAAY6B;IAEhE,IAAIC,AAAwB9B,WAAxB8B,qBACF,OAAO;QAAE,QAAQ,CAAC;QAAG,cAAc9B;IAAU;IAG/C,IAAIlE,AAAgB,eAAhBA,aACF,OAAO;QACL,QAAQ;YAAE,iBAAiBgG;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIhG,AAAgB,oBAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMgG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,IAAIhG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMgG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,UAAU,CAAC;IAC7G;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAEhG,eAAe,UAAU,CAAC,CAAC;QACzF,gBAAgB,CAAC,0DAA0D,EAAEA,eAAe,UAAU,EAAE,CAAC;IAC3G;AACF;AAQA,SAASiG,oBAAoB/G,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIgH,MAAM,OAAO,CAAChH,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACiH,OAASF,oBAAoBE;IAI/C,IAAI,AAAe,YAAf,OAAOjH,KAAkB;QAC3B,MAAMkH,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACpH,KAAKqH,MAAM,IAAIpH,OAAO,OAAO,CAACC,KAAM;YAE9C,MAAMoH,aAAatH,IAAI,IAAI;YAG3B,IAAIuH,kBAAkBN,oBAAoBI;YAG1C,IAAI,AAA2B,YAA3B,OAAOE,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCH,UAAU,CAACE,WAAW,GAAGC;QAC3B;QAEA,OAAOH;IACT;IAGA,IAAI,AAAe,YAAf,OAAOlH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASoG,cAAcQ,KAAa,EAAE/F,MAAgC;IAC3E,MAAMyG,kBAAkBf,yBAAyBK;IAEjD,IAAIU,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAItF;IAGT,IAAIV;IACJ,IAAIiE;IACJ,IAAI;QACFjE,SAASoE,KAAK,KAAK,CAAC4B;QACpB,OAAOP,oBAAoBzF;IAC7B,EAAE,OAAOc,OAAO;QACdmD,YAAYnD;IACd;IACA,IAAI;QACFd,SAASoE,KAAK,KAAK,CAAC6B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWD;QAC/B,OAAOP,oBAAoBzF;IAC7B,EAAE,OAAOc,OAAO;QACdmD,YAAYnD;IACd;IAEA,IAAIvB,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAM2G,aAAab,yBAAyBW;QAC5C,IAAI;YACFhG,SAASoE,KAAK,KAAK,CAAC6B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWC;YAC/B,OAAOT,oBAAoBzF;QAC7B,EAAE,OAAOc,OAAO;YACdmD,YAAYnD;QACd;IACF;IACA,MAAMN,MACJ,CAAC,gDAAgD,EAAE2F,OACjDlC,aAAa,iBACb,gBAAgB,EAAEqB,OAAO;AAE/B"}
1
+ {"version":3,"file":"ai-model/service-caller/index.js","sources":["webpack/runtime/compat_get_default_export","webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import {\n AIResponseFormat,\n type AIUsageInfo,\n type DeepThinkOption,\n} from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TModelFamily,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM } from '../auto-glm/util';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(vlMode)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n modelFamily,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n debugCall(warningMessage);\n console.warn(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n if (!content) {\n throw new Error('empty content from AI model');\n }\n\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n if (attempt < maxAttempts) {\n console.warn(\n `[Midscene] AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n assert(\n typeof jsonContent === 'object',\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n );\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n modelFamily,\n}: {\n deepThink?: DeepThinkOption;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (modelFamily === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (modelFamily === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n if (modelFamily === 'glm-v') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`,\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${modelFamily ?? 'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${modelFamily ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","modelFamily","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","isAutoGLM","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","result","JSON","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","deepThink","normalizedDeepThink","normalizeJsonObject","Array","item","normalized","value","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","jsonString","String"],"mappings":";;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;ACqBA,eAAeI,iBAAiB,EAC9BC,WAAW,EAGZ;IAQC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGZ;IAEJ,IAAIa;IACJ,MAAMC,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIf,WAAW;QACbY,WAAW,oBAAoBE,iBAAiBd;QAChD,IAAIkB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBa,WAAW,qBAAqBE,iBAAiBf;QACjD,IAAImB,sBAAAA,WAAWA,EACbC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIlB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAEzB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAMgC,gBAAgB;QACpB,SAAS7B;QACT,QAAQC;QAGR,GAAIQ,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGP,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOM,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,CAAAA,yBAAAA,EAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACC,oBAAAA,wBAAwBA,GAClE;QACA,IAAIlB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACI,oBAAAA,uBAAuBA,GACjE;QACA,IAAIrB,sBAAAA,WAAWA,EACb,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnCjC;QACAI;QACAC;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC9C,WAAyB,EACzB+C,OAIC;IAOD,MAAM,EACJC,UAAU,EACV7C,SAAS,EACTI,gBAAgB,EAChBC,aAAa,EACbC,MAAM,EACNC,WAAW,EACZ,GAAG,MAAMX,iBAAiB;QACzBC;IACF;IAEA,MAAMiD,YACJZ,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACa,oBAAAA,yBAAyBA,KACvEb,oBAAAA,mBAAAA,CAAAA,yBAA6C,CAACc,oBAAAA,iBAAiBA;IACjE,MAAMC,YAAYrC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAC3B,MAAMsC,oBAAoBtC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IACnC,MAAMuC,qBAAqBvC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAczD,YAAY,WAAW,IAAI;IAE/C,MAAM0D,cAAcX,SAAS,UAAUA,SAAS;IAChD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY5D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMmE,eAAe;QACnBV;QACA,QAAQ,CAAC,CAACC;QACV,YAAYT;QACZ,GAAIxC,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI2D,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAU3D,SAAS;QACpB0D,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAWzB,SAAS;QACpBrC;IACF;IACA,IAAI4D,cACFlB,UAAUkB;IAEZ,IAAIC,gBAAgB;QAClBnB,UAAUmB;QACVlD,QAAQ,IAAI,CAACkD;IACf;IAEA,IAAI;QACFnB,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAEvD,WAAW;QAGrE,IAAIuD,aAAa;YACf,MAAMe,SAAU,MAAMzB,WAAW,MAAM,CACrC;gBACE,OAAO7C;gBACP2C;gBACA,GAAGqB,YAAY;gBACf,GAAGE,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMK,SAASD,OAAQ;gBAChC,MAAMd,UAAUe,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbZ,QAAQY,MAAM,KAAK;gBAGrB,IAAIf,WAAWgB,mBAAmB;oBAChCf,eAAeD;oBACfE,wBAAwBc;oBACxB,MAAMC,YAAiC;wBACrCjB;wBACAgB;wBACAf;wBACA,YAAY;wBACZ,OAAOiB;oBACT;oBACA9B,QAAQ,OAAO,CAAE6B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCX,WAAWP,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACO,OAAO;wBAEV,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACnB,YAAY,MAAM,GAAG;wBAElCE,QAAQ;4BACN,eAAegB;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTpB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAEiC;oBACjB;gBACF;YACF;YACArB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAElD,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEsD,SAAS,eAAe,EAAEN,eAAe,IAAI;QAE1H,OAAO;YAEL,MAAMwB,aAAajF,YAAY,UAAU,IAAI;YAC7C,MAAMkF,gBAAgBlF,YAAY,aAAa,IAAI;YACnD,MAAMmF,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAC5C,IAAI;gBACF,MAAMC,SAAS,MAAMtC,WAAW,MAAM,CAAC;oBACrC,OAAO7C;oBACP2C;oBACA,GAAGqB,YAAY;oBACf,GAAGE,eAAe;gBACpB;gBAEAN,WAAWP,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAElD,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAE8E,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEvB,SAAS,aAAa,EAAEuB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAE7B,eAAe,IAAI;gBAG9VH,mBACE,CAAC,oBAAoB,EAAEiC,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI5D,MACR,CAAC,mCAAmC,EAAE6D,KAAK,SAAS,CAACD,SAAS;gBAIlE3B,UAAU2B,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3C,IAAI,CAAC3B,SACH,MAAM,IAAIjC,MAAM;gBAGlBmC,uBACGyB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;gBAC3DxB,QAAQwB,OAAO,KAAK;gBACpB;YACF,EAAE,OAAOtD,OAAO;gBACdoD,YAAYpD;gBACZ,IAAIqD,UAAUF,aAAa;oBACzB9D,QAAQ,IAAI,CACV,CAAC,mCAAmC,EAAEgE,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;oBAEhI,MAAM,IAAII,QAAQ,CAACC,UAAYC,WAAWD,SAASP;gBACrD;YACF;YAGF,IAAI,CAACvB,SACH,MAAMyB;QAEV;QAEAhC,UAAU,CAAC,4BAA4B,EAAES,sBAAsB;QAC/DT,UAAU,CAAC,kBAAkB,EAAEO,SAAS;QAGxC,IAAID,eAAe,CAACI,OAAO;YAEzB,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEpB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCG,QAAQ;gBACN,eAAegB;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASnB,WAAW;YACpB,mBAAmBE,wBAAwBgB;YAC3C,OAAOb,eAAeF;YACtB,YAAY,CAAC,CAACJ;QAChB;IACF,EAAE,OAAOiC,GAAQ;QACftE,QAAQ,KAAK,CAAC,kBAAkBsE;QAChC,MAAMC,WAAW,IAAIlE,MACnB,CAAC,eAAe,EAAEgC,cAAc,eAAe,GAAG,kBAAkB,EAAEvD,UAAU,GAAG,EAAEwF,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpB/C,QAAsC,EACtC9C,WAAyB,EACzB+C,OAEC;IAOD,MAAM+C,WAAW,MAAMjD,OAAOC,UAAU9C,aAAa;QACnD,WAAW+C,SAAS;IACtB;IACAgD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOD,UAAU;IACjB,MAAMrF,SAAST,YAAY,MAAM;IACjC,MAAMgG,cAAcC,cAAcH,SAAS,OAAO,EAAErF;IACpDsF,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAuB,YAAvB,OAAOC,aACP,CAAC,0CAA0C,EAAEhG,YAAY,SAAS,CAAC,GAAG,EAAE8F,SAAS,OAAO,EAAE;IAE5F,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZnG,WAAyB;IAEzB,MAAM,EAAE2D,OAAO,EAAEG,KAAK,EAAE,GAAG,MAAMjB,OAAOsD,MAAMnG;IAC9C,OAAO;QAAE2D;QAASG;IAAM;AAC1B;AAEO,SAASsC,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASjC,uBAAuB,EACrCkC,SAAS,EACThG,WAAW,EAIZ;IAKC,MAAMiG,sBAAsBD,AAAc,YAAdA,YAAwB7B,SAAY6B;IAEhE,IAAIC,AAAwB9B,WAAxB8B,qBACF,OAAO;QAAE,QAAQ,CAAC;QAAG,cAAc9B;IAAU;IAG/C,IAAInE,AAAgB,eAAhBA,aACF,OAAO;QACL,QAAQ;YAAE,iBAAiBiG;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIjG,AAAgB,oBAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMiG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,IAAIjG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMiG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,UAAU,CAAC;IAC7G;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAEjG,eAAe,UAAU,CAAC,CAAC;QACzF,gBAAgB,CAAC,0DAA0D,EAAEA,eAAe,UAAU,EAAE,CAAC;IAC3G;AACF;AAQA,SAASkG,oBAAoBhH,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIiH,MAAM,OAAO,CAACjH,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACkH,OAASF,oBAAoBE;IAI/C,IAAI,AAAe,YAAf,OAAOlH,KAAkB;QAC3B,MAAMmH,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACrH,KAAKsH,MAAM,IAAIrH,OAAO,OAAO,CAACC,KAAM;YAE9C,MAAMqH,aAAavH,IAAI,IAAI;YAG3B,IAAIwH,kBAAkBN,oBAAoBI;YAG1C,IAAI,AAA2B,YAA3B,OAAOE,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCH,UAAU,CAACE,WAAW,GAAGC;QAC3B;QAEA,OAAOH;IACT;IAGA,IAAI,AAAe,YAAf,OAAOnH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASqG,cAAcQ,KAAa,EAAEhG,MAAgC;IAC3E,MAAM0G,kBAAkBf,yBAAyBK;IAEjD,IAAIU,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAIvF;IAGT,IAAIV;IACJ,IAAIkE;IACJ,IAAI;QACFlE,SAASqE,KAAK,KAAK,CAAC4B;QACpB,OAAOP,oBAAoB1F;IAC7B,EAAE,OAAOc,OAAO;QACdoD,YAAYpD;IACd;IACA,IAAI;QACFd,SAASqE,KAAK,KAAK,CAAC6B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWD;QAC/B,OAAOP,oBAAoB1F;IAC7B,EAAE,OAAOc,OAAO;QACdoD,YAAYpD;IACd;IAEA,IAAIvB,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAM4G,aAAab,yBAAyBW;QAC5C,IAAI;YACFjG,SAASqE,KAAK,KAAK,CAAC6B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWC;YAC/B,OAAOT,oBAAoB1F;QAC7B,EAAE,OAAOc,OAAO;YACdoD,YAAYpD;QACd;IACF;IACA,MAAMN,MACJ,CAAC,gDAAgD,EAAE4F,OACjDlC,aAAa,iBACb,gBAAgB,EAAEqB,OAAO;AAE/B"}
@@ -49,7 +49,7 @@ async function uiTarsPlanning(userInstruction, options) {
49
49
  let instruction = userInstruction;
50
50
  if (actionContext) instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\n<user_instruction>${userInstruction}</user_instruction>`;
51
51
  const systemPrompt = (0, ui_tars_planning_js_namespaceObject.getUiTarsPlanningPrompt)() + instruction;
52
- const screenshotBase64 = context.screenshot.getData();
52
+ const screenshotBase64 = context.screenshot.base64;
53
53
  const imagePayload = await resizeImageForUiTars(screenshotBase64, context.size, uiTarsModelVersion);
54
54
  conversationHistory.append({
55
55
  role: 'user',
@@ -92,47 +92,50 @@ async function uiTarsPlanning(userInstruction, options) {
92
92
  if ('click' === actionType) {
93
93
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
94
94
  const point = getPoint(action.action_inputs.start_box, size);
95
+ const locate = {
96
+ prompt: action.thought || '',
97
+ bbox: pointToBbox({
98
+ x: point[0],
99
+ y: point[1]
100
+ }, size.width, size.height)
101
+ };
95
102
  transformActions.push({
96
103
  type: 'Tap',
97
104
  param: {
98
- locate: {
99
- prompt: action.thought || '',
100
- bbox: pointToBbox({
101
- x: point[0],
102
- y: point[1]
103
- }, size.width, size.height)
104
- }
105
+ locate: locate
105
106
  }
106
107
  });
107
108
  } else if ('left_double' === actionType) {
108
109
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
109
110
  const point = getPoint(action.action_inputs.start_box, size);
111
+ const locate = {
112
+ prompt: action.thought || '',
113
+ bbox: pointToBbox({
114
+ x: point[0],
115
+ y: point[1]
116
+ }, size.width, size.height)
117
+ };
110
118
  transformActions.push({
111
119
  type: 'DoubleClick',
112
120
  param: {
113
- locate: {
114
- prompt: action.thought || '',
115
- bbox: pointToBbox({
116
- x: point[0],
117
- y: point[1]
118
- }, size.width, size.height)
119
- }
121
+ locate: locate
120
122
  },
121
123
  thought: action.thought || ''
122
124
  });
123
125
  } else if ('right_single' === actionType) {
124
126
  (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
125
127
  const point = getPoint(action.action_inputs.start_box, size);
128
+ const locate = {
129
+ prompt: action.thought || '',
130
+ bbox: pointToBbox({
131
+ x: point[0],
132
+ y: point[1]
133
+ }, size.width, size.height)
134
+ };
126
135
  transformActions.push({
127
136
  type: 'RightClick',
128
137
  param: {
129
- locate: {
130
- prompt: action.thought || '',
131
- bbox: pointToBbox({
132
- x: point[0],
133
- y: point[1]
134
- }, size.width, size.height)
135
- }
138
+ locate: locate
136
139
  },
137
140
  thought: action.thought || ''
138
141
  });