npm - @midscene/core - Versions diffs - 0.28.12-beta-20250923114831.0 → 0.28.12-beta-20250923124052.0 - Mend

@midscene/core 0.28.12-beta-20250923114831.0 → 0.28.12-beta-20250923124052.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/dist/es/agent/agent.mjs +1 -1
package/dist/es/agent/agent.mjs.map +1 -1
package/dist/es/agent/tasks.mjs +45 -160
package/dist/es/agent/tasks.mjs.map +1 -1
package/dist/es/agent/utils.mjs +1 -1
package/dist/es/ai-model/conversation-history.mjs +58 -0
package/dist/es/ai-model/conversation-history.mjs.map +1 -0
package/dist/es/ai-model/index.mjs +3 -2
package/dist/es/ai-model/llm-planning.mjs +44 -12
package/dist/es/ai-model/llm-planning.mjs.map +1 -1
package/dist/es/ai-model/prompt/llm-planning.mjs +7 -61
package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
package/dist/es/ai-model/ui-tars-planning.mjs +40 -18
package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
package/dist/es/types.mjs.map +1 -1
package/dist/es/utils.mjs +2 -2
package/dist/lib/agent/agent.js +1 -1
package/dist/lib/agent/agent.js.map +1 -1
package/dist/lib/agent/tasks.js +44 -159
package/dist/lib/agent/tasks.js.map +1 -1
package/dist/lib/agent/utils.js +1 -1
package/dist/lib/ai-model/conversation-history.js +92 -0
package/dist/lib/ai-model/conversation-history.js.map +1 -0
package/dist/lib/ai-model/index.js +8 -4
package/dist/lib/ai-model/llm-planning.js +43 -11
package/dist/lib/ai-model/llm-planning.js.map +1 -1
package/dist/lib/ai-model/prompt/llm-planning.js +7 -67
package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
package/dist/lib/ai-model/service-caller/index.js.map +1 -1
package/dist/lib/ai-model/ui-tars-planning.js +42 -20
package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
package/dist/lib/types.js.map +1 -1
package/dist/lib/utils.js +2 -2
package/dist/types/agent/tasks.d.ts +4 -17
package/dist/types/ai-model/conversation-history.d.ts +18 -0
package/dist/types/ai-model/index.d.ts +2 -1
package/dist/types/ai-model/llm-planning.d.ts +2 -1
package/dist/types/ai-model/prompt/llm-planning.d.ts +0 -6
package/dist/types/ai-model/service-caller/index.d.ts +1 -1
package/dist/types/ai-model/ui-tars-planning.d.ts +6 -18
package/dist/types/types.d.ts +0 -1
package/package.json +3 -3

package/dist/lib/ai-model/prompt/llm-planning.js CHANGED Viewed

@@ -26,15 +26,11 @@ __webpack_require__.r(__webpack_exports__);
 __webpack_require__.d(__webpack_exports__, {
     systemPromptToTaskPlanning: ()=>systemPromptToTaskPlanning,
     descriptionForAction: ()=>descriptionForAction,
-    generateTaskBackgroundContext: ()=>generateTaskBackgroundContext,
-    planSchema: ()=>planSchema,
-    automationUserPrompt: ()=>automationUserPrompt
+    planSchema: ()=>planSchema
 });
-const prompts_namespaceObject = require("@langchain/core/prompts");
 const external_common_js_namespaceObject = require("../common.js");
 const external_common_js_namespaceObject_1 = require("./common.js");
-const vlCoTLog = '"what_the_user_wants_to_do_next_by_instruction": string, // What the user wants to do according to the instruction and previous logs. ';
-const vlCurrentLog = '"log": string, // Log what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The typical log looks like "Now i want to use action \'{ action-type }\' to do .. first". If no action should be done, log the reason. ". Use the same language as the user\'s instruction.';
+const vlCurrentLog = '"log": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The typical log looks like "The user wants to do ... . According to the instruction and the previous logs, i think the next action should be ....". If no action should be done, log the reason. Use the same language as the user\'s instruction.';
 const llmCurrentLog = '"log": string, // Log what the next actions you can do according to the screenshot and the instruction. The typical log looks like "Now i want to use action \'{ action-type }\' to do ..". If no action should be done, log the reason. ". Use the same language as the user\'s instruction.';
 const commonOutputFields = `"error"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.
   "more_actions_needed_by_instruction": boolean, // Consider if there is still more action(s) to do after the action in "Log" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;
@@ -113,7 +109,7 @@ const systemTemplateOfVLPlanning = ({ actionSpace, vlMode })=>{
     const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, vlLocateParam()));
     const actionList = actionDescriptionList.join('\n');
     return `
-Target: User will give you a screenshot, an instruction and some previous logs indicating what have been done. Please tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires.
+Target: User will give you a latest screenshot, an instruction and some previous logs indicating what have been done. Please tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires.
 Restriction:
 - Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.
@@ -129,7 +125,6 @@ Field description:
 Return in JSON format:
 {
-  ${vlCoTLog}
   ${vlCurrentLog}
   ${commonOutputFields}
   "action":
@@ -140,14 +135,12 @@ Return in JSON format:
   "sleep"?: number, // The sleep time after the action, in milliseconds.
 }
-For example, when the instruction is "click 'Confirm' button, and click 'Yes' in popup" and the log is "I will use action Tap to click 'Confirm' button", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.
+For example, when the instruction is "click 'Confirm' button, and click 'Yes' in popup" and the previous log shows "The 'Confirm' button has been clicked", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.
 this and output the JSON:
 {
-  "what_the_user_wants_to_do_next_by_instruction": "We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup",
-  "log": "I will use action Tap to click 'Yes' in popup",
-  "more_actions_needed_by_instruction": false,
+  "log": "The user wants to do click 'Confirm' button, and click 'Yes' in popup. According to the instruction and the previous logs, i think the next action should be click 'Yes' in popup.",
   "action": {
     "type": "Tap",
     "param": {
@@ -156,7 +149,8 @@ this and output the JSON:
         "prompt": "The 'Yes' button in popup"
       }
     }
-  }
+  },
+  "more_actions_needed_by_instruction": false,
 }
 `;
 };
@@ -383,65 +377,11 @@ const planSchema = {
         }
     }
 };
-const generateTaskBackgroundContext = (userInstruction, log, userActionContext)=>{
-    if (log) return `
-Here is the user's instruction:
-<instruction>
-  <high_priority_knowledge>
-    ${userActionContext}
-  </high_priority_knowledge>
-  ${userInstruction}
-</instruction>
-These are the logs from previous executions, which indicate what was done in the previous actions.
-Do NOT repeat these actions.
-<previous_logs>
-${log}
-</previous_logs>
-`;
-    return `
-Here is the user's instruction:
-<instruction>
-  <high_priority_knowledge>
-    ${userActionContext}
-  </high_priority_knowledge>
-  ${userInstruction}
-</instruction>
-`;
-};
-const automationUserPrompt = (vlMode)=>{
-    if (vlMode) return new prompts_namespaceObject.PromptTemplate({
-        template: '{taskBackgroundContext}',
-        inputVariables: [
-            'taskBackgroundContext'
-        ]
-    });
-    return new prompts_namespaceObject.PromptTemplate({
-        template: `
-pageDescription:
-=====================================
-{pageDescription}
-=====================================
-{taskBackgroundContext}`,
-        inputVariables: [
-            "pageDescription",
-            'taskBackgroundContext'
-        ]
-    });
-};
-exports.automationUserPrompt = __webpack_exports__.automationUserPrompt;
 exports.descriptionForAction = __webpack_exports__.descriptionForAction;
-exports.generateTaskBackgroundContext = __webpack_exports__.generateTaskBackgroundContext;
 exports.planSchema = __webpack_exports__.planSchema;
 exports.systemPromptToTaskPlanning = __webpack_exports__.systemPromptToTaskPlanning;
 for(var __webpack_i__ in __webpack_exports__)if (-1 === [
-    "automationUserPrompt",
     "descriptionForAction",
-    "generateTaskBackgroundContext",
     "planSchema",
     "systemPromptToTaskPlanning"
 ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];

package/dist/lib/ai-model/prompt/llm-planning.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import assert from 'node:assert';\nimport type { DeviceAction } from '@/types';\nimport { PromptTemplate } from '@langchain/core/prompts';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { ZodObject, z } from 'zod';\nimport { ifMidsceneLocatorField } from '../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\nconst vlCoTLog = `\"what_the_user_wants_to_do_next_by_instruction\": string, // What the user wants to do according to the instruction and previous logs. `;\nconst vlCurrentLog = `\"log\": string, // Log what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The typical log looks like \"Now i want to use action '{ action-type }' to do .. first\". If no action should be done, log the reason. \". Use the same language as the user's instruction.`;\nconst llmCurrentLog = `\"log\": string, // Log what the next actions you can do according to the screenshot and the instruction. The typical log looks like \"Now i want to use action '{ action-type }' to do ..\". If no action should be done, log the reason. \". Use the same language as the user's instruction.`;\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\nconst vlLocateParam = () =>\n '{bbox: [number, number, number, number], prompt: string }';\nconst llmLocateParam = () => '{\"id\": string, \"prompt\": string}';\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n // Try to extract parameter information from the zod schema\n // For zod object schemas, extract type information and descriptions\n const shape = (action.paramSchema as ZodObject<any>).shape;\n\n if (!shape) {\n console.warn(\n `action.paramSchema is not a ZodObject, may lead to unexpected behavior, action name: ${action.name}`,\n );\n }\n\n const paramLines: string[] = [];\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] \| undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string \| null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description \|\| null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description \|\| null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n if (paramLines.length > 0) {\n fields.push('- param:');\n for (const paramLine of paramLines) {\n fields.push(` - ${paramLine}`);\n }\n }\n }\n\n return `- ${action.name}, ${action.description \|\| 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nconst systemTemplateOfVLPlanning = ({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes \| undefined;\n}) => {\n const actionNameList = actionSpace.map((action) => action.name).join(', ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, vlLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\nTarget: User will give you a screenshot, an instruction and some previous logs indicating what have been done. Please tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\nRestriction:\n- Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.\n- Always give ONLY ONE action in \\`log\\` field (or null if no action should be done), instead of multiple actions. Supported actions are ${actionNameList}.\n- Don't repeat actions in the previous logs.\n- Bbox is the bounding box of the element to be located. It's an array of 4 numbers, representing ${bboxDescription(vlMode)}.\n\nSupporting actions:\n${actionList}\n\nField description:\n* The \\`prompt\\` field inside the \\`locate\\` field is a short description that could be used to locate the element.\n\nReturn in JSON format:\n{\n ${vlCoTLog}\n ${vlCurrentLog}\n ${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } \| null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, when the instruction is \"click 'Confirm' button, and click 'Yes' in popup\" and the log is \"I will use action Tap to click 'Confirm' button\", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.\n\nthis and output the JSON:\n\n{\n \"what_the_user_wants_to_do_next_by_instruction\": \"We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup\",\n \"log\": \"I will use action Tap to click 'Yes' in popup\",\n \"more_actions_needed_by_instruction\": false,\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": {\n \"bbox\": [100, 100, 200, 200],\n \"prompt\": \"The 'Yes' button in popup\"\n }\n }\n }\n}\n`;\n};\n\nconst systemTemplateOfLLM = ({\n actionSpace,\n}: { actionSpace: DeviceAction<any>[] }) => {\n const actionNameList = actionSpace.map((action) => action.name).join(' / ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, llmLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\n## Role\n\nYou are a versatile professional in software UI automation. Your outstanding contributions will impact the user experience of billions of users.\n\n## Objective\n\n- Decompose the instruction user asked into a series of actions\n- Locate the target element if possible\n- If the instruction cannot be accomplished, give a further plan.\n\n## Workflow\n\n1. Receive the screenshot, element description of screenshot(if any), user's instruction and previous logs.\n2. Decompose the user's task into a sequence of feasible actions, and place it in the \\`actions\\` field. There are different types of actions (${actionNameList}). The \"About the action\" section below will give you more details.\n3. Consider whether the user's instruction will be accomplished after the actions you composed.\n- If the instruction is accomplished, set \\`more_actions_needed_by_instruction\\` to false.\n- If more actions are needed, set \\`more_actions_needed_by_instruction\\` to true. Get ready to hand over to the next talent people like you. Carefully log what have been done in the \\`log\\` field, he or she will continue the task according to your logs.\n4. If the task is not feasible on this page, set \\`error\\` field to the reason.\n\n## Constraints\n\n- All the actions you composed MUST be feasible, which means all the action fields can be filled with the page context information you get. If not, don't plan this action.\n- Trust the \"What have been done\" field about the task (if any), don't repeat actions in it.\n- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \\`\\`\\`json\\`\\`\\`.\n- If the screenshot and the instruction are totally irrelevant, set reason in the \\`error\\` field.\n\n## About the \\`actions\\` field\n\nThe \\`locate\\` param is commonly used in the \\`param\\` field of the action, means to locate the target element to perform the action, it conforms to the following scheme:\n\ntype LocateParam = {\n \"id\": string, // the id of the element found. It should either be the id marked with a rectangle in the screenshot or the id described in the description.\n \"prompt\"?: string // the description of the element to find. It can only be omitted when locate is null.\n} \| null // If it's not on the page, the LocateParam should be null\n\n## Supported actions\n\nEach action has a \\`type\\` and corresponding \\`param\\`. To be detailed:\n${actionList}\n\n`.trim();\n};\n\nconst outputTemplate = `\n## Output JSON Format:\n\nThe JSON format is as follows:\n\n{\n \"actions\": [\n // ... some actions\n ],\n ${llmCurrentLog}\n ${commonOutputFields}\n}\n\n## Examples\n\n### Example: Decompose a task\n\nWhen you received the following information:\n\n* Instruction: 'Click the language switch button, wait 1s, click \"English\"'\n* Logs: null\n* Page Context (screenshot and description) shows: There is a language switch button, and the \"English\" option is not shown in the screenshot now.\n\nBy viewing the page screenshot and description, you should consider this and output the JSON:\n\n* The user intent is: tap the switch button, sleep, and tap the 'English' option\n* The language switch button is shown in the screenshot, and can be located by the page description or the id marked with a rectangle. So we can plan a Tap action to do this.\n* Plan a Sleep action to wait for 1 second to ensure the language options are displayed.\n* The \"English\" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So don't plan any action to do this.\n* Log what these action do: Click the language switch button to open the language options. Wait for 1 second.\n* The task cannot be accomplished (because the last tapping action is not finished yet), so the \\`more_actions_needed_by_instruction\\` field is true. The \\`error\\` field is null.\n\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": { id: \"c81c4e9a33\", prompt: \"The language switch button\" }\n }\n },\n {\n \"thought\": \"Wait for 1 second to ensure the language options are displayed.\",\n \"type\": \"Sleep\",\n \"param\": { \"timeMs\": 1000 },\n }\n ],\n \"error\": null,\n \"more_actions_needed_by_instruction\": true,\n \"log\": \"Click the language switch button to open the language options. Wait for 1 second\",\n}\n\n### Example: What NOT to do\nWrong output:\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \"id\": \"c81c4e9a33\" } // WRONG: prompt is missing, this is not a valid LocateParam\n }\n },\n {\n \"thought\": \"Click the English option\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": null // WRONG: if the element is not on the page, you should not plan this action\n }\n }\n ],\n \"more_actions_needed_by_instruction\": false, // WRONG: should be true\n \"log\": \"Click the language switch button to open the language options\",\n}\n`;\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes \| undefined;\n}) {\n if (vlMode) {\n return systemTemplateOfVLPlanning({ actionSpace, vlMode });\n }\n\n return `${systemTemplateOfLLM({ actionSpace })}\\n\\n${outputTemplate}`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n\nexport const generateTaskBackgroundContext = (\n userInstruction: string,\n log?: string,\n userActionContext?: string,\n) => {\n if (log) {\n return `\nHere is the user's instruction:\n\n<instruction>\n <high_priority_knowledge>\n ${userActionContext}\n </high_priority_knowledge>\n\n ${userInstruction}\n</instruction>\n\nThese are the logs from previous executions, which indicate what was done in the previous actions.\nDo NOT repeat these actions.\n<previous_logs>\n${log}\n</previous_logs>\n`;\n }\n\n return `\nHere is the user's instruction:\n<instruction>\n <high_priority_knowledge>\n ${userActionContext}\n </high_priority_knowledge>\n\n ${userInstruction}\n</instruction>\n`;\n};\n\nexport const automationUserPrompt = (vlMode: TVlModeTypes \| undefined) => {\n if (vlMode) {\n return new PromptTemplate({\n template: '{taskBackgroundContext}',\n inputVariables: ['taskBackgroundContext'],\n });\n }\n\n return new PromptTemplate({\n template: `\npageDescription:\n=====================================\n{pageDescription}\n=====================================\n\n{taskBackgroundContext}`,\n inputVariables: ['pageDescription', 'taskBackgroundContext'],\n });\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","vlCoTLog","vlCurrentLog","llmCurrentLog","commonOutputFields","vlLocateParam","llmLocateParam","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","shape","console","paramLines","getTypeName","field","_actualField__def","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","_actualField__def_values","values","option","String","getDescription","isOptional","keyWithOptional","description","paramLine","systemTemplateOfVLPlanning","actionSpace","vlMode","actionNameList","actionDescriptionList","actionList","bboxDescription","systemTemplateOfLLM","outputTemplate","systemPromptToTaskPlanning","planSchema","generateTaskBackgroundContext","userInstruction","log","userActionContext","automationUserPrompt","PromptTemplate"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACIA,MAAMI,WAAW;AACjB,MAAMC,eAAe;AACrB,MAAMC,gBAAgB;AAEtB,MAAMC,qBAAqB,CAAC;+NACmM,CAAC;AAChO,MAAMC,gBAAgB,IACpB;AACF,MAAMC,iBAAiB,IAAM;AAEtB,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QAGtB,MAAMI,QAASJ,OAAO,WAAW,CAAoB,KAAK;QAE1D,IAAI,CAACI,OACHC,QAAQ,IAAI,CACV,CAAC,qFAAqF,EAAEL,OAAO,IAAI,EAAE;QAIzG,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,cAAc,CAACC;gBAoBGC;YAlBtB,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAGrC,OAAOA;YACT;YAEA,MAAME,cAAcH,YAAYF;YAChC,MAAMM,gBAAgB,QAAAL,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ;YAEhD,IAAIK,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;YAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;YACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;gBAEjC,IAAIC,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,cACzB,OAAOZ;gBAET,OAAO;YACT;YACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAE5BE,0BAAAA;gBADH,MAAMC,SACJ,AAAC,SAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,mBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBACG,GAAG,CAAC,CAACE,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,IAAI,CAAC,KAAI,KAAK;gBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;YAC1B;YAEAZ,QAAQ,IAAI,CACV,2EACAQ,YAAY,IAAI;YAElB,OAAOA,YAAY,QAAQ;QAC7B;QAGA,MAAMO,iBAAiB,CAACZ;gBAgClBC;YA9BJ,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAGrC,OAAOA;YACT;YAGA,IAAI,iBAAiBH,OACnB,OAAOA,MAAM,WAAW,IAAI;YAG9B,MAAMK,cAAcH,YAAYF;YAGhC,IAAI,iBAAiBK,aACnB,OAAOA,YAAY,WAAW,IAAI;YAIpC,IAAIJ,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,aACjC;gBAAA,IAAI,kCAAkCI,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;YACT;YAGF,OAAO;QACT;QAEA,KAAK,MAAM,CAACzB,KAAKoB,MAAM,IAAInB,OAAO,OAAO,CAACe,OACxC,IAAII,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;YAEtC,MAAMa,aACJ,AAAqC,cAArC,OAAQb,MAAc,UAAU,IAC/BA,MAAc,UAAU;YAC3B,MAAMc,kBAAkBD,aAAa,GAAGjC,IAAI,CAAC,CAAC,GAAGA;YAGjD,MAAMwB,WAAWL,YAAYC;YAG7B,MAAMe,cAAcH,eAAeZ;YAGnC,IAAIgB,YAAY,GAAGF,gBAAgB,EAAE,EAAEV,UAAU;YACjD,IAAIW,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;YAGnCjB,WAAW,IAAI,CAACkB;QAClB;QAGF,IAAIlB,WAAW,MAAM,GAAG,GAAG;YACzBH,OAAO,IAAI,CAAC;YACZ,KAAK,MAAMqB,aAAalB,WACtBH,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEqB,WAAW;QAElC;IACF;IAEA,OAAO,CAAC,EAAE,EAAExB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEA,MAAMuB,6BAA6B,CAAC,EAClCC,WAAW,EACXC,MAAM,EAIP;IACC,MAAMC,iBAAiBF,YAAY,GAAG,CAAC,CAAC1B,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAM6B,wBAAwBH,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBAAqBC,QAAQH;IAEtC,MAAMiC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;yIAK+H,EAAED,eAAe;;kGAExD,EAAEG,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBJ,QAAQ;;;AAG5H,EAAEG,WAAW;;;;;;;EAOX,EAAErC,SAAS;EACX,EAAEC,aAAa;EACf,EAAEE,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;AA2BvB,CAAC;AACD;AAEA,MAAMoC,sBAAsB,CAAC,EAC3BN,WAAW,EAC0B;IACrC,MAAME,iBAAiBF,YAAY,GAAG,CAAC,CAAC1B,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAM6B,wBAAwBH,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBAAqBC,QAAQF;IAEtC,MAAMgC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;;;;;;;;;;+IAcqI,EAAED,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;AAyBhK,EAAEE,WAAW;;AAEb,CAAC,CAAC,IAAI;AACN;AAEA,MAAMG,iBAAiB,CAAC;;;;;;;;;EAStB,EAAEtC,cAAc;EAChB,EAAEC,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAgEvB,CAAC;AAEM,eAAesC,2BAA2B,EAC/CR,WAAW,EACXC,MAAM,EAIP;IACC,IAAIA,QACF,OAAOF,2BAA2B;QAAEC;QAAaC;IAAO;IAG1D,OAAO,GAAGK,oBAAoB;QAAEN;IAAY,GAAG,IAAI,EAAEO,gBAAgB;AACvE;AAEO,MAAME,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF;AAEO,MAAMC,gCAAgC,CAC3CC,iBACAC,KACAC;IAEA,IAAID,KACF,OAAO,CAAC;;;;;IAKR,EAAEC,kBAAkB;;;EAGtB,EAAEF,gBAAgB;;;;;;AAMpB,EAAEC,IAAI;;AAEN,CAAC;IAGC,OAAO,CAAC;;;;IAIN,EAAEC,kBAAkB;;;EAGtB,EAAEF,gBAAgB;;AAEpB,CAAC;AACD;AAEO,MAAMG,uBAAuB,CAACb;IACnC,IAAIA,QACF,OAAO,IAAIc,wBAAAA,cAAcA,CAAC;QACxB,UAAU;QACV,gBAAgB;YAAC;SAAwB;IAC3C;IAGF,OAAO,IAAIA,wBAAAA,cAAcA,CAAC;QACxB,UAAU,CAAC;;;;;;uBAMQ,CAAC;QACpB,gBAAgB;YAAC;YAAmB;SAAwB;IAC9D;AACF"}
1	+ {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import assert from 'node:assert';\nimport type { DeviceAction } from '@/types';\nimport { PromptTemplate } from '@langchain/core/prompts';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { ZodObject, z } from 'zod';\nimport { ifMidsceneLocatorField } from '../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\nconst vlCurrentLog = `\"log\": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The typical log looks like \"The user wants to do ... . According to the instruction and the previous logs, i think the next action should be ....\". If no action should be done, log the reason. Use the same language as the user's instruction.`;\nconst llmCurrentLog = `\"log\": string, // Log what the next actions you can do according to the screenshot and the instruction. The typical log looks like \"Now i want to use action '{ action-type }' to do ..\". If no action should be done, log the reason. \". Use the same language as the user's instruction.`;\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\nconst vlLocateParam = () =>\n '{bbox: [number, number, number, number], prompt: string }';\nconst llmLocateParam = () => '{\"id\": string, \"prompt\": string}';\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n // Try to extract parameter information from the zod schema\n // For zod object schemas, extract type information and descriptions\n const shape = (action.paramSchema as ZodObject<any>).shape;\n\n if (!shape) {\n console.warn(\n `action.paramSchema is not a ZodObject, may lead to unexpected behavior, action name: ${action.name}`,\n );\n }\n\n const paramLines: string[] = [];\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] \| undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string \| null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' \|\|\n typeName === 'ZodNullable' \|\|\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description \|\| null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description \|\| null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n if (paramLines.length > 0) {\n fields.push('- param:');\n for (const paramLine of paramLines) {\n fields.push(` - ${paramLine}`);\n }\n }\n }\n\n return `- ${action.name}, ${action.description \|\| 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nconst systemTemplateOfVLPlanning = ({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes \| undefined;\n}) => {\n const actionNameList = actionSpace.map((action) => action.name).join(', ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, vlLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\nTarget: User will give you a latest screenshot, an instruction and some previous logs indicating what have been done. Please tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\nRestriction:\n- Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.\n- Always give ONLY ONE action in \\`log\\` field (or null if no action should be done), instead of multiple actions. Supported actions are ${actionNameList}.\n- Don't repeat actions in the previous logs.\n- Bbox is the bounding box of the element to be located. It's an array of 4 numbers, representing ${bboxDescription(vlMode)}.\n\nSupporting actions:\n${actionList}\n\nField description:\n* The \\`prompt\\` field inside the \\`locate\\` field is a short description that could be used to locate the element.\n\nReturn in JSON format:\n{\n ${vlCurrentLog}\n ${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } \| null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, when the instruction is \"click 'Confirm' button, and click 'Yes' in popup\" and the previous log shows \"The 'Confirm' button has been clicked\", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.\n\nthis and output the JSON:\n\n{\n \"log\": \"The user wants to do click 'Confirm' button, and click 'Yes' in popup. According to the instruction and the previous logs, i think the next action should be click 'Yes' in popup.\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": {\n \"bbox\": [100, 100, 200, 200],\n \"prompt\": \"The 'Yes' button in popup\"\n }\n }\n },\n \"more_actions_needed_by_instruction\": false,\n}\n`;\n};\n\nconst systemTemplateOfLLM = ({\n actionSpace,\n}: { actionSpace: DeviceAction<any>[] }) => {\n const actionNameList = actionSpace.map((action) => action.name).join(' / ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, llmLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\n## Role\n\nYou are a versatile professional in software UI automation. Your outstanding contributions will impact the user experience of billions of users.\n\n## Objective\n\n- Decompose the instruction user asked into a series of actions\n- Locate the target element if possible\n- If the instruction cannot be accomplished, give a further plan.\n\n## Workflow\n\n1. Receive the screenshot, element description of screenshot(if any), user's instruction and previous logs.\n2. Decompose the user's task into a sequence of feasible actions, and place it in the \\`actions\\` field. There are different types of actions (${actionNameList}). The \"About the action\" section below will give you more details.\n3. Consider whether the user's instruction will be accomplished after the actions you composed.\n- If the instruction is accomplished, set \\`more_actions_needed_by_instruction\\` to false.\n- If more actions are needed, set \\`more_actions_needed_by_instruction\\` to true. Get ready to hand over to the next talent people like you. Carefully log what have been done in the \\`log\\` field, he or she will continue the task according to your logs.\n4. If the task is not feasible on this page, set \\`error\\` field to the reason.\n\n## Constraints\n\n- All the actions you composed MUST be feasible, which means all the action fields can be filled with the page context information you get. If not, don't plan this action.\n- Trust the \"What have been done\" field about the task (if any), don't repeat actions in it.\n- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \\`\\`\\`json\\`\\`\\`.\n- If the screenshot and the instruction are totally irrelevant, set reason in the \\`error\\` field.\n\n## About the \\`actions\\` field\n\nThe \\`locate\\` param is commonly used in the \\`param\\` field of the action, means to locate the target element to perform the action, it conforms to the following scheme:\n\ntype LocateParam = {\n \"id\": string, // the id of the element found. It should either be the id marked with a rectangle in the screenshot or the id described in the description.\n \"prompt\"?: string // the description of the element to find. It can only be omitted when locate is null.\n} \| null // If it's not on the page, the LocateParam should be null\n\n## Supported actions\n\nEach action has a \\`type\\` and corresponding \\`param\\`. To be detailed:\n${actionList}\n\n`.trim();\n};\n\nconst outputTemplate = `\n## Output JSON Format:\n\nThe JSON format is as follows:\n\n{\n \"actions\": [\n // ... some actions\n ],\n ${llmCurrentLog}\n ${commonOutputFields}\n}\n\n## Examples\n\n### Example: Decompose a task\n\nWhen you received the following information:\n\n* Instruction: 'Click the language switch button, wait 1s, click \"English\"'\n* Logs: null\n* Page Context (screenshot and description) shows: There is a language switch button, and the \"English\" option is not shown in the screenshot now.\n\nBy viewing the page screenshot and description, you should consider this and output the JSON:\n\n* The user intent is: tap the switch button, sleep, and tap the 'English' option\n* The language switch button is shown in the screenshot, and can be located by the page description or the id marked with a rectangle. So we can plan a Tap action to do this.\n* Plan a Sleep action to wait for 1 second to ensure the language options are displayed.\n* The \"English\" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So don't plan any action to do this.\n* Log what these action do: Click the language switch button to open the language options. Wait for 1 second.\n* The task cannot be accomplished (because the last tapping action is not finished yet), so the \\`more_actions_needed_by_instruction\\` field is true. The \\`error\\` field is null.\n\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": { id: \"c81c4e9a33\", prompt: \"The language switch button\" }\n }\n },\n {\n \"thought\": \"Wait for 1 second to ensure the language options are displayed.\",\n \"type\": \"Sleep\",\n \"param\": { \"timeMs\": 1000 },\n }\n ],\n \"error\": null,\n \"more_actions_needed_by_instruction\": true,\n \"log\": \"Click the language switch button to open the language options. Wait for 1 second\",\n}\n\n### Example: What NOT to do\nWrong output:\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \"id\": \"c81c4e9a33\" } // WRONG: prompt is missing, this is not a valid LocateParam\n }\n },\n {\n \"thought\": \"Click the English option\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": null // WRONG: if the element is not on the page, you should not plan this action\n }\n }\n ],\n \"more_actions_needed_by_instruction\": false, // WRONG: should be true\n \"log\": \"Click the language switch button to open the language options\",\n}\n`;\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes \| undefined;\n}) {\n if (vlMode) {\n return systemTemplateOfVLPlanning({ actionSpace, vlMode });\n }\n\n return `${systemTemplateOfLLM({ actionSpace })}\\n\\n${outputTemplate}`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","vlCurrentLog","llmCurrentLog","commonOutputFields","vlLocateParam","llmLocateParam","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","shape","console","paramLines","getTypeName","field","_actualField__def","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","_actualField__def_values","values","option","String","getDescription","isOptional","keyWithOptional","description","paramLine","systemTemplateOfVLPlanning","actionSpace","vlMode","actionNameList","actionDescriptionList","actionList","bboxDescription","systemTemplateOfLLM","outputTemplate","systemPromptToTaskPlanning","planSchema"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACIA,MAAMI,eAAe;AACrB,MAAMC,gBAAgB;AAEtB,MAAMC,qBAAqB,CAAC;+NACmM,CAAC;AAChO,MAAMC,gBAAgB,IACpB;AACF,MAAMC,iBAAiB,IAAM;AAEtB,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QAGtB,MAAMI,QAASJ,OAAO,WAAW,CAAoB,KAAK;QAE1D,IAAI,CAACI,OACHC,QAAQ,IAAI,CACV,CAAC,qFAAqF,EAAEL,OAAO,IAAI,EAAE;QAIzG,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,cAAc,CAACC;gBAoBGC;YAlBtB,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAGrC,OAAOA;YACT;YAEA,MAAME,cAAcH,YAAYF;YAChC,MAAMM,gBAAgB,QAAAL,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ;YAEhD,IAAIK,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;YAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;YACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;gBAEjC,IAAIC,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,cACzB,OAAOZ;gBAET,OAAO;YACT;YACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAE5BE,0BAAAA;gBADH,MAAMC,SACJ,AAAC,SAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,mBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBACG,GAAG,CAAC,CAACE,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,IAAI,CAAC,KAAI,KAAK;gBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;YAC1B;YAEAZ,QAAQ,IAAI,CACV,2EACAQ,YAAY,IAAI;YAElB,OAAOA,YAAY,QAAQ;QAC7B;QAGA,MAAMO,iBAAiB,CAACZ;gBAgClBC;YA9BJ,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAGrC,OAAOA;YACT;YAGA,IAAI,iBAAiBH,OACnB,OAAOA,MAAM,WAAW,IAAI;YAG9B,MAAMK,cAAcH,YAAYF;YAGhC,IAAI,iBAAiBK,aACnB,OAAOA,YAAY,WAAW,IAAI;YAIpC,IAAIJ,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,aACjC;gBAAA,IAAI,kCAAkCI,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;YACT;YAGF,OAAO;QACT;QAEA,KAAK,MAAM,CAACxB,KAAKmB,MAAM,IAAIlB,OAAO,OAAO,CAACc,OACxC,IAAII,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;YAEtC,MAAMa,aACJ,AAAqC,cAArC,OAAQb,MAAc,UAAU,IAC/BA,MAAc,UAAU;YAC3B,MAAMc,kBAAkBD,aAAa,GAAGhC,IAAI,CAAC,CAAC,GAAGA;YAGjD,MAAMuB,WAAWL,YAAYC;YAG7B,MAAMe,cAAcH,eAAeZ;YAGnC,IAAIgB,YAAY,GAAGF,gBAAgB,EAAE,EAAEV,UAAU;YACjD,IAAIW,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;YAGnCjB,WAAW,IAAI,CAACkB;QAClB;QAGF,IAAIlB,WAAW,MAAM,GAAG,GAAG;YACzBH,OAAO,IAAI,CAAC;YACZ,KAAK,MAAMqB,aAAalB,WACtBH,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEqB,WAAW;QAElC;IACF;IAEA,OAAO,CAAC,EAAE,EAAExB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEA,MAAMuB,6BAA6B,CAAC,EAClCC,WAAW,EACXC,MAAM,EAIP;IACC,MAAMC,iBAAiBF,YAAY,GAAG,CAAC,CAAC1B,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAM6B,wBAAwBH,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBAAqBC,QAAQH;IAEtC,MAAMiC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;yIAK+H,EAAED,eAAe;;kGAExD,EAAEG,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBJ,QAAQ;;;AAG5H,EAAEG,WAAW;;;;;;;EAOX,EAAEpC,aAAa;EACf,EAAEE,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;AA0BvB,CAAC;AACD;AAEA,MAAMoC,sBAAsB,CAAC,EAC3BN,WAAW,EAC0B;IACrC,MAAME,iBAAiBF,YAAY,GAAG,CAAC,CAAC1B,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAM6B,wBAAwBH,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBAAqBC,QAAQF;IAEtC,MAAMgC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;;;;;;;;;;+IAcqI,EAAED,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;AAyBhK,EAAEE,WAAW;;AAEb,CAAC,CAAC,IAAI;AACN;AAEA,MAAMG,iBAAiB,CAAC;;;;;;;;;EAStB,EAAEtC,cAAc;EAChB,EAAEC,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAgEvB,CAAC;AAEM,eAAesC,2BAA2B,EAC/CR,WAAW,EACXC,MAAM,EAIP;IACC,IAAIA,QACF,OAAOF,2BAA2B;QAAEC;QAAaC;IAAO;IAG1D,OAAO,GAAGK,oBAAoB;QAAEN;IAAY,GAAG,IAAI,EAAEO,gBAAgB;AACvE;AAEO,MAAME,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}

package/dist/lib/ai-model/service-caller/index.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"ai-model/service-caller/index.js","sources":["webpack://@midscene/core/webpack/runtime/compat_get_default_export","webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n type IModelConfig,\n MIDSCENE_API_TYPE,\n MIDSCENE_LANGSMITH_DEBUG,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { parseBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelConfig,\n}: {\n AIActionTypeValue: AIActionType;\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' \| 'anthropic';\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes \| undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n openaiUseAzureDeprecated,\n useAzureOpenai,\n azureOpenaiScope,\n azureOpenaiKey,\n azureOpenaiEndpoint,\n azureOpenaiApiVersion,\n azureOpenaiDeployment,\n azureExtraConfig,\n useAnthropicSdk,\n anthropicApiKey,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n } = modelConfig;\n\n let openai: OpenAI \| AzureOpenAI \| undefined;\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (openaiUseAzureDeprecated) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (useAzureOpenai) {\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n let tokenProvider: any = undefined;\n if (azureOpenaiScope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n tokenProvider = getBearerTokenProvider(credential, azureOpenaiScope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: azureOpenaiKey,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n dangerouslyAllowBrowser: true,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n }\n } else if (!useAnthropicSdk) {\n openai = new OpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n defaultHeaders: {\n ...(openaiExtraConfig?.defaultHeaders \|\| {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n }\n\n // Anthropic\n if (useAnthropicSdk) {\n openai = new Anthropic({\n apiKey: anthropicApiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const {\n completion,\n style,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n } = await createChatClient({\n AIActionTypeValue,\n modelConfig,\n });\n\n const responseFormat = getResponseFormat(modelName, AIActionTypeValue);\n\n const maxTokens = globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string \| undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage \| undefined;\n let timeCost: number \| undefined;\n\n const commonConfig = {\n temperature: vlMode === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens \|\| '2048', 10),\n ...(vlMode === 'qwen-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string \| null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content \|\| '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content \|\| '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content \|\| reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode \|\| 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode \|\| 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens \|\| ''}, completion-tokens, ${result.usage?.completion_tokens \|\| ''}, total-tokens, ${result.usage?.total_tokens \|\| ''}, cost-ms, ${timeCost}, requestId, ${result._request_id \|\| ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text \|\| '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content \|\| '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content \|\| '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport const getResponseFormat = (\n modelName: string,\n AIActionTypeValue: AIActionType,\n):\n \| OpenAI.ChatCompletionCreateParams['response_format']\n \| OpenAI.ResponseFormatJSONObject => {\n let responseFormat:\n \| OpenAI.ChatCompletionCreateParams['response_format']\n \| OpenAI.ResponseFormatJSONObject\n \| undefined;\n\n if (modelName.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n case AIActionType.TEXT:\n // No response format for plain text - return as-is\n responseFormat = undefined;\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n // Skip for plain text to allow string output\n if (\n modelName === 'gpt-4o-2024-05-13' &&\n AIActionTypeValue !== AIActionType.TEXT\n ) {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n return responseFormat;\n};\n\nexport async function callAIWithObjectResponse<T>(\n messages: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const response = await callAI(messages, AIActionTypeValue, modelConfig);\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s(\\{[\\s\\S]\\})\\s$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s(\\{[\\s\\S]?\\})\\s```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes \| undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\$(\\d+),(\\d+)\$/)) {\n return cleanJsonString\n .match(/\$(\\d+),(\\d+)\$/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (vlMode === 'doubao-vision' \|\| vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","createChatClient","AIActionTypeValue","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","openaiUseAzureDeprecated","useAzureOpenai","azureOpenaiScope","azureOpenaiKey","azureOpenaiEndpoint","azureOpenaiApiVersion","azureOpenaiDeployment","azureExtraConfig","useAnthropicSdk","anthropicApiKey","modelDescription","uiTarsVersion","vlMode","openai","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","AzureOpenAI","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","OpenAI","MIDSCENE_API_TYPE","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","Error","console","wrapOpenAI","Anthropic","callAI","messages","options","completion","style","responseFormat","getResponseFormat","maxTokens","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","commonConfig","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;;;;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IC0BA,eAAeI,iBAAiB,EAC9BC,iBAAiB,EACjBC,WAAW,EAIZ;QAQC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,wBAAwB,EACxBC,cAAc,EACdC,gBAAgB,EAChBC,cAAc,EACdC,mBAAmB,EACnBC,qBAAqB,EACrBC,qBAAqB,EACrBC,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EACfC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACP,GAAGnB;QAEJ,IAAIoB;QAEJ,IAAIC;QACJ,MAAMC,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QAC5B,IAAIrB,WAAW;YACboB,WAAW,oBAAoBpB;YAC/BmB,aAAa,IAAIG,2CAAAA,eAAeA,CAACtB;QACnC,OAAO,IAAID,YAAY;YACrBqB,WAAW,qBAAqBrB;YAChCoB,aAAa,IAAII,2CAAAA,eAAeA,CAACxB;QACnC;QAEA,IAAIM,0BAEFa,SAAS,IAAIM,gCAAAA,WAAWA,CAAC;YACvB,SAAStB;YACT,QAAQC;YACR,WAAWgB;YACX,GAAGf,iBAAiB;YACpB,yBAAyB;QAC3B;aACK,IAAIE,gBAAgB;YAGzB,IAAImB;YACJ,IAAIlB,kBAAkB;gBACpBmB,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,CAACC,sBAAAA,WAAWA,EACZ;gBAEF,MAAMC,aAAa,IAAIC,yBAAAA,sBAAsBA;gBAE7CJ,gBAAgBK,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EAAuBF,YAAYrB;gBAEnDW,SAAS,IAAIM,gCAAAA,WAAWA,CAAC;oBACvB,sBAAsBC;oBACtB,UAAUhB;oBACV,YAAYC;oBACZ,YAAYC;oBACZ,GAAGP,iBAAiB;oBACpB,GAAGQ,gBAAgB;gBACrB;YACF,OAEEM,SAAS,IAAIM,gCAAAA,WAAWA,CAAC;gBACvB,QAAQhB;gBACR,UAAUC;gBACV,YAAYC;gBACZ,YAAYC;gBACZ,yBAAyB;gBACzB,GAAGP,iBAAiB;gBACpB,GAAGQ,gBAAgB;YACrB;QAEJ,OAAO,IAAI,CAACC,iBACVK,SAAS,IAAIa,CAAAA,yBAAAA,EAAO;YAClB,SAAS7B;YACT,QAAQC;YACR,WAAWgB;YACX,GAAGf,iBAAiB;YACpB,gBAAgB;gBACd,GAAIA,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,cAAc,AAAD,KAAK,CAAC,CAAC;gBAC3C,CAAC4B,oBAAAA,iBAAiBA,CAAC,EAAEnC,kBAAkB,QAAQ;YACjD;YACA,yBAAyB;QAC3B;QAGF,IACEqB,UACAe,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACC,oBAAAA,wBAAwBA,GAClE;YACA,IAAIP,sBAAAA,WAAWA,EACb,MAAM,IAAIQ,MAAM;YAElBC,QAAQ,GAAG,CAAC;YACZ,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM;YAC7BnB,SAASmB,WAAWnB;QACtB;QAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;YACL,YAAYA,OAAO,IAAI,CAAC,WAAW;YACnC,OAAO;YACPjB;YACAc;YACAC;YACAC;QACF;QAIF,IAAIJ,iBACFK,SAAS,IAAIoB,oBAAAA,SAASA,CAAC;YACrB,QAAQxB;YACR,WAAWK;YACX,yBAAyB;QAC3B;QAGF,IAAI,AAAkB,WAAXD,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;YACL,YAAaA,OAAe,QAAQ;YACpC,OAAO;YACPjB;YACAc;YACAC;YACAC;QACF;QAGF,MAAM,IAAIkB,MAAM;IAClB;IAEO,eAAeI,OACpBC,QAAsC,EACtC3C,iBAA+B,EAC/BC,WAAyB,EACzB2C,OAGC;QAED,MAAM,EACJC,UAAU,EACVC,KAAK,EACL1C,SAAS,EACTc,gBAAgB,EAChBC,aAAa,EACbC,MAAM,EACP,GAAG,MAAMrB,iBAAiB;YACzBC;YACAC;QACF;QAEA,MAAM8C,iBAAiBC,kBAAkB5C,WAAWJ;QAEpD,MAAMiD,YAAYb,oBAAAA,mBAAAA,CAAAA,iBAAqC,CAACc,oBAAAA,iBAAiBA;QACzE,MAAMC,YAAY3B,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QAC3B,MAAM4B,oBAAoB5B,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QACnC,MAAM6B,qBAAqB7B,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QAEpC,MAAM8B,YAAYC,KAAK,GAAG;QAE1B,MAAMC,cAAcZ,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;QACtD,IAAIa;QACJ,IAAIC,cAAc;QAClB,IAAIC;QACJ,IAAIC;QAEJ,MAAMC,eAAe;YACnB,aAAazC,AAAW,kBAAXA,SAA2B,MAAM;YAC9C,QAAQ,CAAC,CAACoC;YACV,YACE,AAAqB,YAArB,OAAOP,YACHA,YACAa,OAAO,QAAQ,CAACb,aAAa,QAAQ;YAC3C,GAAI7B,AAAW,cAAXA,SACA;gBACE,2BAA2B;YAC7B,IACA,CAAC,CAAC;QACR;QAEA,IAAI;YACF,IAAI0B,AAAU,aAAVA,OAAoB;gBACtBK,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAEpD,WAAW;gBAGrE,IAAIoD,aAAa;oBACf,MAAMO,SAAU,MAAMlB,WAAW,MAAM,CACrC;wBACE,OAAOzC;wBACPuC;wBACA,iBAAiBI;wBACjB,GAAGc,YAAY;oBACjB,GACA;wBACE,QAAQ;oBACV;oBAKF,WAAW,MAAMG,SAASD,OAAQ;4BAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;wBAtBJ,MAAMV,UAAUQ,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;wBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;wBAG3D,IAAIF,MAAM,KAAK,EACbL,QAAQK,MAAM,KAAK;wBAGrB,IAAIP,WAAWW,mBAAmB;4BAChCV,eAAeD;4BACf,MAAMY,YAAiC;gCACrCZ;gCACAW;gCACAV;gCACA,YAAY;gCACZ,OAAOY;4BACT;4BACA1B,QAAQ,OAAO,CAAEyB;wBACnB;wBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;4BACrCP,WAAWL,KAAK,GAAG,KAAKD;4BAGxB,IAAI,CAACK,OAAO;gCAEV,MAAMY,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACd,YAAY,MAAM,GAAG;gCAElCC,QAAQ;oCACN,eAAeY;oCACf,mBAAmBA;oCACnB,cAAcA,AAAkB,IAAlBA;gCAChB;4BACF;4BAGA,MAAME,aAAkC;gCACtC,SAAS;gCACTf;gCACA,mBAAmB;gCACnB,YAAY;gCACZ,OAAO;oCACL,eAAeC,MAAM,aAAa,IAAI;oCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;oCAC9C,cAAcA,MAAM,YAAY,IAAI;oCACpC,WAAWC,YAAY;oCACvB,YAAYxD;oCACZ,mBAAmBc;oCACnB,QAAQjB,YAAY,MAAM;gCAC5B;4BACF;4BACA2C,QAAQ,OAAO,CAAE6B;4BACjB;wBACF;oBACF;oBACAhB,UAAUC;oBACVN,kBACE,CAAC,iBAAiB,EAAEhD,UAAU,QAAQ,EAAEgB,UAAU,UAAU,WAAW,EAAEwC,UAAU;gBAEvF,OAAO;wBAUqGc,eAAyDC,gBAAwDC;oBAT3N,MAAMC,SAAS,MAAMhC,WAAW,MAAM,CAAC;wBACrC,OAAOzC;wBACPuC;wBACA,iBAAiBI;wBACjB,GAAGc,YAAY;oBACjB;oBACAD,WAAWL,KAAK,GAAG,KAAKD;oBAExBF,kBACE,CAAC,OAAO,EAAEhD,UAAU,QAAQ,EAAEgB,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEuD,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEhB,SAAS,aAAa,EAAEiB,OAAO,WAAW,IAAI,IAAI;oBAG3TxB,mBACE,CAAC,oBAAoB,EAAEyB,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;oBAGvDhD,IAAAA,sBAAAA,MAAAA,AAAAA,EACEgD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;oBAEhEpB,UAAUoB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;oBAC3ClB,QAAQkB,OAAO,KAAK;gBACtB;gBAEA1B,UAAU,CAAC,UAAU,EAAEM,SAAS;gBAChC5B,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO4B,SAAS;YAClB,OAAO,IAAIX,AAAU,gBAAVA,OAAuB;gBAChC,MAAMiC,sBAAsB,CAACtB;oBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;wBAChC,MAAMuB,YAAYvB,QAAQ,SAAS,CAAC,GAAG;wBACvC5B,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOmD,WAAW;wBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,AAAAA,IAAAA,oBAAAA,WAAAA,AAAAA,EAAY1B,QAAQ,SAAS,CAAC,GAAG;wBAC5D,OAAO;4BACL,QAAQ;gCACN,MAAM;gCACN,YAAYwB;gCACZ,MAAMC;4BACR;4BACA,MAAM;wBACR;oBACF;oBACA,OAAOzB;gBACT;gBAEA,IAAID,aAAa;oBACf,MAAMO,SAAU,MAAMlB,WAAW,MAAM,CAAC;wBACtC,OAAOzC;wBACP,QAAQ;wBACR,UAAUuC,SAAS,GAAG,CAAC,CAACyC,IAAO;gCAC7B,MAAM;gCACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;4BACf;wBACA,iBAAiBrC;wBACjB,GAAGc,YAAY;oBACjB;oBAEA,WAAW,MAAMG,SAASD,OAAQ;4BAChBuB;wBAAhB,MAAM7B,UAAU6B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;wBACrC,IAAI7B,SAAS;4BACXC,eAAeD;4BACf,MAAMY,YAAiC;gCACrCZ;gCACAC;gCACA,mBAAmB;gCACnB,YAAY;gCACZ,OAAOY;4BACT;4BACA1B,QAAQ,OAAO,CAAEyB;wBACnB;wBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;4BACjCJ,WAAWL,KAAK,GAAG,KAAKD;4BACxB,MAAMiC,iBAAiBvB,MAAM,KAAK;4BAGlC,MAAMS,aAAkC;gCACtC,SAAS;gCACTf;gCACA,mBAAmB;gCACnB,YAAY;gCACZ,OAAO6B,iBACH;oCACE,eAAeA,eAAe,YAAY,IAAI;oCAC9C,mBAAmBA,eAAe,aAAa,IAAI;oCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;oCACnC,WAAW3B,YAAY;oCACvB,YAAYxD;oCACZ,mBAAmBc;oCACnB,QAAQjB,YAAY,MAAM;gCAC5B,IACAqE;4BACN;4BACA1B,QAAQ,OAAO,CAAE6B;4BACjB;wBACF;oBACF;oBACAhB,UAAUC;gBACZ,OAAO;oBACL,MAAMmB,SAAS,MAAMhC,WAAW,MAAM,CAAC;wBACrC,OAAOzC;wBACP,QAAQ;wBACR,UAAUuC,SAAS,GAAG,CAAC,CAACyC,IAAO;gCAC7B,MAAM;gCACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;4BACf;wBACA,iBAAiBrC;wBACjB,GAAGc,YAAY;oBACjB;oBACAD,WAAWL,KAAK,GAAG,KAAKD;oBACxBG,UAAWoB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;oBACzClB,QAAQkB,OAAO,KAAK;gBACtB;gBAEAhD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO4B,SAAS;YAClB;YAEA,IAAID,eAAe,CAACG,OAAO;gBAEzB,MAAMY,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEf,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;gBAEtCE,QAAQ;oBACN,eAAeY;oBACf,mBAAmBA;oBACnB,cAAcA,AAAkB,IAAlBA;gBAChB;YACF;YAEA,OAAO;gBACL,SAASd,WAAW;gBACpB,OAAOE,QACH;oBACE,eAAeA,MAAM,aAAa,IAAI;oBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;oBAC9C,cAAcA,MAAM,YAAY,IAAI;oBACpC,WAAWC,YAAY;oBACvB,YAAYxD;oBACZ,mBAAmBc;oBACnB,QAAQjB,YAAY,MAAM;gBAC5B,IACAqE;gBACJ,YAAY,CAAC,CAACd;YAChB;QACF,EAAE,OAAOgC,GAAQ;YACfjD,QAAQ,KAAK,CAAC,kBAAkBiD;YAChC,MAAMC,WAAW,IAAInD,MACnB,CAAC,eAAe,EAAEkB,cAAc,eAAe,GAAG,kBAAkB,EAAEgC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;gBACE,OAAOA;YACT;YAEF,MAAMC;QACR;IACF;IAEO,MAAMzC,oBAAoB,CAC/B5C,WACAJ;QAIA,IAAI+C;QAKJ,IAAI3C,UAAU,QAAQ,CAAC,UACrB,OAAQJ;YACN,KAAK0F,mCAAAA,YAAAA,CAAAA,MAAmB;gBACtB3C,iBAAiB4C,6BAAAA,YAAYA;gBAC7B;YACF,KAAKD,mCAAAA,YAAAA,CAAAA,eAA4B;gBAC/B3C,iBAAiB6C,+BAAAA,aAAaA;gBAC9B;YACF,KAAKF,mCAAAA,YAAAA,CAAAA,IAAiB;gBACpB3C,iBAAiB8C,gCAAAA,UAAUA;gBAC3B;YACF,KAAKH,mCAAAA,YAAAA,CAAAA,YAAyB;YAC9B,KAAKA,mCAAAA,YAAAA,CAAAA,gBAA6B;gBAChC3C,iBAAiB;oBAAE,MAAM+C,kCAAAA,gBAAAA,CAAAA,IAAqB;gBAAC;gBAC/C;YACF,KAAKJ,mCAAAA,YAAAA,CAAAA,IAAiB;gBAEpB3C,iBAAiBuB;gBACjB;QACJ;QAKF,IACElE,AAAc,wBAAdA,aACAJ,sBAAsB0F,mCAAAA,YAAAA,CAAAA,IAAiB,EAEvC3C,iBAAiB;YAAE,MAAM+C,kCAAAA,gBAAAA,CAAAA,IAAqB;QAAC;QAGjD,OAAO/C;IACT;IAEO,eAAegD,yBACpBpD,QAAgB,EAChB3C,iBAA+B,EAC/BC,WAAyB;QAEzB,MAAM+F,WAAW,MAAMtD,OAAOC,UAAU3C,mBAAmBC;QAC3D4B,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOmE,UAAU;QACjB,MAAM5E,SAASnB,YAAY,MAAM;QACjC,MAAMgG,cAAcC,cAAcF,SAAS,OAAO,EAAE5E;QACpD,OAAO;YAAE,SAAS6E;YAAa,OAAOD,SAAS,KAAK;QAAC;IACvD;IAEO,eAAeG,yBACpBC,IAAY,EACZpG,iBAA+B,EAC/BC,WAAyB;QAEzB,MAAM,EAAEwD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,OAAO0D,MAAMpG,mBAAmBC;QACjE,OAAO;YAAEwD;YAASE;QAAM;IAC1B;IAEO,SAAS0C,yBAAyBL,QAAgB;QACvD,IAAI;YAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;YACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;YAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;YAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;YAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;YACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;QAE3B,EAAE,OAAM,CAAC;QAET,OAAOR;IACT;IAEO,SAASS,yBAAyBC,KAAa;QACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;QAG5C,OAAOA;IACT;IAEO,SAASR,cAAcQ,KAAa,EAAEtF,MAAgC;QAC3E,MAAMuF,kBAAkBN,yBAAyBK;QAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;gBACtCC;YAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC9C;QACT;QACA,IAAI;YACF,OAAOgB,KAAK,KAAK,CAAC6B;QACpB,EAAE,OAAM,CAAC;QACT,IAAI;YACF,OAAO7B,KAAK,KAAK,CAAC+B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWF;QAC/B,EAAE,OAAOnB,GAAG,CAAC;QAEb,IAAIpE,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;YAC1D,MAAM0F,aAAaL,yBAAyBE;YAC5C,OAAO7B,KAAK,KAAK,CAAC+B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWC;QAC/B;QACA,MAAMxE,MAAM,CAAC,+BAA+B,EAAEoE,OAAO;IACvD"}
1	+ {"version":3,"file":"ai-model/service-caller/index.js","sources":["webpack://@midscene/core/webpack/runtime/compat_get_default_export","webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/service-caller/index.ts"],"sourcesContent":["// getDefaultExport function for compatibility with non-ESM modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};\n","__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport { Anthropic } from '@anthropic-ai/sdk';\nimport {\n DefaultAzureCredential,\n getBearerTokenProvider,\n} from '@azure/identity';\nimport {\n type IModelConfig,\n MIDSCENE_API_TYPE,\n MIDSCENE_LANGSMITH_DEBUG,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { parseBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI, { AzureOpenAI } from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { locatorSchema } from '../prompt/llm-locator';\nimport { planSchema } from '../prompt/llm-planning';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelConfig,\n}: {\n AIActionTypeValue: AIActionType;\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n style: 'openai' \| 'anthropic';\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes \| undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n openaiUseAzureDeprecated,\n useAzureOpenai,\n azureOpenaiScope,\n azureOpenaiKey,\n azureOpenaiEndpoint,\n azureOpenaiApiVersion,\n azureOpenaiDeployment,\n azureExtraConfig,\n useAnthropicSdk,\n anthropicApiKey,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n } = modelConfig;\n\n let openai: OpenAI \| AzureOpenAI \| undefined;\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n if (openaiUseAzureDeprecated) {\n // this is deprecated\n openai = new AzureOpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n dangerouslyAllowBrowser: true,\n }) as OpenAI;\n } else if (useAzureOpenai) {\n // https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=bash%2Cjavascript-key%2Ctypescript-keyless%2Cpython&pivots=programming-language-javascript#rest-api\n // keyless authentication\n let tokenProvider: any = undefined;\n if (azureOpenaiScope) {\n assert(\n !ifInBrowser,\n 'Azure OpenAI is not supported in browser with Midscene.',\n );\n const credential = new DefaultAzureCredential();\n\n tokenProvider = getBearerTokenProvider(credential, azureOpenaiScope);\n\n openai = new AzureOpenAI({\n azureADTokenProvider: tokenProvider,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n } else {\n // endpoint, apiKey, apiVersion, deployment\n openai = new AzureOpenAI({\n apiKey: azureOpenaiKey,\n endpoint: azureOpenaiEndpoint,\n apiVersion: azureOpenaiApiVersion,\n deployment: azureOpenaiDeployment,\n dangerouslyAllowBrowser: true,\n ...openaiExtraConfig,\n ...azureExtraConfig,\n });\n }\n } else if (!useAnthropicSdk) {\n openai = new OpenAI({\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n httpAgent: proxyAgent,\n ...openaiExtraConfig,\n defaultHeaders: {\n ...(openaiExtraConfig?.defaultHeaders \|\| {}),\n [MIDSCENE_API_TYPE]: AIActionTypeValue.toString(),\n },\n dangerouslyAllowBrowser: true,\n });\n }\n\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n const { wrapOpenAI } = await import('langsmith/wrappers');\n openai = wrapOpenAI(openai);\n }\n\n if (typeof openai !== 'undefined') {\n return {\n completion: openai.chat.completions,\n style: 'openai',\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n }\n\n // Anthropic\n if (useAnthropicSdk) {\n openai = new Anthropic({\n apiKey: anthropicApiKey,\n httpAgent: proxyAgent,\n dangerouslyAllowBrowser: true,\n }) as any;\n }\n\n if (typeof openai !== 'undefined' && (openai as any).messages) {\n return {\n completion: (openai as any).messages,\n style: 'anthropic',\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n }\n\n throw new Error('Openai SDK or Anthropic SDK is not initialized');\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const {\n completion,\n style,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n } = await createChatClient({\n AIActionTypeValue,\n modelConfig,\n });\n\n const responseFormat = getResponseFormat(modelName, AIActionTypeValue);\n\n const maxTokens = globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string \| undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage \| undefined;\n let timeCost: number \| undefined;\n\n const commonConfig = {\n temperature: vlMode === 'vlm-ui-tars' ? 0.0 : 0.1,\n stream: !!isStreaming,\n max_tokens:\n typeof maxTokens === 'number'\n ? maxTokens\n : Number.parseInt(maxTokens \|\| '2048', 10),\n ...(vlMode === 'qwen-vl' // qwen specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n if (style === 'openai') {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string \| null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content \|\| '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content \|\| '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content \|\| reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n },\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode \|\| 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode \|\| 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens \|\| ''}, completion-tokens, ${result.usage?.completion_tokens \|\| ''}, total-tokens, ${result.usage?.total_tokens \|\| ''}, cost-ms, ${timeCost}, requestId, ${result._request_id \|\| ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n } else if (style === 'anthropic') {\n const convertImageContent = (content: any) => {\n if (content.type === 'image_url') {\n const imgBase64 = content.image_url.url;\n assert(imgBase64, 'image_url is required');\n const { mimeType, body } = parseBase64(content.image_url.url);\n return {\n source: {\n type: 'base64',\n media_type: mimeType,\n data: body,\n },\n type: 'image',\n };\n }\n return content;\n };\n\n if (isStreaming) {\n const stream = (await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any)) as any;\n\n for await (const chunk of stream) {\n const content = chunk.delta?.text \|\| '';\n if (content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n accumulated,\n reasoning_content: '',\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.type === 'message_stop') {\n timeCost = Date.now() - startTime;\n const anthropicUsage = chunk.usage;\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: anthropicUsage\n ? {\n prompt_tokens: anthropicUsage.input_tokens ?? 0,\n completion_tokens: anthropicUsage.output_tokens ?? 0,\n total_tokens:\n (anthropicUsage.input_tokens ?? 0) +\n (anthropicUsage.output_tokens ?? 0),\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n }\n : undefined,\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n } else {\n const result = await completion.create({\n model: modelName,\n system: 'You are a versatile professional in software UI automation',\n messages: messages.map((m) => ({\n role: 'user',\n content: Array.isArray(m.content)\n ? (m.content as any).map(convertImageContent)\n : m.content,\n })),\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n content = (result as any).content[0].text as string;\n usage = result.usage;\n }\n\n assert(content, 'empty content');\n }\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content \|\| '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n return {\n content: content \|\| '',\n usage: usage\n ? {\n prompt_tokens: usage.prompt_tokens ?? 0,\n completion_tokens: usage.completion_tokens ?? 0,\n total_tokens: usage.total_tokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n }\n : undefined,\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport const getResponseFormat = (\n modelName: string,\n AIActionTypeValue: AIActionType,\n):\n \| OpenAI.ChatCompletionCreateParams['response_format']\n \| OpenAI.ResponseFormatJSONObject => {\n let responseFormat:\n \| OpenAI.ChatCompletionCreateParams['response_format']\n \| OpenAI.ResponseFormatJSONObject\n \| undefined;\n\n if (modelName.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.INSPECT_ELEMENT:\n responseFormat = locatorSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n case AIActionType.TEXT:\n // No response format for plain text - return as-is\n responseFormat = undefined;\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n // Skip for plain text to allow string output\n if (\n modelName === 'gpt-4o-2024-05-13' &&\n AIActionTypeValue !== AIActionType.TEXT\n ) {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n return responseFormat;\n};\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const response = await callAI(messages, AIActionTypeValue, modelConfig);\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n return { content: jsonContent, usage: response.usage };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s(\\{[\\s\\S]\\})\\s$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s(\\{[\\s\\S]?\\})\\s```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes \| undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\$(\\d+),(\\d+)\$/)) {\n return cleanJsonString\n .match(/\$(\\d+),(\\d+)\$/)\n ?.slice(1)\n .map(Number);\n }\n try {\n return JSON.parse(cleanJsonString);\n } catch {}\n try {\n return JSON.parse(jsonrepair(cleanJsonString));\n } catch (e) {}\n\n if (vlMode === 'doubao-vision' \|\| vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n return JSON.parse(jsonrepair(jsonString));\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["__webpack_require__","module","getter","definition","key","Object","obj","prop","Symbol","createChatClient","AIActionTypeValue","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","openaiUseAzureDeprecated","useAzureOpenai","azureOpenaiScope","azureOpenaiKey","azureOpenaiEndpoint","azureOpenaiApiVersion","azureOpenaiDeployment","azureExtraConfig","useAnthropicSdk","anthropicApiKey","modelDescription","uiTarsVersion","vlMode","openai","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","AzureOpenAI","tokenProvider","assert","ifInBrowser","credential","DefaultAzureCredential","getBearerTokenProvider","OpenAI","MIDSCENE_API_TYPE","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","Error","console","wrapOpenAI","Anthropic","callAI","messages","options","completion","style","responseFormat","getResponseFormat","maxTokens","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","commonConfig","Number","stream","chunk","_chunk_choices__delta","_chunk_choices__delta1","_chunk_choices_2","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","_result_usage","_result_usage1","_result_usage2","result","JSON","convertImageContent","imgBase64","mimeType","body","parseBase64","m","Array","_chunk_delta","anthropicUsage","e","newError","AIActionType","assertSchema","locatorSchema","planSchema","AIResponseFormat","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","cleanJsonString","_cleanJsonString_match","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;;;;;;;;;IACAA,oBAAoB,CAAC,GAAG,CAACC;QACxB,IAAIC,SAASD,UAAUA,OAAO,UAAU,GACvC,IAAOA,MAAM,CAAC,UAAU,GACxB,IAAOA;QACRD,oBAAoB,CAAC,CAACE,QAAQ;YAAE,GAAGA;QAAO;QAC1C,OAAOA;IACR;;;ICPAF,oBAAoB,CAAC,GAAG,CAAC,UAASG;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGH,oBAAoB,CAAC,CAACG,YAAYC,QAAQ,CAACJ,oBAAoB,CAAC,CAAC,UAASI,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAJ,oBAAoB,CAAC,GAAG,CAACM,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFP,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOQ,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IC0BA,eAAeI,iBAAiB,EAC9BC,iBAAiB,EACjBC,WAAW,EAIZ;QAQC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,wBAAwB,EACxBC,cAAc,EACdC,gBAAgB,EAChBC,cAAc,EACdC,mBAAmB,EACnBC,qBAAqB,EACrBC,qBAAqB,EACrBC,gBAAgB,EAChBC,eAAe,EACfC,eAAe,EACfC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACP,GAAGnB;QAEJ,IAAIoB;QAEJ,IAAIC;QACJ,MAAMC,aAAaC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QAC5B,IAAIrB,WAAW;YACboB,WAAW,oBAAoBpB;YAC/BmB,aAAa,IAAIG,2CAAAA,eAAeA,CAACtB;QACnC,OAAO,IAAID,YAAY;YACrBqB,WAAW,qBAAqBrB;YAChCoB,aAAa,IAAII,2CAAAA,eAAeA,CAACxB;QACnC;QAEA,IAAIM,0BAEFa,SAAS,IAAIM,gCAAAA,WAAWA,CAAC;YACvB,SAAStB;YACT,QAAQC;YACR,WAAWgB;YACX,GAAGf,iBAAiB;YACpB,yBAAyB;QAC3B;aACK,IAAIE,gBAAgB;YAGzB,IAAImB;YACJ,IAAIlB,kBAAkB;gBACpBmB,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,CAACC,sBAAAA,WAAWA,EACZ;gBAEF,MAAMC,aAAa,IAAIC,yBAAAA,sBAAsBA;gBAE7CJ,gBAAgBK,AAAAA,IAAAA,yBAAAA,sBAAAA,AAAAA,EAAuBF,YAAYrB;gBAEnDW,SAAS,IAAIM,gCAAAA,WAAWA,CAAC;oBACvB,sBAAsBC;oBACtB,UAAUhB;oBACV,YAAYC;oBACZ,YAAYC;oBACZ,GAAGP,iBAAiB;oBACpB,GAAGQ,gBAAgB;gBACrB;YACF,OAEEM,SAAS,IAAIM,gCAAAA,WAAWA,CAAC;gBACvB,QAAQhB;gBACR,UAAUC;gBACV,YAAYC;gBACZ,YAAYC;gBACZ,yBAAyB;gBACzB,GAAGP,iBAAiB;gBACpB,GAAGQ,gBAAgB;YACrB;QAEJ,OAAO,IAAI,CAACC,iBACVK,SAAS,IAAIa,CAAAA,yBAAAA,EAAO;YAClB,SAAS7B;YACT,QAAQC;YACR,WAAWgB;YACX,GAAGf,iBAAiB;YACpB,gBAAgB;gBACd,GAAIA,AAAAA,CAAAA,QAAAA,oBAAAA,KAAAA,IAAAA,kBAAmB,cAAc,AAAD,KAAK,CAAC,CAAC;gBAC3C,CAAC4B,oBAAAA,iBAAiBA,CAAC,EAAEnC,kBAAkB,QAAQ;YACjD;YACA,yBAAyB;QAC3B;QAGF,IACEqB,UACAe,oBAAAA,mBAAAA,CAAAA,qBAAyC,CAACC,oBAAAA,wBAAwBA,GAClE;YACA,IAAIP,sBAAAA,WAAWA,EACb,MAAM,IAAIQ,MAAM;YAElBC,QAAQ,GAAG,CAAC;YACZ,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM;YAC7BnB,SAASmB,WAAWnB;QACtB;QAEA,IAAI,AAAkB,WAAXA,QACT,OAAO;YACL,YAAYA,OAAO,IAAI,CAAC,WAAW;YACnC,OAAO;YACPjB;YACAc;YACAC;YACAC;QACF;QAIF,IAAIJ,iBACFK,SAAS,IAAIoB,oBAAAA,SAASA,CAAC;YACrB,QAAQxB;YACR,WAAWK;YACX,yBAAyB;QAC3B;QAGF,IAAI,AAAkB,WAAXD,UAA2BA,OAAe,QAAQ,EAC3D,OAAO;YACL,YAAaA,OAAe,QAAQ;YACpC,OAAO;YACPjB;YACAc;YACAC;YACAC;QACF;QAGF,MAAM,IAAIkB,MAAM;IAClB;IAEO,eAAeI,OACpBC,QAAsC,EACtC3C,iBAA+B,EAC/BC,WAAyB,EACzB2C,OAGC;QAED,MAAM,EACJC,UAAU,EACVC,KAAK,EACL1C,SAAS,EACTc,gBAAgB,EAChBC,aAAa,EACbC,MAAM,EACP,GAAG,MAAMrB,iBAAiB;YACzBC;YACAC;QACF;QAEA,MAAM8C,iBAAiBC,kBAAkB5C,WAAWJ;QAEpD,MAAMiD,YAAYb,oBAAAA,mBAAAA,CAAAA,iBAAqC,CAACc,oBAAAA,iBAAiBA;QACzE,MAAMC,YAAY3B,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QAC3B,MAAM4B,oBAAoB5B,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QACnC,MAAM6B,qBAAqB7B,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;QAEpC,MAAM8B,YAAYC,KAAK,GAAG;QAE1B,MAAMC,cAAcZ,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,MAAM,AAAD,KAAKA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;QACtD,IAAIa;QACJ,IAAIC,cAAc;QAClB,IAAIC;QACJ,IAAIC;QAEJ,MAAMC,eAAe;YACnB,aAAazC,AAAW,kBAAXA,SAA2B,MAAM;YAC9C,QAAQ,CAAC,CAACoC;YACV,YACE,AAAqB,YAArB,OAAOP,YACHA,YACAa,OAAO,QAAQ,CAACb,aAAa,QAAQ;YAC3C,GAAI7B,AAAW,cAAXA,SACA;gBACE,2BAA2B;YAC7B,IACA,CAAC,CAAC;QACR;QAEA,IAAI;YACF,IAAI0B,AAAU,aAAVA,OAAoB;gBACtBK,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAEpD,WAAW;gBAGrE,IAAIoD,aAAa;oBACf,MAAMO,SAAU,MAAMlB,WAAW,MAAM,CACrC;wBACE,OAAOzC;wBACPuC;wBACA,iBAAiBI;wBACjB,GAAGc,YAAY;oBACjB,GACA;wBACE,QAAQ;oBACV;oBAKF,WAAW,MAAMG,SAASD,OAAQ;4BAChBE,uBAAAA,iBAAAA,gBAEbC,wBAAAA,kBAAAA,iBAoBCC,kBAAAA;wBAtBJ,MAAMV,UAAUQ,AAAAA,SAAAA,CAAAA,iBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,cAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,wBAAAA,gBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,sBAA2B,OAAO,AAAD,KAAK;wBACtD,MAAMG,oBACJ,AAAC,SAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,QAAAA,CAAAA,yBAAAA,iBAAoB,KAAK,AAAD,IAAxBA,KAAAA,IAAAA,uBAAmC,iBAAiB,AAAD,KAAK;wBAG3D,IAAIF,MAAM,KAAK,EACbL,QAAQK,MAAM,KAAK;wBAGrB,IAAIP,WAAWW,mBAAmB;4BAChCV,eAAeD;4BACf,MAAMY,YAAiC;gCACrCZ;gCACAW;gCACAV;gCACA,YAAY;gCACZ,OAAOY;4BACT;4BACA1B,QAAQ,OAAO,CAAEyB;wBACnB;wBAGA,IAAI,QAAAF,CAAAA,kBAAAA,MAAM,OAAO,AAAD,IAAZA,KAAAA,IAAAA,QAAAA,CAAAA,mBAAAA,eAAe,CAAC,EAAE,AAAD,IAAjBA,KAAAA,IAAAA,iBAAoB,aAAa,EAAE;4BACrCP,WAAWL,KAAK,GAAG,KAAKD;4BAGxB,IAAI,CAACK,OAAO;gCAEV,MAAMY,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACd,YAAY,MAAM,GAAG;gCAElCC,QAAQ;oCACN,eAAeY;oCACf,mBAAmBA;oCACnB,cAAcA,AAAkB,IAAlBA;gCAChB;4BACF;4BAGA,MAAME,aAAkC;gCACtC,SAAS;gCACTf;gCACA,mBAAmB;gCACnB,YAAY;gCACZ,OAAO;oCACL,eAAeC,MAAM,aAAa,IAAI;oCACtC,mBAAmBA,MAAM,iBAAiB,IAAI;oCAC9C,cAAcA,MAAM,YAAY,IAAI;oCACpC,WAAWC,YAAY;oCACvB,YAAYxD;oCACZ,mBAAmBc;oCACnB,QAAQjB,YAAY,MAAM;gCAC5B;4BACF;4BACA2C,QAAQ,OAAO,CAAE6B;4BACjB;wBACF;oBACF;oBACAhB,UAAUC;oBACVN,kBACE,CAAC,iBAAiB,EAAEhD,UAAU,QAAQ,EAAEgB,UAAU,UAAU,WAAW,EAAEwC,UAAU;gBAEvF,OAAO;wBAUqGc,eAAyDC,gBAAwDC;oBAT3N,MAAMC,SAAS,MAAMhC,WAAW,MAAM,CAAC;wBACrC,OAAOzC;wBACPuC;wBACA,iBAAiBI;wBACjB,GAAGc,YAAY;oBACjB;oBACAD,WAAWL,KAAK,GAAG,KAAKD;oBAExBF,kBACE,CAAC,OAAO,EAAEhD,UAAU,QAAQ,EAAEgB,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEuD,AAAAA,SAAAA,CAAAA,gBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,cAAc,aAAa,AAAD,KAAK,GAAG,qBAAqB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,iBAAiB,AAAD,KAAK,GAAG,gBAAgB,EAAEC,AAAAA,SAAAA,CAAAA,iBAAAA,OAAO,KAAK,AAAD,IAAXA,KAAAA,IAAAA,eAAc,YAAY,AAAD,KAAK,GAAG,WAAW,EAAEhB,SAAS,aAAa,EAAEiB,OAAO,WAAW,IAAI,IAAI;oBAG3TxB,mBACE,CAAC,oBAAoB,EAAEyB,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;oBAGvDhD,IAAAA,sBAAAA,MAAAA,AAAAA,EACEgD,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;oBAEhEpB,UAAUoB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;oBAC3ClB,QAAQkB,OAAO,KAAK;gBACtB;gBAEA1B,UAAU,CAAC,UAAU,EAAEM,SAAS;gBAChC5B,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO4B,SAAS;YAClB,OAAO,IAAIX,AAAU,gBAAVA,OAAuB;gBAChC,MAAMiC,sBAAsB,CAACtB;oBAC3B,IAAIA,AAAiB,gBAAjBA,QAAQ,IAAI,EAAkB;wBAChC,MAAMuB,YAAYvB,QAAQ,SAAS,CAAC,GAAG;wBACvC5B,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOmD,WAAW;wBAClB,MAAM,EAAEC,QAAQ,EAAEC,IAAI,EAAE,GAAGC,AAAAA,IAAAA,oBAAAA,WAAAA,AAAAA,EAAY1B,QAAQ,SAAS,CAAC,GAAG;wBAC5D,OAAO;4BACL,QAAQ;gCACN,MAAM;gCACN,YAAYwB;gCACZ,MAAMC;4BACR;4BACA,MAAM;wBACR;oBACF;oBACA,OAAOzB;gBACT;gBAEA,IAAID,aAAa;oBACf,MAAMO,SAAU,MAAMlB,WAAW,MAAM,CAAC;wBACtC,OAAOzC;wBACP,QAAQ;wBACR,UAAUuC,SAAS,GAAG,CAAC,CAACyC,IAAO;gCAC7B,MAAM;gCACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;4BACf;wBACA,iBAAiBrC;wBACjB,GAAGc,YAAY;oBACjB;oBAEA,WAAW,MAAMG,SAASD,OAAQ;4BAChBuB;wBAAhB,MAAM7B,UAAU6B,AAAAA,SAAAA,CAAAA,eAAAA,MAAM,KAAK,AAAD,IAAVA,KAAAA,IAAAA,aAAa,IAAI,AAAD,KAAK;wBACrC,IAAI7B,SAAS;4BACXC,eAAeD;4BACf,MAAMY,YAAiC;gCACrCZ;gCACAC;gCACA,mBAAmB;gCACnB,YAAY;gCACZ,OAAOY;4BACT;4BACA1B,QAAQ,OAAO,CAAEyB;wBACnB;wBAGA,IAAIL,AAAe,mBAAfA,MAAM,IAAI,EAAqB;4BACjCJ,WAAWL,KAAK,GAAG,KAAKD;4BACxB,MAAMiC,iBAAiBvB,MAAM,KAAK;4BAGlC,MAAMS,aAAkC;gCACtC,SAAS;gCACTf;gCACA,mBAAmB;gCACnB,YAAY;gCACZ,OAAO6B,iBACH;oCACE,eAAeA,eAAe,YAAY,IAAI;oCAC9C,mBAAmBA,eAAe,aAAa,IAAI;oCACnD,cACGA,AAAAA,CAAAA,eAAe,YAAY,IAAI,KAC/BA,CAAAA,eAAe,aAAa,IAAI;oCACnC,WAAW3B,YAAY;oCACvB,YAAYxD;oCACZ,mBAAmBc;oCACnB,QAAQjB,YAAY,MAAM;gCAC5B,IACAqE;4BACN;4BACA1B,QAAQ,OAAO,CAAE6B;4BACjB;wBACF;oBACF;oBACAhB,UAAUC;gBACZ,OAAO;oBACL,MAAMmB,SAAS,MAAMhC,WAAW,MAAM,CAAC;wBACrC,OAAOzC;wBACP,QAAQ;wBACR,UAAUuC,SAAS,GAAG,CAAC,CAACyC,IAAO;gCAC7B,MAAM;gCACN,SAASC,MAAM,OAAO,CAACD,EAAE,OAAO,IAC3BA,EAAE,OAAO,CAAS,GAAG,CAACL,uBACvBK,EAAE,OAAO;4BACf;wBACA,iBAAiBrC;wBACjB,GAAGc,YAAY;oBACjB;oBACAD,WAAWL,KAAK,GAAG,KAAKD;oBACxBG,UAAWoB,OAAe,OAAO,CAAC,EAAE,CAAC,IAAI;oBACzClB,QAAQkB,OAAO,KAAK;gBACtB;gBAEAhD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO4B,SAAS;YAClB;YAEA,IAAID,eAAe,CAACG,OAAO;gBAEzB,MAAMY,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEf,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;gBAEtCE,QAAQ;oBACN,eAAeY;oBACf,mBAAmBA;oBACnB,cAAcA,AAAkB,IAAlBA;gBAChB;YACF;YAEA,OAAO;gBACL,SAASd,WAAW;gBACpB,OAAOE,QACH;oBACE,eAAeA,MAAM,aAAa,IAAI;oBACtC,mBAAmBA,MAAM,iBAAiB,IAAI;oBAC9C,cAAcA,MAAM,YAAY,IAAI;oBACpC,WAAWC,YAAY;oBACvB,YAAYxD;oBACZ,mBAAmBc;oBACnB,QAAQjB,YAAY,MAAM;gBAC5B,IACAqE;gBACJ,YAAY,CAAC,CAACd;YAChB;QACF,EAAE,OAAOgC,GAAQ;YACfjD,QAAQ,KAAK,CAAC,kBAAkBiD;YAChC,MAAMC,WAAW,IAAInD,MACnB,CAAC,eAAe,EAAEkB,cAAc,eAAe,GAAG,kBAAkB,EAAEgC,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;gBACE,OAAOA;YACT;YAEF,MAAMC;QACR;IACF;IAEO,MAAMzC,oBAAoB,CAC/B5C,WACAJ;QAIA,IAAI+C;QAKJ,IAAI3C,UAAU,QAAQ,CAAC,UACrB,OAAQJ;YACN,KAAK0F,mCAAAA,YAAAA,CAAAA,MAAmB;gBACtB3C,iBAAiB4C,6BAAAA,YAAYA;gBAC7B;YACF,KAAKD,mCAAAA,YAAAA,CAAAA,eAA4B;gBAC/B3C,iBAAiB6C,+BAAAA,aAAaA;gBAC9B;YACF,KAAKF,mCAAAA,YAAAA,CAAAA,IAAiB;gBACpB3C,iBAAiB8C,gCAAAA,UAAUA;gBAC3B;YACF,KAAKH,mCAAAA,YAAAA,CAAAA,YAAyB;YAC9B,KAAKA,mCAAAA,YAAAA,CAAAA,gBAA6B;gBAChC3C,iBAAiB;oBAAE,MAAM+C,kCAAAA,gBAAAA,CAAAA,IAAqB;gBAAC;gBAC/C;YACF,KAAKJ,mCAAAA,YAAAA,CAAAA,IAAiB;gBAEpB3C,iBAAiBuB;gBACjB;QACJ;QAKF,IACElE,AAAc,wBAAdA,aACAJ,sBAAsB0F,mCAAAA,YAAAA,CAAAA,IAAiB,EAEvC3C,iBAAiB;YAAE,MAAM+C,kCAAAA,gBAAAA,CAAAA,IAAqB;QAAC;QAGjD,OAAO/C;IACT;IAEO,eAAegD,yBACpBpD,QAAsC,EACtC3C,iBAA+B,EAC/BC,WAAyB;QAEzB,MAAM+F,WAAW,MAAMtD,OAAOC,UAAU3C,mBAAmBC;QAC3D4B,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOmE,UAAU;QACjB,MAAM5E,SAASnB,YAAY,MAAM;QACjC,MAAMgG,cAAcC,cAAcF,SAAS,OAAO,EAAE5E;QACpD,OAAO;YAAE,SAAS6E;YAAa,OAAOD,SAAS,KAAK;QAAC;IACvD;IAEO,eAAeG,yBACpBC,IAAY,EACZpG,iBAA+B,EAC/BC,WAAyB;QAEzB,MAAM,EAAEwD,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,OAAO0D,MAAMpG,mBAAmBC;QACjE,OAAO;YAAEwD;YAASE;QAAM;IAC1B;IAEO,SAAS0C,yBAAyBL,QAAgB;QACvD,IAAI;YAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;YACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;YAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;YAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;YAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;YACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;QAE3B,EAAE,OAAM,CAAC;QAET,OAAOR;IACT;IAEO,SAASS,yBAAyBC,KAAa;QACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;QAG5C,OAAOA;IACT;IAEO,SAASR,cAAcQ,KAAa,EAAEtF,MAAgC;QAC3E,MAAMuF,kBAAkBN,yBAAyBK;QAEjD,IAAIC,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,KAAK,CAAC,oBAAoB;gBACtCC;YAAP,OAAO,QAAAA,CAAAA,yBAAAA,gBACJ,KAAK,CAAC,kBAAiB,IADnBA,KAAAA,IAAAA,uBAEH,KAAK,CAAC,GACP,GAAG,CAAC9C;QACT;QACA,IAAI;YACF,OAAOgB,KAAK,KAAK,CAAC6B;QACpB,EAAE,OAAM,CAAC;QACT,IAAI;YACF,OAAO7B,KAAK,KAAK,CAAC+B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWF;QAC/B,EAAE,OAAOnB,GAAG,CAAC;QAEb,IAAIpE,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;YAC1D,MAAM0F,aAAaL,yBAAyBE;YAC5C,OAAO7B,KAAK,KAAK,CAAC+B,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWC;QAC/B;QACA,MAAMxE,MAAM,CAAC,+BAA+B,EAAEoE,OAAO;IACvD"}

package/dist/lib/ai-model/ui-tars-planning.js CHANGED Viewed

@@ -25,7 +25,7 @@ var __webpack_exports__ = {};
 __webpack_require__.r(__webpack_exports__);
 __webpack_require__.d(__webpack_exports__, {
     resizeImageForUiTars: ()=>resizeImageForUiTars,
-    vlmPlanning: ()=>vlmPlanning
+    uiTarsPlanning: ()=>uiTarsPlanning
 });
 const env_namespaceObject = require("@midscene/shared/env");
 const img_namespaceObject = require("@midscene/shared/img");
@@ -44,18 +44,31 @@ const pointToBbox = (point, width, height)=>[
         Math.round(Math.min(point.x + bboxSize / 2, width)),
         Math.round(Math.min(point.y + bboxSize / 2, height))
     ];
-async function vlmPlanning(options) {
-    const { conversationHistory, userInstruction, size, modelConfig } = options;
+async function uiTarsPlanning(userInstruction, options) {
+    const { conversationHistory, context, modelConfig } = options;
     const { uiTarsModelVersion } = modelConfig;
     const systemPrompt = (0, ui_tars_planning_js_namespaceObject.getUiTarsPlanningPrompt)() + userInstruction;
+    const imagePayload = await resizeImageForUiTars(context.screenshotBase64, context.size, uiTarsModelVersion);
+    conversationHistory.append({
+        role: 'user',
+        content: [
+            {
+                type: 'image_url',
+                image_url: {
+                    url: imagePayload
+                }
+            }
+        ]
+    });
     const res = await (0, index_js_namespaceObject.callAIWithStringResponse)([
         {
             role: 'user',
             content: systemPrompt
         },
-        ...conversationHistory
+        ...conversationHistory.snapshot()
     ], external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT, modelConfig);
     const convertedText = convertBboxToCoordinates(res.content);
+    const { size } = context;
     const { parsed } = (0, action_parser_namespaceObject.actionParser)({
         prediction: convertedText,
         factor: [
@@ -70,8 +83,10 @@ async function vlmPlanning(options) {
     });
     debug('ui-tars modelVer', uiTarsModelVersion, ', parsed', JSON.stringify(parsed));
     const transformActions = [];
+    let shouldContinue = true;
     parsed.forEach((action)=>{
-        if ('click' === action.action_type) {
+        const actionType = (action.action_type || '').toLowerCase();
+        if ('click' === actionType) {
             (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
             const point = getPoint(action.action_inputs.start_box, size);
             transformActions.push({
@@ -86,7 +101,7 @@ async function vlmPlanning(options) {
                     }
                 }
             });
-        } else if ('drag' === action.action_type) {
+        } else if ('drag' === actionType) {
             (0, utils_namespaceObject.assert)(action.action_inputs.start_box, 'start_box is required');
             (0, utils_namespaceObject.assert)(action.action_inputs.end_box, 'end_box is required');
             const startPoint = getPoint(action.action_inputs.start_box, size);
@@ -111,26 +126,28 @@ async function vlmPlanning(options) {
                 },
                 thought: action.thought || ''
             });
-        } else if ('type' === action.action_type) transformActions.push({
+        } else if ('type' === actionType) transformActions.push({
             type: 'Input',
             param: {
                 value: action.action_inputs.content
             },
             thought: action.thought || ''
         });
-        else if ('scroll' === action.action_type) transformActions.push({
+        else if ('scroll' === actionType) transformActions.push({
             type: 'Scroll',
             param: {
                 direction: action.action_inputs.direction
             },
             thought: action.thought || ''
         });
-        else if ('finished' === action.action_type) transformActions.push({
-            type: 'Finished',
-            param: {},
-            thought: action.thought || ''
-        });
-        else if ('hotkey' === action.action_type) if (action.action_inputs.key) {
+        else if ('finished' === actionType) {
+            shouldContinue = false;
+            transformActions.push({
+                type: 'Finished',
+                param: {},
+                thought: action.thought || ''
+            });
+        } else if ('hotkey' === actionType) if (action.action_inputs.key) {
             const keys = (0, us_keyboard_layout_namespaceObject.transformHotkeyInput)(action.action_inputs.key);
             transformActions.push({
                 type: 'KeyboardPress',
@@ -140,7 +157,7 @@ async function vlmPlanning(options) {
                 thought: action.thought || ''
             });
         } else console.warn('No key found in action: hotkey. Will not perform action.');
-        else if ('wait' === action.action_type) transformActions.push({
+        else if ('wait' === actionType) transformActions.push({
             type: 'Sleep',
             param: {
                 timeMs: 1000
@@ -155,12 +172,17 @@ async function vlmPlanning(options) {
         }
     });
     debug('transformActions', JSON.stringify(transformActions, null, 2));
+    const log = (0, ui_tars_planning_js_namespaceObject.getSummary)(res.content);
+    conversationHistory.append({
+        role: 'assistant',
+        content: log
+    });
     return {
         actions: transformActions,
-        actionsFromModel: parsed,
-        action_summary: (0, ui_tars_planning_js_namespaceObject.getSummary)(res.content),
+        log,
         usage: res.usage,
-        rawResponse: JSON.stringify(res.content, void 0, 2)
+        rawResponse: JSON.stringify(res.content, void 0, 2),
+        more_actions_needed_by_instruction: shouldContinue
     };
 }
 function convertBboxToCoordinates(text) {
@@ -204,10 +226,10 @@ async function resizeImageForUiTars(imageBase64, size, uiTarsVersion) {
     return imageBase64;
 }
 exports.resizeImageForUiTars = __webpack_exports__.resizeImageForUiTars;
-exports.vlmPlanning = __webpack_exports__.vlmPlanning;
+exports.uiTarsPlanning = __webpack_exports__.uiTarsPlanning;
 for(var __webpack_i__ in __webpack_exports__)if (-1 === [
     "resizeImageForUiTars",
-    "vlmPlanning"
+    "uiTarsPlanning"
 ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
 Object.defineProperty(exports, '__esModule', {
     value: true