@midscene/core 1.2.2-beta-20260116114131.0 → 1.2.2-beta-20260119114334.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/dist/es/agent/agent.mjs +4 -4
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +3 -4
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/auto-glm/index.mjs +2 -2
  7. package/dist/es/ai-model/auto-glm/planning.mjs +1 -1
  8. package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
  9. package/dist/es/ai-model/auto-glm/prompt.mjs +8 -8
  10. package/dist/es/ai-model/auto-glm/prompt.mjs.map +1 -1
  11. package/dist/es/ai-model/auto-glm/util.mjs +6 -3
  12. package/dist/es/ai-model/auto-glm/util.mjs.map +1 -1
  13. package/dist/es/ai-model/index.mjs +2 -2
  14. package/dist/es/ai-model/inspect.mjs +12 -12
  15. package/dist/es/ai-model/inspect.mjs.map +1 -1
  16. package/dist/es/ai-model/llm-planning.mjs +51 -7
  17. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  18. package/dist/es/ai-model/prompt/common.mjs +2 -2
  19. package/dist/es/ai-model/prompt/common.mjs.map +1 -1
  20. package/dist/es/ai-model/prompt/extraction.mjs +3 -1
  21. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  22. package/dist/es/ai-model/prompt/llm-locator.mjs +5 -3
  23. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  24. package/dist/es/ai-model/prompt/llm-planning.mjs +48 -52
  25. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  26. package/dist/es/ai-model/prompt/llm-section-locator.mjs +5 -3
  27. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  28. package/dist/es/ai-model/service-caller/index.mjs +12 -13
  29. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  30. package/dist/es/ai-model/ui-tars-planning.mjs +2 -24
  31. package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
  32. package/dist/es/common.mjs +10 -9
  33. package/dist/es/common.mjs.map +1 -1
  34. package/dist/es/service/index.mjs +6 -6
  35. package/dist/es/service/index.mjs.map +1 -1
  36. package/dist/es/utils.mjs +2 -2
  37. package/dist/lib/agent/agent.js +3 -3
  38. package/dist/lib/agent/agent.js.map +1 -1
  39. package/dist/lib/agent/tasks.js +2 -3
  40. package/dist/lib/agent/tasks.js.map +1 -1
  41. package/dist/lib/agent/utils.js +1 -1
  42. package/dist/lib/ai-model/auto-glm/index.js +3 -0
  43. package/dist/lib/ai-model/auto-glm/planning.js +1 -1
  44. package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
  45. package/dist/lib/ai-model/auto-glm/prompt.js +8 -8
  46. package/dist/lib/ai-model/auto-glm/prompt.js.map +1 -1
  47. package/dist/lib/ai-model/auto-glm/util.js +10 -4
  48. package/dist/lib/ai-model/auto-glm/util.js.map +1 -1
  49. package/dist/lib/ai-model/index.js +0 -3
  50. package/dist/lib/ai-model/inspect.js +12 -12
  51. package/dist/lib/ai-model/inspect.js.map +1 -1
  52. package/dist/lib/ai-model/llm-planning.js +52 -5
  53. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  54. package/dist/lib/ai-model/prompt/common.js +2 -2
  55. package/dist/lib/ai-model/prompt/common.js.map +1 -1
  56. package/dist/lib/ai-model/prompt/extraction.js +5 -3
  57. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  58. package/dist/lib/ai-model/prompt/llm-locator.js +5 -3
  59. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  60. package/dist/lib/ai-model/prompt/llm-planning.js +48 -52
  61. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  62. package/dist/lib/ai-model/prompt/llm-section-locator.js +5 -3
  63. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  64. package/dist/lib/ai-model/service-caller/index.js +11 -12
  65. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  66. package/dist/lib/ai-model/ui-tars-planning.js +1 -26
  67. package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
  68. package/dist/lib/common.js +10 -9
  69. package/dist/lib/common.js.map +1 -1
  70. package/dist/lib/service/index.js +6 -6
  71. package/dist/lib/service/index.js.map +1 -1
  72. package/dist/lib/utils.js +2 -2
  73. package/dist/types/ai-model/auto-glm/index.d.ts +1 -1
  74. package/dist/types/ai-model/auto-glm/prompt.d.ts +3 -3
  75. package/dist/types/ai-model/auto-glm/util.d.ts +11 -5
  76. package/dist/types/ai-model/index.d.ts +1 -1
  77. package/dist/types/ai-model/llm-planning.d.ts +5 -1
  78. package/dist/types/ai-model/prompt/common.d.ts +2 -2
  79. package/dist/types/ai-model/prompt/llm-locator.d.ts +2 -2
  80. package/dist/types/ai-model/prompt/llm-planning.d.ts +3 -3
  81. package/dist/types/ai-model/prompt/llm-section-locator.d.ts +2 -2
  82. package/dist/types/ai-model/service-caller/index.d.ts +2 -2
  83. package/dist/types/ai-model/ui-tars-planning.d.ts +2 -3
  84. package/dist/types/common.d.ts +5 -5
  85. package/package.json +2 -2
  86. package/dist/es/ai-model/prompt/assertion.mjs +0 -31
  87. package/dist/es/ai-model/prompt/assertion.mjs.map +0 -1
  88. package/dist/lib/ai-model/prompt/assertion.js +0 -65
  89. package/dist/lib/ai-model/prompt/assertion.js.map +0 -1
  90. package/dist/types/ai-model/prompt/assertion.d.ts +0 -2
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/extraction.mjs","sources":["../../../../src/ai-model/prompt/extraction.ts"],"sourcesContent":["import type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport function systemPromptToExtract() {\n return `\nYou are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.\n\nThe user will give you a screenshot, the contents of it (optional), and some data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.\n\nIf a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.\n\nIf the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.\n\n\nReturn in the following JSON format:\n{\n thought: string, // the thinking process of the extraction, less then 300 words\n data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.\n errors: [], // string[], error message if any\n}\n\n# Example 1\nFor example, if the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"name\": \"name shows on the left panel, string\",\n \"age\": \"age shows on the right panel, number\",\n \"isAdmin\": \"if the user is admin, boolean\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: {\n name: \"John\",\n age: 30,\n isAdmin: true\n },\n}\n\n# Example 2\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe todo items list, string[]\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: [\"todo 1\", \"todo 2\", \"todo 3\"],\n}\n\n# Example 3\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe page title, string\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: \"todo list\",\n}\n\n# Example 4\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"result\": \"Boolean, is it currently the SMS page?\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: { result: true },\n}\n`;\n}\n\nexport const extractDataQueryPrompt = (\n pageDescription: string,\n dataQuery: string | Record<string, string>,\n) => {\n let dataQueryText = '';\n if (typeof dataQuery === 'string') {\n dataQueryText = dataQuery;\n } else {\n dataQueryText = JSON.stringify(dataQuery, null, 2);\n }\n\n return `\n<PageDescription>\n${pageDescription}\n</PageDescription>\n\n<DATA_DEMAND>\n${dataQueryText}\n</DATA_DEMAND>\n `;\n};\n\nexport const extractDataSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'extract_data',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n data: {\n type: 'object',\n description: 'The extracted data',\n },\n errors: {\n type: 'array',\n items: {\n type: 'string',\n },\n description: 'Error messages, if any',\n },\n },\n required: ['data', 'errors'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["systemPromptToExtract","extractDataQueryPrompt","pageDescription","dataQuery","dataQueryText","JSON","extractDataSchema"],"mappings":"AAEO,SAASA;IACd,OAAO,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAkFV,CAAC;AACD;AAEO,MAAMC,yBAAyB,CACpCC,iBACAC;IAEA,IAAIC,gBAAgB;IAElBA,gBADE,AAAqB,YAArB,OAAOD,YACOA,YAEAE,KAAK,SAAS,CAACF,WAAW,MAAM;IAGlD,OAAO,CAAC;;AAEV,EAAED,gBAAgB;;;;AAIlB,EAAEE,cAAc;;EAEd,CAAC;AACH;AAEO,MAAME,oBAA8C;IACzD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,QAAQ;oBACN,MAAM;oBACN,OAAO;wBACL,MAAM;oBACR;oBACA,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAS;YAC5B,sBAAsB;QACxB;IACF;AACF"}
1
+ {"version":3,"file":"ai-model/prompt/extraction.mjs","sources":["../../../../src/ai-model/prompt/extraction.ts"],"sourcesContent":["import { getPreferredLanguage } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport function systemPromptToExtract() {\n const preferredLanguage = getPreferredLanguage();\n\n return `\nYou are a versatile professional in software UI design and testing. Your outstanding contributions will impact the user experience of billions of users.\n\nThe user will give you a screenshot, the contents of it (optional), and some data requirements in <DATA_DEMAND>. You need to understand the user's requirements and extract the data satisfying the <DATA_DEMAND>.\n\nIf a key specifies a JSON data type (such as Number, String, Boolean, Object, Array), ensure the returned value strictly matches that data type.\n\nIf the user provides multiple reference images, please carefully review the reference images with the screenshot and provide the correct answer for <DATA_DEMAND>.\n\n\nReturn in the following JSON format:\n{\n thought: string, // the thinking process of the extraction, less then 300 words. Use ${preferredLanguage} in this field.\n data: any, // the extracted data. Make sure both the value and scheme meet the DATA_DEMAND. If you want to write some description in this field, use the same language as the DATA_DEMAND.\n errors: [], // string[], error message if any\n}\n\n# Example 1\nFor example, if the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"name\": \"name shows on the left panel, string\",\n \"age\": \"age shows on the right panel, number\",\n \"isAdmin\": \"if the user is admin, boolean\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: {\n name: \"John\",\n age: 30,\n isAdmin: true\n },\n}\n\n# Example 2\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe todo items list, string[]\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: [\"todo 1\", \"todo 2\", \"todo 3\"],\n}\n\n# Example 3\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\nthe page title, string\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: \"todo list\",\n}\n\n# Example 4\nIf the DATA_DEMAND is:\n\n<DATA_DEMAND>\n{\n \"result\": \"Boolean, is it currently the SMS page?\"\n}\n</DATA_DEMAND>\n\nBy viewing the screenshot and page contents, you can extract the following data:\n\n{\n thought: \"According to the screenshot, i can see ...\",\n data: { result: true },\n}\n`;\n}\n\nexport const extractDataQueryPrompt = (\n pageDescription: string,\n dataQuery: string | Record<string, string>,\n) => {\n let dataQueryText = '';\n if (typeof dataQuery === 'string') {\n dataQueryText = dataQuery;\n } else {\n dataQueryText = JSON.stringify(dataQuery, null, 2);\n }\n\n return `\n<PageDescription>\n${pageDescription}\n</PageDescription>\n\n<DATA_DEMAND>\n${dataQueryText}\n</DATA_DEMAND>\n `;\n};\n\nexport const extractDataSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'extract_data',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n data: {\n type: 'object',\n description: 'The extracted data',\n },\n errors: {\n type: 'array',\n items: {\n type: 'string',\n },\n description: 'Error messages, if any',\n },\n },\n required: ['data', 'errors'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["systemPromptToExtract","preferredLanguage","getPreferredLanguage","extractDataQueryPrompt","pageDescription","dataQuery","dataQueryText","JSON","extractDataSchema"],"mappings":";AAGO,SAASA;IACd,MAAMC,oBAAoBC;IAE1B,OAAO,CAAC;;;;;;;;;;;;uFAY6E,EAAED,kBAAkB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAsE3G,CAAC;AACD;AAEO,MAAME,yBAAyB,CACpCC,iBACAC;IAEA,IAAIC,gBAAgB;IAElBA,gBADE,AAAqB,YAArB,OAAOD,YACOA,YAEAE,KAAK,SAAS,CAACF,WAAW,MAAM;IAGlD,OAAO,CAAC;;AAEV,EAAED,gBAAgB;;;;AAIlB,EAAEE,cAAc;;EAEd,CAAC;AACH;AAEO,MAAME,oBAA8C;IACzD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,QAAQ;oBACN,MAAM;oBACN,OAAO;wBACL,MAAM;oBACR;oBACA,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAS;YAC5B,sBAAsB;QACxB;IACF;AACF"}
@@ -1,6 +1,8 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
1
2
  import { bboxDescription } from "./common.mjs";
2
- function systemPromptToLocateElement(vlMode) {
3
- const bboxComment = bboxDescription(vlMode);
3
+ function systemPromptToLocateElement(modelFamily) {
4
+ const preferredLanguage = getPreferredLanguage();
5
+ const bboxComment = bboxDescription(modelFamily);
4
6
  return `
5
7
  ## Role:
6
8
  You are an AI assistant that helps identify UI elements.
@@ -33,7 +35,7 @@ When no element is found:
33
35
  \`\`\`json
34
36
  {
35
37
  "bbox": [],
36
- "errors": ["I can see ..., but {some element} is not found"]
38
+ "errors": ["I can see ..., but {some element} is not found. Use ${preferredLanguage}."]
37
39
  }
38
40
  \`\`\`
39
41
  `;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","targetElementDescription"],"mappings":";AAEO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,gBAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;AAwB9D,CAAC;AACD;AAEO,MAAME,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"}
1
+ {"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(\n modelFamily: TModelFamily | undefined,\n) {\n const preferredLanguage = getPreferredLanguage();\n const bboxComment = bboxDescription(modelFamily);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found. Use ${preferredLanguage}.\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = (targetElementDescription: string) =>\n `Find: ${targetElementDescription}`;\n"],"names":["systemPromptToLocateElement","modelFamily","preferredLanguage","getPreferredLanguage","bboxComment","bboxDescription","findElementPrompt","targetElementDescription"],"mappings":";;AAGO,SAASA,4BACdC,WAAqC;IAErC,MAAMC,oBAAoBC;IAC1B,MAAMC,cAAcC,gBAAgBJ;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEG,YAAY;;;;;;;;;;;;;;;;;;;;;kEAqBI,EAAEF,kBAAkB;;;AAGtF,CAAC;AACD;AAEO,MAAMI,oBAAoB,CAACC,2BAChC,CAAC,MAAM,EAAEA,0BAA0B"}
@@ -1,16 +1,8 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
1
2
  import { getZodDescription, getZodTypeName } from "@midscene/shared/zod-schema-utils";
2
3
  import { bboxDescription } from "./common.mjs";
3
- const buildCommonOutputFields = (includeThought)=>{
4
- const fields = [
5
- '"note"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action.',
6
- `"log": string, // a brief preamble to the user explaining what you’re about to do`,
7
- '"error"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user\'s instruction.'
8
- ];
9
- if (includeThought) fields.unshift('"thought": string, // your thought process about the next action');
10
- return fields.join('\n ');
11
- };
12
- const vlLocateParam = (vlMode)=>{
13
- if (vlMode) return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;
4
+ const vlLocateParam = (modelFamily)=>{
5
+ if (modelFamily) return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;
14
6
  return "{ prompt: string /* description of the target element */ }";
15
7
  };
16
8
  const findDefaultValue = (field)=>{
@@ -71,18 +63,33 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
71
63
  ${tab}${fields.join(`\n${tab}`)}
72
64
  `.trim();
73
65
  };
74
- async function systemPromptToTaskPlanning({ actionSpace, vlMode, includeBbox, includeThought }) {
75
- if (includeBbox && !vlMode) throw new Error('vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.');
76
- const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, vlLocateParam(includeBbox ? vlMode : void 0)));
66
+ async function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbox, includeThought }) {
67
+ const preferredLanguage = getPreferredLanguage();
68
+ if (includeBbox && !modelFamily) throw new Error('modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.');
69
+ const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, vlLocateParam(includeBbox ? modelFamily : void 0)));
77
70
  const actionList = actionDescriptionList.join('\n');
71
+ const thoughtFieldInstruction = `
72
+ ## About the \`thought\` field (reasoning process)
73
+
74
+ The \`thought\` field captures your structured reasoning about the next action. It should include:
75
+
76
+ 1. **Overall task**: What is the overall instruction/goal?
77
+ 2. **Current observation**: What do I see on the current screen?
78
+ 3. **Progress**: What steps have been completed so far?
79
+ 4. **Next step**: What should the next step be and why?
80
+ 5. **Action selection**: Which Action Type should be used to accomplish this step?
81
+
82
+ **Example thought structure:**
83
+ "The overall task is to [task]. On the current screen, I can see [observations]. We have completed [steps]. The next step should be [next action] because [reason]. I will use [Action Type] to accomplish this."
84
+ `;
78
85
  const logFieldInstruction = `
79
86
  ## About the \`log\` field (preamble message)
80
87
 
81
- The \`log\` field is a brief preamble message to the user explaining what youre about to do. It should follow these principles and examples:
88
+ The \`log\` field is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:
82
89
 
83
- - **Use the same language as the user's instruction**
90
+ - **Use ${preferredLanguage}**
84
91
  - **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).
85
- - **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with whats been done so far and create a sense of momentum and clarity for the user to understand your next actions.
92
+ - **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.
86
93
  - **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.
87
94
 
88
95
  **Examples:**
@@ -92,11 +99,6 @@ The \`log\` field is a brief preamble message to the user explaining what you’
92
99
  - "Go back to find the login button"
93
100
  `;
94
101
  const shouldIncludeThought = includeThought ?? true;
95
- const commonOutputFields = buildCommonOutputFields(shouldIncludeThought);
96
- const exampleThoughtLine = shouldIncludeThought ? ` "thought": "The form has already been filled, I need to click the login button to login",
97
- ` : '';
98
- const exampleThoughtLineWithNote = shouldIncludeThought ? ` "thought": "I need to note the titles in the current screenshot for further processing and scroll to find more titles",
99
- ` : '';
100
102
  return `
101
103
  Target: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.
102
104
 
@@ -114,51 +116,45 @@ Please tell what the next one action is (or null if no action should be done) to
114
116
  ## Supporting actions
115
117
  ${actionList}
116
118
 
119
+ ${shouldIncludeThought ? thoughtFieldInstruction : ''}
120
+
117
121
  ${logFieldInstruction}
118
122
 
119
123
  ## Return format
120
124
 
121
- Return in JSON format:
122
- {
123
- ${commonOutputFields}
124
- "action":
125
- {
126
- "type": string, // the type of the action
127
- "param"?: { // The parameter of the action, if any
128
- // k-v style parameter fields
129
- },
130
- } | null
131
- }
125
+ Return in XML format with the following structure:
126
+ ${shouldIncludeThought ? '<thought>Structured reasoning: overall task, current observation, progress, next step, action selection</thought>' : ''}
127
+ <note>CRITICAL: If any information from the current screenshot will be needed in follow-up actions, you MUST record it here completely. The current screenshot will NOT be available in subsequent steps, so this note is your only way to preserve essential information for later use. Examples: extracted data, element states, content that needs to be referenced. Leave empty if no follow-up information is needed.</note>
128
+ <log>a brief preamble to the user</log>
129
+ <error>error messages (optional)</error>
130
+ <action-type>the type of the action, or null if no action</action-type>
131
+ <action-param-json>JSON object containing the action parameters</action-param-json>
132
132
 
133
133
  For example, if the instruction is to login and the form has already been filled, this is a valid return value:
134
134
 
135
+ ${shouldIncludeThought ? '<thought>The overall task is to login. On the current screen, I can see a login form with username and password fields already filled, and a login button. We have completed filling in the credentials. The next step should be to submit the login form by clicking the login button. I will use Tap action to click it.</thought>' : ''}<log>Click the login button</log>
136
+ <action-type>Tap</action-type>
137
+ <action-param-json>
135
138
  {
136
- ${exampleThoughtLine} "log": "Click the login button",
137
- "action": {
138
- "type": "Tap",
139
- "param": {
140
- "locate": {
141
- "prompt": "The login button"${vlMode && includeBbox ? ', "bbox": [100, 200, 300, 400]' : ''}
142
- }
143
- }
139
+ "locate": {
140
+ "prompt": "The login button"${modelFamily && includeBbox ? ', "bbox": [100, 200, 300, 400]' : ''}
144
141
  }
145
142
  }
143
+ </action-param-json>
146
144
 
147
145
  For example, if the instruction is to find out every title in the screenshot, the return value should be:
148
146
 
147
+ ${shouldIncludeThought ? '<thought>The overall task is to find out every title in the screenshot. On the current screen, I can see several article titles: \'Hello, world!\', \'Midscene 101\', and \'Model strategy\'. We have just started and observed the visible titles. The next step should be to scroll down to find more titles that may not be currently visible. I will use Scroll action to reveal more content.</thought>' : ''}<note>The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'</note>
148
+ <log>Scroll to find more titles</log>
149
+ <action-type>Scroll</action-type>
150
+ <action-param-json>
149
151
  {
150
- ${exampleThoughtLineWithNote} "note": "The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'",
151
- "log": "Scroll to find more titles",
152
- "action": {
153
- "type": "Scroll",
154
- "param": {
155
- "locate": {
156
- "prompt": "The page content area"
157
- },
158
- "direction": "down"
159
- }
160
- }
152
+ "locate": {
153
+ "prompt": "The page content area"
154
+ },
155
+ "direction": "down"
161
156
  }
157
+ </action-param-json>
162
158
  `;
163
159
  }
164
160
  export { descriptionForAction, systemPromptToTaskPlanning };
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst buildCommonOutputFields = (includeThought: boolean) => {\n const fields = [\n `\"note\"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action.`,\n `\"log\": string, // a brief preamble to the user explaining what you’re about to do`,\n `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.`,\n ];\n\n if (includeThought) {\n fields.unshift(\n `\"thought\": string, // your thought process about the next action`,\n );\n }\n\n return fields.join('\\n ');\n};\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n includeThought,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n const shouldIncludeThought = includeThought ?? true;\n const commonOutputFields = buildCommonOutputFields(shouldIncludeThought);\n const exampleThoughtLine = shouldIncludeThought\n ? ` \"thought\": \"The form has already been filled, I need to click the login button to login\",\n`\n : '';\n const exampleThoughtLineWithNote = shouldIncludeThought\n ? ` \"thought\": \"I need to note the titles in the current screenshot for further processing and scroll to find more titles\",\n`\n : '';\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n${exampleThoughtLine} \"log\": \"Click the login button\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${vlMode && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n}\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n{\n${exampleThoughtLineWithNote} \"note\": \"The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'\",\n \"log\": \"Scroll to find more titles\",\n \"action\": {\n \"type\": \"Scroll\",\n \"param\": {\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n }\n }\n}\n`;\n}\n"],"names":["buildCommonOutputFields","includeThought","fields","vlLocateParam","vlMode","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","actionList","logFieldInstruction","shouldIncludeThought","commonOutputFields","exampleThoughtLine","exampleThoughtLineWithNote"],"mappings":";;AAYA,MAAMA,0BAA0B,CAACC;IAC/B,MAAMC,SAAS;QACb;QACA,CAAC,iFAAiF,CAAC;QACnF;KACD;IAED,IAAID,gBACFC,OAAO,OAAO,CACZ;IAIJ,OAAOA,OAAO,IAAI,CAAC;AACrB;AAEA,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMb,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEW,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMG,aAAuB,EAAE;QAG/B,MAAMC,SAASJ,OAAO,WAAW;QAIjC,MAAMK,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKb,MAAM,IAAIc,OAAO,OAAO,CAACF,OACxC,IAAIZ,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMe,aACJ,AACE,cADF,OAAQf,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMgB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAelB,OAAOO;gBAGvC,MAAMY,cAAcC,kBAAkBpB;gBAGtC,MAAMqB,eAAetB,iBAAiBC;gBACtC,MAAMsB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBd,OAAO,IAAI,CAAC;gBACZc,WAAW,OAAO,CAAC,CAACmB;oBAClBjC,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEiC,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBlC,OAAO,IAAI,CAACkC;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEvB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMb,OAAO,IAAI,CAAC,CAAC,EAAE,EAAEa,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAesB,2BAA2B,EAC/CC,WAAW,EACXlC,MAAM,EACNmC,WAAW,EACXtC,cAAc,EAMf;IAEC,IAAIsC,eAAe,CAACnC,QAClB,MAAM,IAAIoC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAACzB,SACtCD,qBACLC,QACAV,cAAcoC,cAAcnC,SAAS0B;IAGzC,MAAMY,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,MAAMC,uBAAuB3C,kBAAkB;IAC/C,MAAM4C,qBAAqB7C,wBAAwB4C;IACnD,MAAME,qBAAqBF,uBACvB,CAAC;AACP,CAAC,GACK;IACJ,MAAMG,6BAA6BH,uBAC/B,CAAC;AACP,CAAC,GACK;IAEJ,OAAO,CAAC;;;;;;;;;;;;;;;AAeV,EAAEF,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;EAMpB,EAAEE,mBAAmB;;;;;;;;;;;;;AAavB,EAAEC,mBAAmB;;;;;oCAKe,EAAE1C,UAAUmC,cAAc,mCAAmC,GAAG;;;;;;;;;AASpG,EAAEQ,2BAA2B;;;;;;;;;;;;AAY7B,CAAC;AACD"}
1
+ {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const thoughtFieldInstruction = `\n## About the \\`thought\\` field (reasoning process)\n\nThe \\`thought\\` field captures your structured reasoning about the next action. It should include:\n\n1. **Overall task**: What is the overall instruction/goal?\n2. **Current observation**: What do I see on the current screen?\n3. **Progress**: What steps have been completed so far?\n4. **Next step**: What should the next step be and why?\n5. **Action selection**: Which Action Type should be used to accomplish this step?\n\n**Example thought structure:**\n\"The overall task is to [task]. On the current screen, I can see [observations]. We have completed [steps]. The next step should be [next action] because [reason]. I will use [Action Type] to accomplish this.\"\n`;\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n const shouldIncludeThought = includeThought ?? true;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${shouldIncludeThought ? thoughtFieldInstruction : ''}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in XML format with the following structure:\n${shouldIncludeThought ? '<thought>Structured reasoning: overall task, current observation, progress, next step, action selection</thought>' : ''}\n<note>CRITICAL: If any information from the current screenshot will be needed in follow-up actions, you MUST record it here completely. The current screenshot will NOT be available in subsequent steps, so this note is your only way to preserve essential information for later use. Examples: extracted data, element states, content that needs to be referenced. Leave empty if no follow-up information is needed.</note>\n<log>a brief preamble to the user</log>\n<error>error messages (optional)</error>\n<action-type>the type of the action, or null if no action</action-type>\n<action-param-json>JSON object containing the action parameters</action-param-json>\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n${shouldIncludeThought ? '<thought>The overall task is to login. On the current screen, I can see a login form with username and password fields already filled, and a login button. We have completed filling in the credentials. The next step should be to submit the login form by clicking the login button. I will use Tap action to click it.</thought>' : ''}<log>Click the login button</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": {\n \"prompt\": \"The login button\"${modelFamily && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n}\n</action-param-json>\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n${shouldIncludeThought ? '<thought>The overall task is to find out every title in the screenshot. On the current screen, I can see several article titles: \\'Hello, world!\\', \\'Midscene 101\\', and \\'Model strategy\\'. We have just started and observed the visible titles. The next step should be to scroll down to find more titles that may not be currently visible. I will use Scroll action to reveal more content.</thought>' : ''}<note>The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'</note>\n<log>Scroll to find more titles</log>\n<action-type>Scroll</action-type>\n<action-param-json>\n{\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n}\n</action-param-json>\n`;\n}\n"],"names":["vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","includeThought","preferredLanguage","getPreferredLanguage","Error","actionDescriptionList","actionList","thoughtFieldInstruction","logFieldInstruction","shouldIncludeThought"],"mappings":";;;AAWA,MAAMA,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKd,MAAM,IAAIe,OAAO,OAAO,CAACF,OACxC,IAAIb,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMgB,aACJ,AACE,cADF,OAAQhB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMiB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAenB,OAAOO;gBAGvC,MAAMa,cAAcC,kBAAkBrB;gBAGtC,MAAMsB,eAAevB,iBAAiBC;gBACtC,MAAMuB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAExB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeuB,2BAA2B,EAC/CC,WAAW,EACXnC,WAAW,EACXoC,WAAW,EACXC,cAAc,EAMf;IACC,MAAMC,oBAAoBC;IAG1B,IAAIH,eAAe,CAACpC,aAClB,MAAM,IAAIwC,MACR;IAIJ,MAAMC,wBAAwBN,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBACLC,QACAV,cAAcqC,cAAcpC,cAAc2B;IAG9C,MAAMe,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,0BAA0B,CAAC;;;;;;;;;;;;;AAanC,CAAC;IAEC,MAAMC,sBAAsB,CAAC;;;;;QAKvB,EAAEN,kBAAkB;;;;;;;;;;AAU5B,CAAC;IAEC,MAAMO,uBAAuBR,kBAAkB;IAE/C,OAAO,CAAC;;;;;;;;;;;;;;;AAeV,EAAEK,WAAW;;AAEb,EAAEG,uBAAuBF,0BAA0B,GAAG;;AAEtD,EAAEC,oBAAoB;;;;;AAKtB,EAAEC,uBAAuB,sHAAsH,GAAG;;;;;;;;;AASlJ,EAAEA,uBAAuB,yUAAyU,GAAG;;;;;gCAKrU,EAAE7C,eAAeoC,cAAc,mCAAmC,GAAG;;;;;;;AAOrG,EAAES,uBAAuB,iZAAiZ,GAAG;;;;;;;;;;;AAW7a,CAAC;AACD"}
@@ -1,6 +1,8 @@
1
+ import { getPreferredLanguage } from "@midscene/shared/env";
1
2
  import { bboxDescription } from "./common.mjs";
2
- function systemPromptToLocateSection(vlMode) {
3
- const bboxFormat = bboxDescription(vlMode);
3
+ function systemPromptToLocateSection(modelFamily) {
4
+ const preferredLanguage = getPreferredLanguage();
5
+ const bboxFormat = bboxDescription(modelFamily);
4
6
  return `
5
7
  ## Role:
6
8
  You are an AI assistant that helps identify UI elements.
@@ -24,7 +26,7 @@ You are an AI assistant that helps identify UI elements.
24
26
  Fields:
25
27
  * \`bbox\` - Bounding box of the section containing the target element
26
28
  * \`references_bbox\` - Optional array of bounding boxes for reference elements
27
- * \`error\` - Optional error message if the section cannot be found
29
+ * \`error\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.
28
30
 
29
31
  Example:
30
32
  If the description is "delete button on the second row with title 'Peter'", return:
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-section-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["import type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\n\nexport function systemPromptToLocateSection(vlMode: TVlModeTypes | undefined) {\n const bboxFormat = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxFormat}\n \"references_bbox\"?: [\n [number, number, number, number],\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` - Bounding box of the section containing the target element\n* \\`references_bbox\\` - Optional array of bounding boxes for reference elements\n* \\`error\\` - Optional error message if the section cannot be found\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"references_bbox\": [[100, 100, 200, 200]]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["systemPromptToLocateSection","vlMode","bboxFormat","bboxDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";AAGO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,aAAaC,gBAAgBF;IACnC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,WAAW;;;;;;;;;;;;;;;;;;;;;;AAsB7D,CAAC;AACD;AAEO,MAAME,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}
1
+ {"version":3,"file":"ai-model/prompt/llm-section-locator.mjs","sources":["../../../../src/ai-model/prompt/llm-section-locator.ts"],"sourcesContent":["import type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\n\nexport function systemPromptToLocateSection(\n modelFamily: TModelFamily | undefined,\n) {\n const preferredLanguage = getPreferredLanguage();\n const bboxFormat = bboxDescription(modelFamily);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Find a section containing the target element\n- If the description mentions reference elements, also locate sections containing those references\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxFormat}\n \"references_bbox\"?: [\n [number, number, number, number],\n ...\n ],\n \"error\"?: string\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` - Bounding box of the section containing the target element\n* \\`references_bbox\\` - Optional array of bounding boxes for reference elements\n* \\`error\\` - Optional error message if the section cannot be found. Use ${preferredLanguage}.\n\nExample:\nIf the description is \"delete button on the second row with title 'Peter'\", return:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"references_bbox\": [[100, 100, 200, 200]]\n}\n\\`\\`\\`\n`;\n}\n\nexport const sectionLocatorInstruction = (sectionDescription: string) =>\n `Find section containing: ${sectionDescription}`;\n"],"names":["systemPromptToLocateSection","modelFamily","preferredLanguage","getPreferredLanguage","bboxFormat","bboxDescription","sectionLocatorInstruction","sectionDescription"],"mappings":";;AAIO,SAASA,4BACdC,WAAqC;IAErC,MAAMC,oBAAoBC;IAC1B,MAAMC,aAAaC,gBAAgBJ;IACnC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEG,WAAW;;;;;;;;;;;;yEAYY,EAAEF,kBAAkB;;;;;;;;;;AAU7F,CAAC;AACD;AAEO,MAAMI,4BAA4B,CAACC,qBACxC,CAAC,yBAAyB,EAAEA,oBAAoB"}
@@ -3,9 +3,9 @@ import { getDebug } from "@midscene/shared/logger";
3
3
  import { assert, ifInBrowser } from "@midscene/shared/utils";
4
4
  import { jsonrepair } from "jsonrepair";
5
5
  import openai_0 from "openai";
6
- import { isAutoGLM } from "../auto-glm/util.mjs";
6
+ import { isAutoGLM, isUITars } from "../auto-glm/util.mjs";
7
7
  async function createChatClient({ modelConfig }) {
8
- const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, modelFamily, createOpenAIClient, timeout } = modelConfig;
8
+ const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion, modelFamily, createOpenAIClient, timeout } = modelConfig;
9
9
  let proxyAgent;
10
10
  const debugProxy = getDebug('ai:call:proxy');
11
11
  const sanitizeProxyUrl = (url)=>{
@@ -99,13 +99,12 @@ async function createChatClient({ modelConfig }) {
99
99
  completion: openai.chat.completions,
100
100
  modelName,
101
101
  modelDescription,
102
- uiTarsVersion,
103
- vlMode,
102
+ uiTarsModelVersion,
104
103
  modelFamily
105
104
  };
106
105
  }
107
106
  async function callAI(messages, modelConfig, options) {
108
- const { completion, modelName, modelDescription, uiTarsVersion, vlMode, modelFamily } = await createChatClient({
107
+ const { completion, modelName, modelDescription, uiTarsModelVersion, modelFamily } = await createChatClient({
109
108
  modelConfig
110
109
  });
111
110
  const maxTokens = globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ?? globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);
@@ -138,11 +137,11 @@ async function callAI(messages, modelConfig, options) {
138
137
  temperature,
139
138
  stream: !!isStreaming,
140
139
  max_tokens: maxTokens,
141
- ...'qwen2.5-vl' === vlMode ? {
140
+ ...'qwen2.5-vl' === modelFamily ? {
142
141
  vl_high_resolution_images: true
143
142
  } : {}
144
143
  };
145
- if (isAutoGLM(vlMode)) {
144
+ if (isAutoGLM(modelFamily)) {
146
145
  commonConfig.top_p = 0.85;
147
146
  commonConfig.frequency_penalty = 0.2;
148
147
  }
@@ -204,7 +203,7 @@ async function callAI(messages, modelConfig, options) {
204
203
  }
205
204
  }
206
205
  content = accumulated;
207
- debugProfileStats(`streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`);
206
+ debugProfileStats(`streaming model, ${modelName}, mode, ${modelFamily || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`);
208
207
  } else {
209
208
  const retryCount = modelConfig.retryCount ?? 1;
210
209
  const retryInterval = modelConfig.retryInterval ?? 2000;
@@ -218,7 +217,7 @@ async function callAI(messages, modelConfig, options) {
218
217
  ...deepThinkConfig
219
218
  });
220
219
  timeCost = Date.now() - startTime;
221
- debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`);
220
+ debugProfileStats(`model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`);
222
221
  debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
223
222
  if (!result.choices) throw new Error(`invalid response from LLM service: ${JSON.stringify(result)}`);
224
223
  content = result.choices[0].message.content;
@@ -264,8 +263,8 @@ async function callAIWithObjectResponse(messages, modelConfig, options) {
264
263
  deepThink: options?.deepThink
265
264
  });
266
265
  assert(response, 'empty response');
267
- const vlMode = modelConfig.vlMode;
268
- const jsonContent = safeParseJson(response.content, vlMode);
266
+ const modelFamily = modelConfig.modelFamily;
267
+ const jsonContent = safeParseJson(response.content, modelFamily);
269
268
  assert('object' == typeof jsonContent, `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`);
270
269
  return {
271
270
  content: jsonContent,
@@ -358,7 +357,7 @@ function normalizeJsonObject(obj) {
358
357
  if ('string' == typeof obj) return obj.trim();
359
358
  return obj;
360
359
  }
361
- function safeParseJson(input, vlMode) {
360
+ function safeParseJson(input, modelFamily) {
362
361
  const cleanJsonString = extractJSONFromCodeBlock(input);
363
362
  if (cleanJsonString?.match(/\((\d+),(\d+)\)/)) return cleanJsonString.match(/\((\d+),(\d+)\)/)?.slice(1).map(Number);
364
363
  let parsed;
@@ -375,7 +374,7 @@ function safeParseJson(input, vlMode) {
375
374
  } catch (error) {
376
375
  lastError = error;
377
376
  }
378
- if ('doubao-vision' === vlMode || 'vlm-ui-tars' === vlMode) {
377
+ if ('doubao-vision' === modelFamily || isUITars(modelFamily)) {
379
378
  const jsonString = preprocessDoubaoBboxJson(cleanJsonString);
380
379
  try {
381
380
  parsed = JSON.parse(jsonrepair(jsonString));
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import type { AIUsageInfo, DeepThinkOption } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TModelFamily,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM } from '../auto-glm/util';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(vlMode)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n modelFamily,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n debugCall(warningMessage);\n console.warn(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n if (!content) {\n throw new Error('empty content from AI model');\n }\n\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n if (attempt < maxAttempts) {\n console.warn(\n `[Midscene] AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n assert(\n typeof jsonContent === 'object',\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n );\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n modelFamily,\n}: {\n deepThink?: DeepThinkOption;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (modelFamily === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (modelFamily === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n if (modelFamily === 'glm-v') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`,\n };\n }\n\n if (modelFamily === 'gpt-5') {\n return {\n config: normalizedDeepThink\n ? {\n reasoning: { effort: 'high' },\n }\n : {\n reasoning: { effort: 'low' },\n },\n debugMessage: normalizedDeepThink\n ? 'deepThink mapped to reasoning.effort=high for gpt-5'\n : 'deepThink disabled for gpt-5',\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${modelFamily ?? 'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${modelFamily ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","modelFamily","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","isAutoGLM","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","result","JSON","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","deepThink","normalizedDeepThink","normalizeJsonObject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","jsonString","String"],"mappings":";;;;;;AAuBA,eAAeA,iBAAiB,EAC9BC,WAAW,EAGZ;IAQC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGZ;IAEJ,IAAIa;IACJ,MAAMC,aAAaC,SAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAIf,WAAW;QACbY,WAAW,oBAAoBE,iBAAiBd;QAChD,IAAIkB,aACFC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKrB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBa,WAAW,qBAAqBE,iBAAiBf;QACjD,IAAImB,aACFC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIlB;YAGzB,IAAI,CAACwB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAEzB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAMgC,gBAAgB;QACpB,SAAS7B;QACT,QAAQC;QAGR,GAAIQ,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGP,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOM,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIlB,aACF,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAIrB,aACF,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnCjC;QACAI;QACAC;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC9C,WAAyB,EACzB+C,OAIC;IAOD,MAAM,EACJC,UAAU,EACV7C,SAAS,EACTI,gBAAgB,EAChBC,aAAa,EACbC,MAAM,EACNC,WAAW,EACZ,GAAG,MAAMX,iBAAiB;QACzBC;IACF;IAEA,MAAMiD,YACJZ,oBAAoB,yBAAyB,CAACa,8BAC9Cb,oBAAoB,yBAAyB,CAACc;IAChD,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,oBAAoBtC,SAAS;IACnC,MAAMuC,qBAAqBvC,SAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAczD,YAAY,WAAW,IAAI;IAE/C,MAAM0D,cAAcX,SAAS,UAAUA,SAAS;IAChD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY5D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMmE,eAAe;QACnBV;QACA,QAAQ,CAAC,CAACC;QACV,YAAYT;QACZ,GAAIxC,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI2D,UAAU3D,SAAS;QACpB0D,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAWzB,SAAS;QACpBrC;IACF;IACA,IAAI4D,cACFlB,UAAUkB;IAEZ,IAAIC,gBAAgB;QAClBnB,UAAUmB;QACVlD,QAAQ,IAAI,CAACkD;IACf;IAEA,IAAI;QACFnB,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAEvD,WAAW;QAGrE,IAAIuD,aAAa;YACf,MAAMe,SAAU,MAAMzB,WAAW,MAAM,CACrC;gBACE,OAAO7C;gBACP2C;gBACA,GAAGqB,YAAY;gBACf,GAAGE,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMK,SAASD,OAAQ;gBAChC,MAAMd,UAAUe,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbZ,QAAQY,MAAM,KAAK;gBAGrB,IAAIf,WAAWgB,mBAAmB;oBAChCf,eAAeD;oBACfE,wBAAwBc;oBACxB,MAAMC,YAAiC;wBACrCjB;wBACAgB;wBACAf;wBACA,YAAY;wBACZ,OAAOiB;oBACT;oBACA9B,QAAQ,OAAO,CAAE6B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCX,WAAWP,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACO,OAAO;wBAEV,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACnB,YAAY,MAAM,GAAG;wBAElCE,QAAQ;4BACN,eAAegB;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTpB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAEiC;oBACjB;gBACF;YACF;YACArB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAElD,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEsD,SAAS,eAAe,EAAEN,eAAe,IAAI;QAE1H,OAAO;YAEL,MAAMwB,aAAajF,YAAY,UAAU,IAAI;YAC7C,MAAMkF,gBAAgBlF,YAAY,aAAa,IAAI;YACnD,MAAMmF,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAC5C,IAAI;gBACF,MAAMC,SAAS,MAAMtC,WAAW,MAAM,CAAC;oBACrC,OAAO7C;oBACP2C;oBACA,GAAGqB,YAAY;oBACf,GAAGE,eAAe;gBACpB;gBAEAN,WAAWP,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAElD,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAE8E,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEvB,SAAS,aAAa,EAAEuB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAE7B,eAAe,IAAI;gBAG9VH,mBACE,CAAC,oBAAoB,EAAEiC,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI5D,MACR,CAAC,mCAAmC,EAAE6D,KAAK,SAAS,CAACD,SAAS;gBAIlE3B,UAAU2B,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3C,IAAI,CAAC3B,SACH,MAAM,IAAIjC,MAAM;gBAGlBmC,uBACGyB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;gBAC3DxB,QAAQwB,OAAO,KAAK;gBACpB;YACF,EAAE,OAAOtD,OAAO;gBACdoD,YAAYpD;gBACZ,IAAIqD,UAAUF,aAAa;oBACzB9D,QAAQ,IAAI,CACV,CAAC,mCAAmC,EAAEgE,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;oBAEhI,MAAM,IAAII,QAAQ,CAACC,UAAYC,WAAWD,SAASP;gBACrD;YACF;YAGF,IAAI,CAACvB,SACH,MAAMyB;QAEV;QAEAhC,UAAU,CAAC,4BAA4B,EAAES,sBAAsB;QAC/DT,UAAU,CAAC,kBAAkB,EAAEO,SAAS;QAGxC,IAAID,eAAe,CAACI,OAAO;YAEzB,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEpB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCG,QAAQ;gBACN,eAAegB;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASnB,WAAW;YACpB,mBAAmBE,wBAAwBgB;YAC3C,OAAOb,eAAeF;YACtB,YAAY,CAAC,CAACJ;QAChB;IACF,EAAE,OAAOiC,GAAQ;QACftE,QAAQ,KAAK,CAAC,kBAAkBsE;QAChC,MAAMC,WAAW,IAAIlE,MACnB,CAAC,eAAe,EAAEgC,cAAc,eAAe,GAAG,kBAAkB,EAAEvD,UAAU,GAAG,EAAEwF,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpB/C,QAAsC,EACtC9C,WAAyB,EACzB+C,OAEC;IAOD,MAAM+C,WAAW,MAAMjD,OAAOC,UAAU9C,aAAa;QACnD,WAAW+C,SAAS;IACtB;IACAgD,OAAOD,UAAU;IACjB,MAAMrF,SAAST,YAAY,MAAM;IACjC,MAAMgG,cAAcC,cAAcH,SAAS,OAAO,EAAErF;IACpDsF,OACE,AAAuB,YAAvB,OAAOC,aACP,CAAC,0CAA0C,EAAEhG,YAAY,SAAS,CAAC,GAAG,EAAE8F,SAAS,OAAO,EAAE;IAE5F,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZnG,WAAyB;IAEzB,MAAM,EAAE2D,OAAO,EAAEG,KAAK,EAAE,GAAG,MAAMjB,OAAOsD,MAAMnG;IAC9C,OAAO;QAAE2D;QAASG;IAAM;AAC1B;AAEO,SAASsC,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASjC,uBAAuB,EACrCkC,SAAS,EACThG,WAAW,EAIZ;IAKC,MAAMiG,sBAAsBD,AAAc,YAAdA,YAAwB7B,SAAY6B;IAEhE,IAAIC,AAAwB9B,WAAxB8B,qBACF,OAAO;QAAE,QAAQ,CAAC;QAAG,cAAc9B;IAAU;IAG/C,IAAInE,AAAgB,eAAhBA,aACF,OAAO;QACL,QAAQ;YAAE,iBAAiBiG;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIjG,AAAgB,oBAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMiG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,IAAIjG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMiG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,UAAU,CAAC;IAC7G;IAGF,IAAIjG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQiG,sBACJ;YACE,WAAW;gBAAE,QAAQ;YAAO;QAC9B,IACA;YACE,WAAW;gBAAE,QAAQ;YAAM;QAC7B;QACJ,cAAcA,sBACV,wDACA;IACN;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAEjG,eAAe,UAAU,CAAC,CAAC;QACzF,gBAAgB,CAAC,0DAA0D,EAAEA,eAAe,UAAU,EAAE,CAAC;IAC3G;AACF;AAQA,SAASkG,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IAAII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASZ,cAAcQ,KAAa,EAAEhG,MAAgC;IAC3E,MAAM6G,kBAAkBlB,yBAAyBK;IAEjD,IAAIa,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAI1F;IAGT,IAAIV;IACJ,IAAIkE;IACJ,IAAI;QACFlE,SAASqE,KAAK,KAAK,CAAC+B;QACpB,OAAOV,oBAAoB1F;IAC7B,EAAE,OAAOc,OAAO;QACdoD,YAAYpD;IACd;IACA,IAAI;QACFd,SAASqE,KAAK,KAAK,CAACgC,WAAWD;QAC/B,OAAOV,oBAAoB1F;IAC7B,EAAE,OAAOc,OAAO;QACdoD,YAAYpD;IACd;IAEA,IAAIvB,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAM+G,aAAahB,yBAAyBc;QAC5C,IAAI;YACFpG,SAASqE,KAAK,KAAK,CAACgC,WAAWC;YAC/B,OAAOZ,oBAAoB1F;QAC7B,EAAE,OAAOc,OAAO;YACdoD,YAAYpD;QACd;IACF;IACA,MAAMN,MACJ,CAAC,gDAAgD,EAAE+F,OACjDrC,aAAa,iBACb,gBAAgB,EAAEqB,OAAO;AAE/B"}
1
+ {"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import type { AIUsageInfo, DeepThinkOption } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TModelFamily,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport type { AIArgs } from '../../common';\nimport { isAutoGLM, isUITars } from '../auto-glm/util';\n\nasync function createChatClient({\n modelConfig,\n}: {\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsModelVersion?: UITarsModelVersion;\n modelFamily: TModelFamily | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent: any = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n\n // Helper function to sanitize proxy URL for logging (remove credentials)\n // Uses URL API instead of regex to avoid ReDoS vulnerabilities\n const sanitizeProxyUrl = (url: string): string => {\n try {\n const parsed = new URL(url);\n if (parsed.username) {\n // Keep username for debugging, hide password for security\n parsed.password = '****';\n return parsed.href;\n }\n return url;\n } catch {\n // If URL parsing fails, return original URL (will be caught later)\n return url;\n }\n };\n\n if (httpProxy) {\n debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));\n if (ifInBrowser) {\n console.warn(\n 'HTTP proxy is configured but not supported in browser environment',\n );\n } else {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'undici';\n const { ProxyAgent } = await import(moduleName);\n proxyAgent = new ProxyAgent({\n uri: httpProxy,\n // Note: authentication is handled via the URI (e.g., http://user:pass@proxy.com:8080)\n });\n }\n } else if (socksProxy) {\n debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));\n if (ifInBrowser) {\n console.warn(\n 'SOCKS proxy is configured but not supported in browser environment',\n );\n } else {\n try {\n // Dynamic import with variable to avoid bundler static analysis\n const moduleName = 'fetch-socks';\n const { socksDispatcher } = await import(moduleName);\n // Parse SOCKS proxy URL (e.g., socks5://127.0.0.1:1080)\n const proxyUrl = new URL(socksProxy);\n\n // Validate hostname\n if (!proxyUrl.hostname) {\n throw new Error('SOCKS proxy URL must include a valid hostname');\n }\n\n // Validate and parse port\n const port = Number.parseInt(proxyUrl.port, 10);\n if (!proxyUrl.port || Number.isNaN(port)) {\n throw new Error('SOCKS proxy URL must include a valid port');\n }\n\n // Parse SOCKS version from protocol\n const protocol = proxyUrl.protocol.replace(':', '');\n const socksType =\n protocol === 'socks4' ? 4 : protocol === 'socks5' ? 5 : 5;\n\n proxyAgent = socksDispatcher({\n type: socksType,\n host: proxyUrl.hostname,\n port,\n ...(proxyUrl.username\n ? {\n userId: decodeURIComponent(proxyUrl.username),\n password: decodeURIComponent(proxyUrl.password || ''),\n }\n : {}),\n });\n debugProxy('socks proxy configured successfully', {\n type: socksType,\n host: proxyUrl.hostname,\n port: port,\n });\n } catch (error) {\n console.error('Failed to configure SOCKS proxy:', error);\n throw new Error(\n `Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`,\n );\n }\n }\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n // Use fetchOptions.dispatcher for fetch-based SDK instead of httpAgent\n // Note: Type assertion needed due to undici version mismatch between dependencies\n ...(proxyAgent ? { fetchOptions: { dispatcher: proxyAgent as any } } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: string;\n reasoning_content?: string;\n usage?: AIUsageInfo;\n isStreamed: boolean;\n}> {\n const {\n completion,\n modelName,\n modelDescription,\n uiTarsModelVersion,\n modelFamily,\n } = await createChatClient({\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n const temperature = modelConfig.temperature ?? 0;\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let accumulatedReasoning = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature,\n stream: !!isStreaming,\n max_tokens: maxTokens,\n ...(modelFamily === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n if (isAutoGLM(modelFamily)) {\n (commonConfig as unknown as Record<string, number>).top_p = 0.85;\n (commonConfig as unknown as Record<string, number>).frequency_penalty = 0.2;\n }\n\n const {\n config: deepThinkConfig,\n debugMessage,\n warningMessage,\n } = resolveDeepThinkConfig({\n deepThink: options?.deepThink,\n modelFamily,\n });\n if (debugMessage) {\n debugCall(debugMessage);\n }\n if (warningMessage) {\n debugCall(warningMessage);\n console.warn(warningMessage);\n }\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n accumulatedReasoning += reasoning_content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${modelFamily || 'default'}, cost-ms, ${timeCost}, temperature, ${temperature ?? ''}`,\n );\n } else {\n // Non-streaming with retry logic\n const retryCount = modelConfig.retryCount ?? 1;\n const retryInterval = modelConfig.retryInterval ?? 2000;\n const maxAttempts = retryCount + 1; // retryCount=1 means 2 total attempts (1 initial + 1 retry)\n\n let lastError: Error | undefined;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n ...deepThinkConfig,\n } as any);\n\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${modelFamily || 'default'}, ui-tars-version, ${uiTarsModelVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`,\n );\n\n debugProfileDetail(\n `model usage detail: ${JSON.stringify(result.usage)}`,\n );\n\n if (!result.choices) {\n throw new Error(\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n }\n\n content = result.choices[0].message.content!;\n if (!content) {\n throw new Error('empty content from AI model');\n }\n\n accumulatedReasoning =\n (result.choices[0].message as any)?.reasoning_content || '';\n usage = result.usage;\n break; // Success, exit retry loop\n } catch (error) {\n lastError = error as Error;\n if (attempt < maxAttempts) {\n console.warn(\n `[Midscene] AI call failed (attempt ${attempt}/${maxAttempts}), retrying in ${retryInterval}ms... Error: ${lastError.message}`,\n );\n await new Promise((resolve) => setTimeout(resolve, retryInterval));\n }\n }\n }\n\n if (!content) {\n throw lastError;\n }\n }\n\n debugCall(`response reasoning content: ${accumulatedReasoning}`);\n debugCall(`response content: ${content}`);\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n reasoning_content: accumulatedReasoning || undefined,\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service (${modelName}): ${e.message}\\nTrouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n modelConfig: IModelConfig,\n options?: {\n deepThink?: DeepThinkOption;\n },\n): Promise<{\n content: T;\n contentString: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const response = await callAI(messages, modelConfig, {\n deepThink: options?.deepThink,\n });\n assert(response, 'empty response');\n const modelFamily = modelConfig.modelFamily;\n const jsonContent = safeParseJson(response.content, modelFamily);\n assert(\n typeof jsonContent === 'object',\n `failed to parse json response from model (${modelConfig.modelName}): ${response.content}`,\n );\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n reasoning_content: response.reasoning_content,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\nexport function resolveDeepThinkConfig({\n deepThink,\n modelFamily,\n}: {\n deepThink?: DeepThinkOption;\n modelFamily?: TModelFamily;\n}): {\n config: Record<string, unknown>;\n debugMessage?: string;\n warningMessage?: string;\n} {\n const normalizedDeepThink = deepThink === 'unset' ? undefined : deepThink;\n\n if (normalizedDeepThink === undefined) {\n return { config: {}, debugMessage: undefined };\n }\n\n if (modelFamily === 'qwen3-vl') {\n return {\n config: { enable_thinking: normalizedDeepThink },\n debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`,\n };\n }\n\n if (modelFamily === 'doubao-vision') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`,\n };\n }\n\n if (modelFamily === 'glm-v') {\n return {\n config: {\n thinking: { type: normalizedDeepThink ? 'enabled' : 'disabled' },\n },\n debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for glm-v`,\n };\n }\n\n if (modelFamily === 'gpt-5') {\n return {\n config: normalizedDeepThink\n ? {\n reasoning: { effort: 'high' },\n }\n : {\n reasoning: { effort: 'low' },\n },\n debugMessage: normalizedDeepThink\n ? 'deepThink mapped to reasoning.effort=high for gpt-5'\n : 'deepThink disabled for gpt-5',\n };\n }\n\n return {\n config: {},\n debugMessage: `deepThink ignored: unsupported model_family \"${modelFamily ?? 'default'}\"`,\n warningMessage: `The \"deepThink\" option is not supported for model_family \"${modelFamily ?? 'default'}\".`,\n };\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(\n input: string,\n modelFamily: TModelFamily | undefined,\n) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n let lastError: unknown;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n\n if (modelFamily === 'doubao-vision' || isUITars(modelFamily)) {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n try {\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n } catch (error) {\n lastError = error;\n }\n }\n throw Error(\n `failed to parse LLM response into JSON. Error - ${String(\n lastError ?? 'unknown error',\n )}. Response - \\n ${input}`,\n );\n}\n"],"names":["createChatClient","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsModelVersion","modelFamily","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","sanitizeProxyUrl","url","parsed","URL","ifInBrowser","console","moduleName","ProxyAgent","socksDispatcher","proxyUrl","Error","port","Number","protocol","socksType","decodeURIComponent","error","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","temperature","isStreaming","content","accumulated","accumulatedReasoning","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","isAutoGLM","deepThinkConfig","debugMessage","warningMessage","resolveDeepThinkConfig","stream","chunk","reasoning_content","chunkData","undefined","estimatedTokens","Math","finalChunk","retryCount","retryInterval","maxAttempts","lastError","attempt","result","JSON","Promise","resolve","setTimeout","e","newError","callAIWithObjectResponse","response","assert","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","deepThink","normalizedDeepThink","normalizeJsonObject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","jsonrepair","isUITars","jsonString","String"],"mappings":";;;;;;AAsBA,eAAeA,iBAAiB,EAC9BC,WAAW,EAGZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACXC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS;IAI5B,MAAMC,mBAAmB,CAACC;QACxB,IAAI;YACF,MAAMC,SAAS,IAAIC,IAAIF;YACvB,IAAIC,OAAO,QAAQ,EAAE;gBAEnBA,OAAO,QAAQ,GAAG;gBAClB,OAAOA,OAAO,IAAI;YACpB;YACA,OAAOD;QACT,EAAE,OAAM;YAEN,OAAOA;QACT;IACF;IAEA,IAAId,WAAW;QACbW,WAAW,oBAAoBE,iBAAiBb;QAChD,IAAIiB,aACFC,QAAQ,IAAI,CACV;aAEG;YAEL,MAAMC,aAAa;YACnB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;YACpCT,aAAa,IAAIU,WAAW;gBAC1B,KAAKpB;YAEP;QACF;IACF,OAAO,IAAID,YAAY;QACrBY,WAAW,qBAAqBE,iBAAiBd;QACjD,IAAIkB,aACFC,QAAQ,IAAI,CACV;aAGF,IAAI;YAEF,MAAMC,aAAa;YACnB,MAAM,EAAEE,eAAe,EAAE,GAAG,MAAM,MAAM,CAACF;YAEzC,MAAMG,WAAW,IAAIN,IAAIjB;YAGzB,IAAI,CAACuB,SAAS,QAAQ,EACpB,MAAM,IAAIC,MAAM;YAIlB,MAAMC,OAAOC,OAAO,QAAQ,CAACH,SAAS,IAAI,EAAE;YAC5C,IAAI,CAACA,SAAS,IAAI,IAAIG,OAAO,KAAK,CAACD,OACjC,MAAM,IAAID,MAAM;YAIlB,MAAMG,WAAWJ,SAAS,QAAQ,CAAC,OAAO,CAAC,KAAK;YAChD,MAAMK,YACJD,AAAa,aAAbA,WAAwB,IAAIA,AAAa,aAAbA,WAAwB,IAAI;YAE1DhB,aAAaW,gBAAgB;gBAC3B,MAAMM;gBACN,MAAML,SAAS,QAAQ;gBACvBE;gBACA,GAAIF,SAAS,QAAQ,GACjB;oBACE,QAAQM,mBAAmBN,SAAS,QAAQ;oBAC5C,UAAUM,mBAAmBN,SAAS,QAAQ,IAAI;gBACpD,IACA,CAAC,CAAC;YACR;YACAX,WAAW,uCAAuC;gBAChD,MAAMgB;gBACN,MAAML,SAAS,QAAQ;gBACvB,MAAME;YACR;QACF,EAAE,OAAOK,OAAO;YACdX,QAAQ,KAAK,CAAC,oCAAoCW;YAClD,MAAM,IAAIN,MACR,CAAC,yBAAyB,EAAExB,WAAW,+GAA+G,CAAC;QAE3J;IAEJ;IAEA,MAAM+B,gBAAgB;QACpB,SAAS5B;QACT,QAAQC;QAGR,GAAIO,aAAa;YAAE,cAAc;gBAAE,YAAYA;YAAkB;QAAE,IAAI,CAAC,CAAC;QACzE,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMsB,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIlB,aACF,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMkB,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCH,SAASI,WAAWJ;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACI,0BAC1C;QACA,IAAIrB,aACF,MAAM,IAAIM,MAAM;QAElBL,QAAQ,GAAG,CAAC;QAEZ,MAAMqB,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCN,SAASO,cAAcP;IACzB;IAEA,IAAIzB,oBAAoB;QACtB,MAAMiC,gBAAgB,MAAMjC,mBAAmBuB,YAAYD;QAE3D,IAAIW,eACFR,SAASQ;IAEb;IAEA,OAAO;QACL,YAAYR,OAAO,IAAI,CAAC,WAAW;QACnChC;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAemC,OACpBC,QAAsC,EACtC7C,WAAyB,EACzB8C,OAIC;IAOD,MAAM,EACJC,UAAU,EACV5C,SAAS,EACTI,gBAAgB,EAChBC,kBAAkB,EAClBC,WAAW,EACZ,GAAG,MAAMV,iBAAiB;QACzBC;IACF;IAEA,MAAMgD,YACJZ,oBAAoB,yBAAyB,CAACa,8BAC9Cb,oBAAoB,yBAAyB,CAACc;IAChD,MAAMC,YAAYrC,SAAS;IAC3B,MAAMsC,oBAAoBtC,SAAS;IACnC,MAAMuC,qBAAqBvC,SAAS;IAEpC,MAAMwC,YAAYC,KAAK,GAAG;IAC1B,MAAMC,cAAcxD,YAAY,WAAW,IAAI;IAE/C,MAAMyD,cAAcX,SAAS,UAAUA,SAAS;IAChD,IAAIY;IACJ,IAAIC,cAAc;IAClB,IAAIC,uBAAuB;IAC3B,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY3D;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMkE,eAAe;QACnBV;QACA,QAAQ,CAAC,CAACC;QACV,YAAYT;QACZ,GAAIvC,AAAgB,iBAAhBA,cACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI0D,UAAU1D,cAAc;QACzByD,aAAmD,KAAK,GAAG;QAC3DA,aAAmD,iBAAiB,GAAG;IAC1E;IAEA,MAAM,EACJ,QAAQE,eAAe,EACvBC,YAAY,EACZC,cAAc,EACf,GAAGC,uBAAuB;QACzB,WAAWzB,SAAS;QACpBrC;IACF;IACA,IAAI4D,cACFlB,UAAUkB;IAEZ,IAAIC,gBAAgB;QAClBnB,UAAUmB;QACVlD,QAAQ,IAAI,CAACkD;IACf;IAEA,IAAI;QACFnB,UACE,CAAC,QAAQ,EAAEM,cAAc,eAAe,GAAG,WAAW,EAAEtD,WAAW;QAGrE,IAAIsD,aAAa;YACf,MAAMe,SAAU,MAAMzB,WAAW,MAAM,CACrC;gBACE,OAAO5C;gBACP0C;gBACA,GAAGqB,YAAY;gBACf,GAAGE,eAAe;YACpB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMK,SAASD,OAAQ;gBAChC,MAAMd,UAAUe,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbZ,QAAQY,MAAM,KAAK;gBAGrB,IAAIf,WAAWgB,mBAAmB;oBAChCf,eAAeD;oBACfE,wBAAwBc;oBACxB,MAAMC,YAAiC;wBACrCjB;wBACAgB;wBACAf;wBACA,YAAY;wBACZ,OAAOiB;oBACT;oBACA9B,QAAQ,OAAO,CAAE6B;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCX,WAAWP,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACO,OAAO;wBAEV,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACnB,YAAY,MAAM,GAAG;wBAElCE,QAAQ;4BACN,eAAegB;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTpB;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOI,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAEiC;oBACjB;gBACF;YACF;YACArB,UAAUC;YACVP,kBACE,CAAC,iBAAiB,EAAEjD,UAAU,QAAQ,EAAEM,eAAe,UAAU,WAAW,EAAEqD,SAAS,eAAe,EAAEN,eAAe,IAAI;QAE/H,OAAO;YAEL,MAAMwB,aAAahF,YAAY,UAAU,IAAI;YAC7C,MAAMiF,gBAAgBjF,YAAY,aAAa,IAAI;YACnD,MAAMkF,cAAcF,aAAa;YAEjC,IAAIG;YAEJ,IAAK,IAAIC,UAAU,GAAGA,WAAWF,aAAaE,UAC5C,IAAI;gBACF,MAAMC,SAAS,MAAMtC,WAAW,MAAM,CAAC;oBACrC,OAAO5C;oBACP0C;oBACA,GAAGqB,YAAY;oBACf,GAAGE,eAAe;gBACpB;gBAEAN,WAAWP,KAAK,GAAG,KAAKD;gBAExBF,kBACE,CAAC,OAAO,EAAEjD,UAAU,QAAQ,EAAEM,eAAe,UAAU,mBAAmB,EAAED,mBAAmB,iBAAiB,EAAE6E,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEvB,SAAS,aAAa,EAAEuB,OAAO,WAAW,IAAI,GAAG,eAAe,EAAE7B,eAAe,IAAI;gBAGxWH,mBACE,CAAC,oBAAoB,EAAEiC,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;gBAGvD,IAAI,CAACA,OAAO,OAAO,EACjB,MAAM,IAAI5D,MACR,CAAC,mCAAmC,EAAE6D,KAAK,SAAS,CAACD,SAAS;gBAIlE3B,UAAU2B,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;gBAC3C,IAAI,CAAC3B,SACH,MAAM,IAAIjC,MAAM;gBAGlBmC,uBACGyB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,EAAU,qBAAqB;gBAC3DxB,QAAQwB,OAAO,KAAK;gBACpB;YACF,EAAE,OAAOtD,OAAO;gBACdoD,YAAYpD;gBACZ,IAAIqD,UAAUF,aAAa;oBACzB9D,QAAQ,IAAI,CACV,CAAC,mCAAmC,EAAEgE,QAAQ,CAAC,EAAEF,YAAY,eAAe,EAAED,cAAc,aAAa,EAAEE,UAAU,OAAO,EAAE;oBAEhI,MAAM,IAAII,QAAQ,CAACC,UAAYC,WAAWD,SAASP;gBACrD;YACF;YAGF,IAAI,CAACvB,SACH,MAAMyB;QAEV;QAEAhC,UAAU,CAAC,4BAA4B,EAAES,sBAAsB;QAC/DT,UAAU,CAAC,kBAAkB,EAAEO,SAAS;QAGxC,IAAID,eAAe,CAACI,OAAO;YAEzB,MAAMgB,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEpB,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCG,QAAQ;gBACN,eAAegB;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASnB,WAAW;YACpB,mBAAmBE,wBAAwBgB;YAC3C,OAAOb,eAAeF;YACtB,YAAY,CAAC,CAACJ;QAChB;IACF,EAAE,OAAOiC,GAAQ;QACftE,QAAQ,KAAK,CAAC,kBAAkBsE;QAChC,MAAMC,WAAW,IAAIlE,MACnB,CAAC,eAAe,EAAEgC,cAAc,eAAe,GAAG,kBAAkB,EAAEtD,UAAU,GAAG,EAAEuF,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC9J;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpB/C,QAAsC,EACtC7C,WAAyB,EACzB8C,OAEC;IAOD,MAAM+C,WAAW,MAAMjD,OAAOC,UAAU7C,aAAa;QACnD,WAAW8C,SAAS;IACtB;IACAgD,OAAOD,UAAU;IACjB,MAAMpF,cAAcT,YAAY,WAAW;IAC3C,MAAM+F,cAAcC,cAAcH,SAAS,OAAO,EAAEpF;IACpDqF,OACE,AAAuB,YAAvB,OAAOC,aACP,CAAC,0CAA0C,EAAE/F,YAAY,SAAS,CAAC,GAAG,EAAE6F,SAAS,OAAO,EAAE;IAE5F,OAAO;QACL,SAASE;QACT,eAAeF,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;QACrB,mBAAmBA,SAAS,iBAAiB;IAC/C;AACF;AAEO,eAAeI,yBACpBC,IAAY,EACZlG,WAAyB;IAEzB,MAAM,EAAE0D,OAAO,EAAEG,KAAK,EAAE,GAAG,MAAMjB,OAAOsD,MAAMlG;IAC9C,OAAO;QAAE0D;QAASG;IAAM;AAC1B;AAEO,SAASsC,yBAAyBN,QAAgB;IACvD,IAAI;QAEF,MAAMO,YAAYP,SAAS,KAAK,CAAC;QACjC,IAAIO,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBR,SAAS,KAAK,CACnC;QAEF,IAAIQ,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBT,SAAS,KAAK,CAAC;QACrC,IAAIS,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOT;AACT;AAEO,SAASU,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAEO,SAASjC,uBAAuB,EACrCkC,SAAS,EACThG,WAAW,EAIZ;IAKC,MAAMiG,sBAAsBD,AAAc,YAAdA,YAAwB7B,SAAY6B;IAEhE,IAAIC,AAAwB9B,WAAxB8B,qBACF,OAAO;QAAE,QAAQ,CAAC;QAAG,cAAc9B;IAAU;IAG/C,IAAInE,AAAgB,eAAhBA,aACF,OAAO;QACL,QAAQ;YAAE,iBAAiBiG;QAAoB;QAC/C,cAAc,CAAC,oCAAoC,EAAEA,oBAAoB,aAAa,CAAC;IACzF;IAGF,IAAIjG,AAAgB,oBAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMiG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,kBAAkB,CAAC;IACrH;IAGF,IAAIjG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQ;YACN,UAAU;gBAAE,MAAMiG,sBAAsB,YAAY;YAAW;QACjE;QACA,cAAc,CAAC,kCAAkC,EAAEA,sBAAsB,YAAY,WAAW,UAAU,CAAC;IAC7G;IAGF,IAAIjG,AAAgB,YAAhBA,aACF,OAAO;QACL,QAAQiG,sBACJ;YACE,WAAW;gBAAE,QAAQ;YAAO;QAC9B,IACA;YACE,WAAW;gBAAE,QAAQ;YAAM;QAC7B;QACJ,cAAcA,sBACV,wDACA;IACN;IAGF,OAAO;QACL,QAAQ,CAAC;QACT,cAAc,CAAC,6CAA6C,EAAEjG,eAAe,UAAU,CAAC,CAAC;QACzF,gBAAgB,CAAC,0DAA0D,EAAEA,eAAe,UAAU,EAAE,CAAC;IAC3G;AACF;AAQA,SAASkG,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IAAII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASZ,cACdQ,KAAa,EACb/F,WAAqC;IAErC,MAAM4G,kBAAkBlB,yBAAyBK;IAEjD,IAAIa,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAI1F;IAGT,IAAIV;IACJ,IAAIkE;IACJ,IAAI;QACFlE,SAASqE,KAAK,KAAK,CAAC+B;QACpB,OAAOV,oBAAoB1F;IAC7B,EAAE,OAAOc,OAAO;QACdoD,YAAYpD;IACd;IACA,IAAI;QACFd,SAASqE,KAAK,KAAK,CAACgC,WAAWD;QAC/B,OAAOV,oBAAoB1F;IAC7B,EAAE,OAAOc,OAAO;QACdoD,YAAYpD;IACd;IAEA,IAAItB,AAAgB,oBAAhBA,eAAmC8G,SAAS9G,cAAc;QAC5D,MAAM+G,aAAajB,yBAAyBc;QAC5C,IAAI;YACFpG,SAASqE,KAAK,KAAK,CAACgC,WAAWE;YAC/B,OAAOb,oBAAoB1F;QAC7B,EAAE,OAAOc,OAAO;YACdoD,YAAYpD;QACd;IACF;IACA,MAAMN,MACJ,CAAC,gDAAgD,EAAEgG,OACjDtC,aAAa,iBACb,gBAAgB,EAAEqB,OAAO;AAE/B"}