@midscene/core 1.0.1-beta-20251205094204.0 → 1.0.1-beta-20251208031856.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/dist/es/agent/task-builder.mjs +1 -1
  2. package/dist/es/agent/task-builder.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +1 -4
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/prompt/llm-planning.mjs +95 -20
  7. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  8. package/dist/es/ai-model/service-caller/index.mjs +39 -54
  9. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  10. package/dist/es/utils.mjs +2 -2
  11. package/dist/lib/agent/agent.js +2 -2
  12. package/dist/lib/agent/common.js +1 -1
  13. package/dist/lib/agent/execution-session.js +2 -2
  14. package/dist/lib/agent/index.js +2 -2
  15. package/dist/lib/agent/task-builder.js +3 -3
  16. package/dist/lib/agent/task-builder.js.map +1 -1
  17. package/dist/lib/agent/task-cache.js +2 -2
  18. package/dist/lib/agent/tasks.js +3 -6
  19. package/dist/lib/agent/tasks.js.map +1 -1
  20. package/dist/lib/agent/ui-utils.js +2 -2
  21. package/dist/lib/agent/utils.js +3 -3
  22. package/dist/lib/ai-model/conversation-history.js +2 -2
  23. package/dist/lib/ai-model/index.js +2 -2
  24. package/dist/lib/ai-model/inspect.js +2 -2
  25. package/dist/lib/ai-model/llm-planning.js +2 -2
  26. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  27. package/dist/lib/ai-model/prompt/common.js +2 -2
  28. package/dist/lib/ai-model/prompt/describe.js +2 -2
  29. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  30. package/dist/lib/ai-model/prompt/llm-locator.js +2 -2
  31. package/dist/lib/ai-model/prompt/llm-planning.js +100 -22
  32. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  33. package/dist/lib/ai-model/prompt/llm-section-locator.js +2 -2
  34. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +2 -2
  35. package/dist/lib/ai-model/prompt/playwright-generator.js +2 -2
  36. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  37. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  38. package/dist/lib/ai-model/prompt/util.js +2 -2
  39. package/dist/lib/ai-model/prompt/yaml-generator.js +2 -2
  40. package/dist/lib/ai-model/service-caller/index.js +43 -55
  41. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  42. package/dist/lib/ai-model/ui-tars-planning.js +2 -2
  43. package/dist/lib/common.js +2 -2
  44. package/dist/lib/device/device-options.js +1 -1
  45. package/dist/lib/device/index.js +2 -2
  46. package/dist/lib/image/index.js +2 -2
  47. package/dist/lib/index.js +2 -2
  48. package/dist/lib/report.js +2 -2
  49. package/dist/lib/service/index.js +2 -2
  50. package/dist/lib/service/utils.js +2 -2
  51. package/dist/lib/task-runner.js +2 -2
  52. package/dist/lib/tree.js +2 -2
  53. package/dist/lib/types.js +3 -3
  54. package/dist/lib/utils.js +4 -4
  55. package/dist/lib/yaml/builder.js +2 -2
  56. package/dist/lib/yaml/index.js +4 -4
  57. package/dist/lib/yaml/player.js +2 -2
  58. package/dist/lib/yaml/utils.js +2 -2
  59. package/dist/lib/yaml.js +1 -1
  60. package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -0
  61. package/dist/types/ai-model/service-caller/index.d.ts +3 -1
  62. package/package.json +5 -5
@@ -24,8 +24,9 @@ var __webpack_require__ = {};
24
24
  var __webpack_exports__ = {};
25
25
  __webpack_require__.r(__webpack_exports__);
26
26
  __webpack_require__.d(__webpack_exports__, {
27
+ systemPromptToTaskPlanning: ()=>systemPromptToTaskPlanning,
27
28
  descriptionForAction: ()=>descriptionForAction,
28
- systemPromptToTaskPlanning: ()=>systemPromptToTaskPlanning
29
+ planSchema: ()=>planSchema
29
30
  });
30
31
  const external_common_js_namespaceObject = require("../../common.js");
31
32
  const external_common_js_namespaceObject_1 = require("./common.js");
@@ -163,7 +164,7 @@ Please tell what the next one action is (or null if no action should be done) to
163
164
  - Make sure the previous actions are completed successfully before performing the next step
164
165
  - If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the "error" field to the error message.
165
166
  - If there is nothing to do but waiting, set the "sleep" field to the positive waiting time in milliseconds and null for the "action" field.
166
- - Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion in the "log" field like this: "Assert: <condition>. I can see <...>, and I think <...>, so the assertion is <truthy / falsy>". If the assertion is falsy, think this an fatal error and set the reason into the "error" field.
167
+ - When the next step is to assert something, this is a very important step, you should think about it carefully and give a solid result. Write your result in the "log" field like this: "Assert: <condition>. I think <...>, so the result is <true / false>". You don't need to give the next one action when you are asserting something. If the assertion result is false, think this an fatal error and set the reason into the "error" field. If the assertion result is true, you can continue to the next step.
167
168
 
168
169
  ## Supporting actions
169
170
  ${actionList}
@@ -178,36 +179,113 @@ Return in JSON format:
178
179
  ${commonOutputFields}
179
180
  "action":
180
181
  {
181
- "type": string, // the type of the action
182
- "param"?: { // The parameter of the action, if any
183
- // k-v style parameter fields
184
- },
182
+ // one of the supporting actions
185
183
  } | null,
186
184
  ,
187
185
  "sleep"?: number, // The sleep time after the action, in milliseconds.
188
186
  }
189
-
190
- For example, if the instruction is to login and the form has already been filled, this is a valid return value:
191
-
192
- {
193
- "log": "Click the login button",
194
- "more_actions_needed_by_instruction": false,
195
- "action": {
196
- "type": "Tap",
197
- "param": {
198
- "locate": {
199
- "prompt": "The login button"${vlMode ? ', "bbox": [100, 200, 300, 400]' : ''}
200
- }
201
- }
202
- }
203
187
  `;
204
188
  }
189
+ const planSchema = {
190
+ type: 'json_schema',
191
+ json_schema: {
192
+ name: 'action_items',
193
+ strict: false,
194
+ schema: {
195
+ type: 'object',
196
+ strict: false,
197
+ properties: {
198
+ actions: {
199
+ type: 'array',
200
+ items: {
201
+ type: 'object',
202
+ strict: false,
203
+ properties: {
204
+ thought: {
205
+ type: 'string',
206
+ description: 'Reasons for generating this task, and why this task is feasible on this page'
207
+ },
208
+ type: {
209
+ type: 'string',
210
+ description: 'Type of action'
211
+ },
212
+ param: {
213
+ anyOf: [
214
+ {
215
+ type: 'null'
216
+ },
217
+ {
218
+ type: 'object',
219
+ additionalProperties: true
220
+ }
221
+ ],
222
+ description: 'Parameter of the action'
223
+ },
224
+ locate: {
225
+ type: [
226
+ 'object',
227
+ 'null'
228
+ ],
229
+ properties: {
230
+ id: {
231
+ type: 'string'
232
+ },
233
+ prompt: {
234
+ type: 'string'
235
+ }
236
+ },
237
+ required: [
238
+ 'id',
239
+ 'prompt'
240
+ ],
241
+ additionalProperties: false,
242
+ description: 'Location information for the target element'
243
+ }
244
+ },
245
+ required: [
246
+ 'thought',
247
+ 'type',
248
+ 'param',
249
+ 'locate'
250
+ ],
251
+ additionalProperties: false
252
+ },
253
+ description: 'List of actions to be performed'
254
+ },
255
+ more_actions_needed_by_instruction: {
256
+ type: 'boolean',
257
+ description: 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.'
258
+ },
259
+ log: {
260
+ type: 'string',
261
+ description: 'Log what these planned actions do. Do not include further actions that have not been planned.'
262
+ },
263
+ error: {
264
+ type: [
265
+ 'string',
266
+ 'null'
267
+ ],
268
+ description: 'Error messages about unexpected situations'
269
+ }
270
+ },
271
+ required: [
272
+ 'actions',
273
+ 'more_actions_needed_by_instruction',
274
+ 'log',
275
+ 'error'
276
+ ],
277
+ additionalProperties: false
278
+ }
279
+ }
280
+ };
205
281
  exports.descriptionForAction = __webpack_exports__.descriptionForAction;
282
+ exports.planSchema = __webpack_exports__.planSchema;
206
283
  exports.systemPromptToTaskPlanning = __webpack_exports__.systemPromptToTaskPlanning;
207
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
284
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
208
285
  "descriptionForAction",
286
+ "planSchema",
209
287
  "systemPromptToTaskPlanning"
210
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
288
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
211
289
  Object.defineProperty(exports, '__esModule', {
212
290
  value: true
213
291
  });
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] | undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' | ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion in the \"log\" field like this: \"Assert: <condition>. I can see <...>, and I think <...>, so the assertion is <truthy / falsy>\". If the assertion is falsy, think this an fatal error and set the reason into the \"error\" field.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n \"log\": \"Click the login button\",\n \"more_actions_needed_by_instruction\": false,\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${vlMode ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","getTypeName","field","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","values","option","String","options","types","opt","console","getDescription","isOptional","keyWithOptional","description","paramLine","line","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACEA,MAAMI,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QACjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;gBAEnB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYD;gBAChC,MAAMK,gBAAgBD,YAAY,IAAI,EAAE;gBAExC,IAAIC,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,cACzB,OAAOZ;oBAET,OAAO;gBACT;gBACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAC/B,MAAME,SACHH,YAAY,IAAI,EAAE,QACf,IAAI,CAACI,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,KAAK,SAAS;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIF,AAAkB,eAAlBA,eAA8B;oBAChC,MAAMK,UAAUN,YAAY,IAAI,EAAE;oBAClC,IAAIM,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAab,YAAYa;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAT,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMU,iBAAiB,CAACd;gBAEtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBF,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMI,cAAcH,YAAYD;gBAGhC,IAAI,iBAAiBI,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIA,YAAY,IAAI,EAAE,aAAa,aACjC;oBAAA,IAAI,kCAAkCA,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACvB,KAAKmB,MAAM,IAAIlB,OAAO,OAAO,CAACgB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMe,aACJ,AAAqC,cAArC,OAAQf,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMgB,kBAAkBD,aAAa,GAAGlC,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMsB,WAAWJ,YAAYC;gBAG7B,MAAMiB,cAAcH,eAAed;gBAGnC,IAAIkB,YAAY,GAAGF,gBAAgB,EAAE,EAAEb,UAAU;gBACjD,IAAIc,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnCtB,WAAW,IAAI,CAACuB;YAClB;YAIF,IAAIvB,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACwB;oBAClBzB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEyB,MAAM;gBAC3B;YACF;QACF,OAAO;YAGL,MAAMC,iBAAiBxB,OAAO,IAAI,EAAE;YACpC,IAAIO,WAAW;YAEf,IAAIiB,AAAmB,gBAAnBA,gBAAgCjB,WAAW;iBAC1C,IAAIiB,AAAmB,gBAAnBA,gBAAgCjB,WAAW;iBAC/C,IAAIiB,AAAmB,iBAAnBA,gBAAiCjB,WAAW;YAGrD,MAAMc,cAAc,iBAAiBrB,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAIyB,mBAAmB,CAAC,SAAS,EAAElB,UAAU;YAC7C,IAAIc,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpB3B,OAAO,IAAI,CAAC2B;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAE9B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe6B,2BAA2B,EAC/CC,WAAW,EACXnC,MAAM,EACNoC,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACpC,QAClB,MAAM,IAAIqC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAAChC,SACtCD,qBACLC,QACAJ,cAAcqC,cAAcpC,SAASuC;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAE3C,mBAAmB;;;;;;;;;;;;;;;;;;;;;oCAqBa,EAAEE,SAAS,mCAAmC,GAAG;;;;AAIrF,CAAC;AACD"}
1
+ {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] | undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' | ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- When the next step is to assert something, this is a very important step, you should think about it carefully and give a solid result. Write your result in the \"log\" field like this: \"Assert: <condition>. I think <...>, so the result is <true / false>\". You don't need to give the next one action when you are asserting something. If the assertion result is false, think this an fatal error and set the reason into the \"error\" field. If the assertion result is true, you can continue to the next step.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","getTypeName","field","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","values","option","String","options","types","opt","console","getDescription","isOptional","keyWithOptional","description","paramLine","line","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction","planSchema"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACGA,MAAMI,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QACjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;gBAEnB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYD;gBAChC,MAAMK,gBAAgBD,YAAY,IAAI,EAAE;gBAExC,IAAIC,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,cACzB,OAAOZ;oBAET,OAAO;gBACT;gBACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAC/B,MAAME,SACHH,YAAY,IAAI,EAAE,QACf,IAAI,CAACI,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,KAAK,SAAS;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIF,AAAkB,eAAlBA,eAA8B;oBAChC,MAAMK,UAAUN,YAAY,IAAI,EAAE;oBAClC,IAAIM,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAab,YAAYa;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAT,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMU,iBAAiB,CAACd;gBAEtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBF,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMI,cAAcH,YAAYD;gBAGhC,IAAI,iBAAiBI,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIA,YAAY,IAAI,EAAE,aAAa,aACjC;oBAAA,IAAI,kCAAkCA,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACvB,KAAKmB,MAAM,IAAIlB,OAAO,OAAO,CAACgB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMe,aACJ,AAAqC,cAArC,OAAQf,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMgB,kBAAkBD,aAAa,GAAGlC,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMsB,WAAWJ,YAAYC;gBAG7B,MAAMiB,cAAcH,eAAed;gBAGnC,IAAIkB,YAAY,GAAGF,gBAAgB,EAAE,EAAEb,UAAU;gBACjD,IAAIc,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnCtB,WAAW,IAAI,CAACuB;YAClB;YAIF,IAAIvB,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACwB;oBAClBzB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEyB,MAAM;gBAC3B;YACF;QACF,OAAO;YAGL,MAAMC,iBAAiBxB,OAAO,IAAI,EAAE;YACpC,IAAIO,WAAW;YAEf,IAAIiB,AAAmB,gBAAnBA,gBAAgCjB,WAAW;iBAC1C,IAAIiB,AAAmB,gBAAnBA,gBAAgCjB,WAAW;iBAC/C,IAAIiB,AAAmB,iBAAnBA,gBAAiCjB,WAAW;YAGrD,MAAMc,cAAc,iBAAiBrB,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAIyB,mBAAmB,CAAC,SAAS,EAAElB,UAAU;YAC7C,IAAIc,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpB3B,OAAO,IAAI,CAAC2B;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAE9B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe6B,2BAA2B,EAC/CC,WAAW,EACXnC,MAAM,EACNoC,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACpC,QAClB,MAAM,IAAIqC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAAChC,SACtCD,qBACLC,QACAJ,cAAcqC,cAAcpC,SAASuC;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAE3C,mBAAmB;;;;;;;;AAQvB,CAAC;AACD;AAEO,MAAM4C,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}
@@ -68,10 +68,10 @@ If the description is "delete button on the second row with title 'Peter'", retu
68
68
  const sectionLocatorInstruction = (sectionDescription)=>`Find section containing: ${sectionDescription}`;
69
69
  exports.sectionLocatorInstruction = __webpack_exports__.sectionLocatorInstruction;
70
70
  exports.systemPromptToLocateSection = __webpack_exports__.systemPromptToLocateSection;
71
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
71
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
72
72
  "sectionLocatorInstruction",
73
73
  "systemPromptToLocateSection"
74
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
74
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
75
75
  Object.defineProperty(exports, '__esModule', {
76
76
  value: true
77
77
  });
@@ -61,10 +61,10 @@ Return true if the description is order-sensitive, false otherwise.
61
61
  const orderSensitiveJudgePrompt = (description)=>`Analyze this element description: "${description}"`;
62
62
  exports.orderSensitiveJudgePrompt = __webpack_exports__.orderSensitiveJudgePrompt;
63
63
  exports.systemPromptToJudgeOrderSensitive = __webpack_exports__.systemPromptToJudgeOrderSensitive;
64
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
64
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
65
65
  "orderSensitiveJudgePrompt",
66
66
  "systemPromptToJudgeOrderSensitive"
67
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
67
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
68
68
  Object.defineProperty(exports, '__esModule', {
69
69
  value: true
70
70
  });
@@ -159,7 +159,7 @@ exports.getScreenshotsForLLM = __webpack_exports__.getScreenshotsForLLM;
159
159
  exports.prepareEventSummary = __webpack_exports__.prepareEventSummary;
160
160
  exports.processEventsForLLM = __webpack_exports__.processEventsForLLM;
161
161
  exports.validateEvents = __webpack_exports__.validateEvents;
162
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
162
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
163
163
  "createEventCounts",
164
164
  "createMessageContent",
165
165
  "extractInputDescriptions",
@@ -170,7 +170,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
170
170
  "prepareEventSummary",
171
171
  "processEventsForLLM",
172
172
  "validateEvents"
173
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
173
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
174
174
  Object.defineProperty(exports, '__esModule', {
175
175
  value: true
176
176
  });
@@ -58,9 +58,9 @@ call_user() # Submit the task and call the user when the task is unsolvable, or
58
58
  `;
59
59
  }
60
60
  exports.systemPromptToLocateElementPosition = __webpack_exports__.systemPromptToLocateElementPosition;
61
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
61
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
62
62
  "systemPromptToLocateElementPosition"
63
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
63
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
64
64
  Object.defineProperty(exports, '__esModule', {
65
65
  value: true
66
66
  });
@@ -62,10 +62,10 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
62
62
  const getSummary = (prediction)=>prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, '').trim();
63
63
  exports.getSummary = __webpack_exports__.getSummary;
64
64
  exports.getUiTarsPlanningPrompt = __webpack_exports__.getUiTarsPlanningPrompt;
65
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
65
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
66
66
  "getSummary",
67
67
  "getUiTarsPlanningPrompt"
68
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
68
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
69
69
  Object.defineProperty(exports, '__esModule', {
70
70
  value: true
71
71
  });
@@ -73,14 +73,14 @@ exports.describeUserPage = __webpack_exports__.describeUserPage;
73
73
  exports.distance = __webpack_exports__.distance;
74
74
  exports.distanceThreshold = __webpack_exports__.distanceThreshold;
75
75
  exports.samplePageDescription = __webpack_exports__.samplePageDescription;
76
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
76
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
77
77
  "describeElement",
78
78
  "describeSize",
79
79
  "describeUserPage",
80
80
  "distance",
81
81
  "distanceThreshold",
82
82
  "samplePageDescription"
83
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
83
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
84
84
  Object.defineProperty(exports, '__esModule', {
85
85
  value: true
86
86
  });
@@ -261,7 +261,7 @@ exports.getScreenshotsForLLM = __webpack_exports__.getScreenshotsForLLM;
261
261
  exports.prepareEventSummary = __webpack_exports__.prepareEventSummary;
262
262
  exports.processEventsForLLM = __webpack_exports__.processEventsForLLM;
263
263
  exports.validateEvents = __webpack_exports__.validateEvents;
264
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
264
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
265
265
  "createEventCounts",
266
266
  "createMessageContent",
267
267
  "extractInputDescriptions",
@@ -272,7 +272,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
272
272
  "prepareEventSummary",
273
273
  "processEventsForLLM",
274
274
  "validateEvents"
275
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
275
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
276
276
  Object.defineProperty(exports, '__esModule', {
277
277
  value: true
278
278
  });
@@ -38,78 +38,37 @@ __webpack_require__.d(__webpack_exports__, {
38
38
  preprocessDoubaoBboxJson: ()=>preprocessDoubaoBboxJson,
39
39
  callAIWithStringResponse: ()=>callAIWithStringResponse,
40
40
  safeParseJson: ()=>safeParseJson,
41
+ getResponseFormat: ()=>getResponseFormat,
41
42
  callAI: ()=>callAI
42
43
  });
44
+ const external_types_js_namespaceObject = require("../../types.js");
43
45
  const env_namespaceObject = require("@midscene/shared/env");
44
46
  const logger_namespaceObject = require("@midscene/shared/logger");
45
47
  const utils_namespaceObject = require("@midscene/shared/utils");
48
+ const external_https_proxy_agent_namespaceObject = require("https-proxy-agent");
46
49
  const external_jsonrepair_namespaceObject = require("jsonrepair");
47
50
  const external_openai_namespaceObject = require("openai");
48
51
  var external_openai_default = /*#__PURE__*/ __webpack_require__.n(external_openai_namespaceObject);
52
+ const external_socks_proxy_agent_namespaceObject = require("socks-proxy-agent");
53
+ const external_common_js_namespaceObject = require("../../common.js");
54
+ const assertion_js_namespaceObject = require("../prompt/assertion.js");
55
+ const llm_planning_js_namespaceObject = require("../prompt/llm-planning.js");
49
56
  async function createChatClient({ AIActionTypeValue, modelConfig }) {
50
57
  const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, createOpenAIClient, timeout } = modelConfig;
51
58
  let proxyAgent;
52
59
  const debugProxy = (0, logger_namespaceObject.getDebug)('ai:call:proxy');
53
- const sanitizeProxyUrl = (url)=>{
54
- try {
55
- const parsed = new URL(url);
56
- if (parsed.username) {
57
- parsed.password = '****';
58
- return parsed.href;
59
- }
60
- return url;
61
- } catch {
62
- return url;
63
- }
64
- };
65
60
  if (httpProxy) {
66
- debugProxy('using http proxy', sanitizeProxyUrl(httpProxy));
67
- if (utils_namespaceObject.ifInBrowser) console.warn('HTTP proxy is configured but not supported in browser environment');
68
- else {
69
- const moduleName = 'undici';
70
- const { ProxyAgent } = await import(moduleName);
71
- proxyAgent = new ProxyAgent({
72
- uri: httpProxy
73
- });
74
- }
61
+ debugProxy('using http proxy', httpProxy);
62
+ proxyAgent = new external_https_proxy_agent_namespaceObject.HttpsProxyAgent(httpProxy);
75
63
  } else if (socksProxy) {
76
- debugProxy('using socks proxy', sanitizeProxyUrl(socksProxy));
77
- if (utils_namespaceObject.ifInBrowser) console.warn('SOCKS proxy is configured but not supported in browser environment');
78
- else try {
79
- const moduleName = 'fetch-socks';
80
- const { socksDispatcher } = await import(moduleName);
81
- const proxyUrl = new URL(socksProxy);
82
- if (!proxyUrl.hostname) throw new Error('SOCKS proxy URL must include a valid hostname');
83
- const port = Number.parseInt(proxyUrl.port, 10);
84
- if (!proxyUrl.port || Number.isNaN(port)) throw new Error('SOCKS proxy URL must include a valid port');
85
- const protocol = proxyUrl.protocol.replace(':', '');
86
- const socksType = 'socks4' === protocol ? 4 : 'socks5' === protocol ? 5 : 5;
87
- proxyAgent = socksDispatcher({
88
- type: socksType,
89
- host: proxyUrl.hostname,
90
- port,
91
- ...proxyUrl.username ? {
92
- userId: decodeURIComponent(proxyUrl.username),
93
- password: decodeURIComponent(proxyUrl.password || '')
94
- } : {}
95
- });
96
- debugProxy('socks proxy configured successfully', {
97
- type: socksType,
98
- host: proxyUrl.hostname,
99
- port: port
100
- });
101
- } catch (error) {
102
- console.error('Failed to configure SOCKS proxy:', error);
103
- throw new Error(`Invalid SOCKS proxy URL: ${socksProxy}. Expected format: socks4://host:port, socks5://host:port, or with authentication: socks5://user:pass@host:port`);
104
- }
64
+ debugProxy('using socks proxy', socksProxy);
65
+ proxyAgent = new external_socks_proxy_agent_namespaceObject.SocksProxyAgent(socksProxy);
105
66
  }
106
67
  const openAIOptions = {
107
68
  baseURL: openaiBaseURL,
108
69
  apiKey: openaiApiKey,
109
70
  ...proxyAgent ? {
110
- fetchOptions: {
111
- dispatcher: proxyAgent
112
- }
71
+ httpAgent: proxyAgent
113
72
  } : {},
114
73
  ...openaiExtraConfig,
115
74
  ...'number' == typeof timeout ? {
@@ -150,6 +109,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
150
109
  AIActionTypeValue,
151
110
  modelConfig
152
111
  });
112
+ const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
153
113
  const maxTokens = env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.MIDSCENE_MODEL_MAX_TOKENS) ?? env_namespaceObject.globalConfigManager.getEnvConfigValue(env_namespaceObject.OPENAI_MAX_TOKENS);
154
114
  const debugCall = (0, logger_namespaceObject.getDebug)('ai:call');
155
115
  const debugProfileStats = (0, logger_namespaceObject.getDebug)('ai:profile:stats');
@@ -188,6 +148,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
188
148
  const stream = await completion.create({
189
149
  model: modelName,
190
150
  messages,
151
+ response_format: responseFormat,
191
152
  ...commonConfig
192
153
  }, {
193
154
  stream: true
@@ -234,6 +195,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
234
195
  const result = await completion.create({
235
196
  model: modelName,
236
197
  messages,
198
+ response_format: responseFormat,
237
199
  ...commonConfig
238
200
  });
239
201
  timeCost = Date.now() - startTime;
@@ -266,6 +228,30 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
266
228
  throw newError;
267
229
  }
268
230
  }
231
+ const getResponseFormat = (modelName, AIActionTypeValue)=>{
232
+ let responseFormat;
233
+ if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
234
+ case external_common_js_namespaceObject.AIActionType.ASSERT:
235
+ responseFormat = assertion_js_namespaceObject.assertSchema;
236
+ break;
237
+ case external_common_js_namespaceObject.AIActionType.PLAN:
238
+ responseFormat = llm_planning_js_namespaceObject.planSchema;
239
+ break;
240
+ case external_common_js_namespaceObject.AIActionType.EXTRACT_DATA:
241
+ case external_common_js_namespaceObject.AIActionType.DESCRIBE_ELEMENT:
242
+ responseFormat = {
243
+ type: external_types_js_namespaceObject.AIResponseFormat.JSON
244
+ };
245
+ break;
246
+ case external_common_js_namespaceObject.AIActionType.TEXT:
247
+ responseFormat = void 0;
248
+ break;
249
+ }
250
+ if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== external_common_js_namespaceObject.AIActionType.TEXT) responseFormat = {
251
+ type: external_types_js_namespaceObject.AIResponseFormat.JSON
252
+ };
253
+ return responseFormat;
254
+ };
269
255
  async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
270
256
  const response = await callAI(messages, AIActionTypeValue, modelConfig);
271
257
  (0, utils_namespaceObject.assert)(response, 'empty response');
@@ -338,16 +324,18 @@ exports.callAI = __webpack_exports__.callAI;
338
324
  exports.callAIWithObjectResponse = __webpack_exports__.callAIWithObjectResponse;
339
325
  exports.callAIWithStringResponse = __webpack_exports__.callAIWithStringResponse;
340
326
  exports.extractJSONFromCodeBlock = __webpack_exports__.extractJSONFromCodeBlock;
327
+ exports.getResponseFormat = __webpack_exports__.getResponseFormat;
341
328
  exports.preprocessDoubaoBboxJson = __webpack_exports__.preprocessDoubaoBboxJson;
342
329
  exports.safeParseJson = __webpack_exports__.safeParseJson;
343
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
330
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
344
331
  "callAI",
345
332
  "callAIWithObjectResponse",
346
333
  "callAIWithStringResponse",
347
334
  "extractJSONFromCodeBlock",
335
+ "getResponseFormat",
348
336
  "preprocessDoubaoBboxJson",
349
337
  "safeParseJson"
350
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
338
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
351
339
  Object.defineProperty(exports, '__esModule', {
352
340
  value: true
353
341
  });