@midscene/core 1.0.1-beta-20251208112226.0 → 1.0.1-beta-20251209112631.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/es/agent/agent.mjs.map +1 -1
  2. package/dist/es/agent/tasks.mjs.map +1 -1
  3. package/dist/es/agent/utils.mjs +1 -1
  4. package/dist/es/ai-model/prompt/llm-planning.mjs +5 -62
  5. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  6. package/dist/es/device/index.mjs +1 -1
  7. package/dist/es/device/index.mjs.map +1 -1
  8. package/dist/es/utils.mjs +2 -2
  9. package/dist/lib/agent/agent.js +2 -2
  10. package/dist/lib/agent/agent.js.map +1 -1
  11. package/dist/lib/agent/common.js +1 -1
  12. package/dist/lib/agent/execution-session.js +2 -2
  13. package/dist/lib/agent/index.js +2 -2
  14. package/dist/lib/agent/task-builder.js +2 -2
  15. package/dist/lib/agent/task-cache.js +2 -2
  16. package/dist/lib/agent/tasks.js +2 -2
  17. package/dist/lib/agent/tasks.js.map +1 -1
  18. package/dist/lib/agent/ui-utils.js +2 -2
  19. package/dist/lib/agent/utils.js +3 -3
  20. package/dist/lib/ai-model/conversation-history.js +2 -2
  21. package/dist/lib/ai-model/index.js +2 -2
  22. package/dist/lib/ai-model/inspect.js +2 -2
  23. package/dist/lib/ai-model/llm-planning.js +2 -2
  24. package/dist/lib/ai-model/prompt/assertion.js +2 -2
  25. package/dist/lib/ai-model/prompt/common.js +2 -2
  26. package/dist/lib/ai-model/prompt/describe.js +2 -2
  27. package/dist/lib/ai-model/prompt/extraction.js +2 -2
  28. package/dist/lib/ai-model/prompt/llm-locator.js +2 -2
  29. package/dist/lib/ai-model/prompt/llm-planning.js +9 -66
  30. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  31. package/dist/lib/ai-model/prompt/llm-section-locator.js +2 -2
  32. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +2 -2
  33. package/dist/lib/ai-model/prompt/playwright-generator.js +2 -2
  34. package/dist/lib/ai-model/prompt/ui-tars-locator.js +2 -2
  35. package/dist/lib/ai-model/prompt/ui-tars-planning.js +2 -2
  36. package/dist/lib/ai-model/prompt/util.js +2 -2
  37. package/dist/lib/ai-model/prompt/yaml-generator.js +2 -2
  38. package/dist/lib/ai-model/service-caller/index.js +2 -2
  39. package/dist/lib/ai-model/ui-tars-planning.js +2 -2
  40. package/dist/lib/common.js +2 -2
  41. package/dist/lib/device/device-options.js +1 -1
  42. package/dist/lib/device/index.js +3 -3
  43. package/dist/lib/device/index.js.map +1 -1
  44. package/dist/lib/image/index.js +2 -2
  45. package/dist/lib/index.js +2 -2
  46. package/dist/lib/report.js +2 -2
  47. package/dist/lib/service/index.js +2 -2
  48. package/dist/lib/service/utils.js +2 -2
  49. package/dist/lib/task-runner.js +2 -2
  50. package/dist/lib/tree.js +2 -2
  51. package/dist/lib/types.js +3 -3
  52. package/dist/lib/utils.js +4 -4
  53. package/dist/lib/yaml/builder.js +2 -2
  54. package/dist/lib/yaml/index.js +4 -4
  55. package/dist/lib/yaml/player.js +2 -2
  56. package/dist/lib/yaml/utils.js +2 -2
  57. package/dist/lib/yaml.js +1 -1
  58. package/dist/types/agent/agent.d.ts +7 -7
  59. package/dist/types/agent/tasks.d.ts +1 -1
  60. package/package.json +3 -3
@@ -123,7 +123,7 @@ exports.pullParamStr = __webpack_exports__.pullParamStr;
123
123
  exports.scrollParamStr = __webpack_exports__.scrollParamStr;
124
124
  exports.taskTitleStr = __webpack_exports__.taskTitleStr;
125
125
  exports.typeStr = __webpack_exports__.typeStr;
126
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
126
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
127
127
  "extractInsightParam",
128
128
  "locateParamStr",
129
129
  "paramStr",
@@ -131,7 +131,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
131
131
  "scrollParamStr",
132
132
  "taskTitleStr",
133
133
  "typeStr"
134
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
134
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
135
135
  Object.defineProperty(exports, '__esModule', {
136
136
  value: true
137
137
  });
@@ -147,7 +147,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
147
147
  return;
148
148
  }
149
149
  }
150
- const getMidsceneVersion = ()=>"1.0.1-beta-20251208112226.0";
150
+ const getMidsceneVersion = ()=>"1.0.1-beta-20251209112631.0";
151
151
  const parsePrompt = (prompt)=>{
152
152
  if ('string' == typeof prompt) return {
153
153
  textPrompt: prompt,
@@ -171,7 +171,7 @@ exports.matchElementFromCache = __webpack_exports__.matchElementFromCache;
171
171
  exports.matchElementFromPlan = __webpack_exports__.matchElementFromPlan;
172
172
  exports.parsePrompt = __webpack_exports__.parsePrompt;
173
173
  exports.printReportMsg = __webpack_exports__.printReportMsg;
174
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
174
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
175
175
  "commonContextParser",
176
176
  "generateCacheId",
177
177
  "getCurrentExecutionFile",
@@ -182,7 +182,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
182
182
  "matchElementFromPlan",
183
183
  "parsePrompt",
184
184
  "printReportMsg"
185
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
185
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
186
186
  Object.defineProperty(exports, '__esModule', {
187
187
  value: true
188
188
  });
@@ -94,9 +94,9 @@ class ConversationHistory {
94
94
  }
95
95
  }
96
96
  exports.ConversationHistory = __webpack_exports__.ConversationHistory;
97
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
97
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
98
98
  "ConversationHistory"
99
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
99
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
100
100
  Object.defineProperty(exports, '__esModule', {
101
101
  value: true
102
102
  });
@@ -93,7 +93,7 @@ exports.plan = __webpack_exports__.plan;
93
93
  exports.resizeImageForUiTars = __webpack_exports__.resizeImageForUiTars;
94
94
  exports.systemPromptToLocateElement = __webpack_exports__.systemPromptToLocateElement;
95
95
  exports.uiTarsPlanning = __webpack_exports__.uiTarsPlanning;
96
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
96
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
97
97
  "AIActionType",
98
98
  "AiExtractElementInfo",
99
99
  "AiJudgeOrderSensitive",
@@ -123,7 +123,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
123
123
  "resizeImageForUiTars",
124
124
  "systemPromptToLocateElement",
125
125
  "uiTarsPlanning"
126
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
126
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
127
127
  Object.defineProperty(exports, '__esModule', {
128
128
  value: true
129
129
  });
@@ -307,12 +307,12 @@ exports.AiExtractElementInfo = __webpack_exports__.AiExtractElementInfo;
307
307
  exports.AiJudgeOrderSensitive = __webpack_exports__.AiJudgeOrderSensitive;
308
308
  exports.AiLocateElement = __webpack_exports__.AiLocateElement;
309
309
  exports.AiLocateSection = __webpack_exports__.AiLocateSection;
310
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
310
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
311
311
  "AiExtractElementInfo",
312
312
  "AiJudgeOrderSensitive",
313
313
  "AiLocateElement",
314
314
  "AiLocateSection"
315
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
315
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
316
316
  Object.defineProperty(exports, '__esModule', {
317
317
  value: true
318
318
  });
@@ -146,9 +146,9 @@ async function plan(userInstruction, opts) {
146
146
  return returnValue;
147
147
  }
148
148
  exports.plan = __webpack_exports__.plan;
149
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
149
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
150
150
  "plan"
151
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
151
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
152
152
  Object.defineProperty(exports, '__esModule', {
153
153
  value: true
154
154
  });
@@ -55,9 +55,9 @@ const assertSchema = {
55
55
  }
56
56
  };
57
57
  exports.assertSchema = __webpack_exports__.assertSchema;
58
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
58
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
59
59
  "assertSchema"
60
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
60
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
61
61
  Object.defineProperty(exports, '__esModule', {
62
62
  value: true
63
63
  });
@@ -31,9 +31,9 @@ function bboxDescription(vlMode) {
31
31
  return '2d bounding box as [xmin, ymin, xmax, ymax]';
32
32
  }
33
33
  exports.bboxDescription = __webpack_exports__.bboxDescription;
34
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
34
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
35
35
  "bboxDescription"
36
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
36
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
37
37
  Object.defineProperty(exports, '__esModule', {
38
38
  value: true
39
39
  });
@@ -68,9 +68,9 @@ Return JSON:
68
68
  "error"?: "error message if any"
69
69
  }`;
70
70
  exports.elementDescriberInstruction = __webpack_exports__.elementDescriberInstruction;
71
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
71
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
72
72
  "elementDescriberInstruction"
73
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
73
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
74
74
  Object.defineProperty(exports, '__esModule', {
75
75
  value: true
76
76
  });
@@ -157,11 +157,11 @@ const extractDataSchema = {
157
157
  exports.extractDataQueryPrompt = __webpack_exports__.extractDataQueryPrompt;
158
158
  exports.extractDataSchema = __webpack_exports__.extractDataSchema;
159
159
  exports.systemPromptToExtract = __webpack_exports__.systemPromptToExtract;
160
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
160
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
161
161
  "extractDataQueryPrompt",
162
162
  "extractDataSchema",
163
163
  "systemPromptToExtract"
164
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
164
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
165
165
  Object.defineProperty(exports, '__esModule', {
166
166
  value: true
167
167
  });
@@ -70,10 +70,10 @@ When no element is found:
70
70
  const findElementPrompt = (targetElementDescription)=>`Find: ${targetElementDescription}`;
71
71
  exports.findElementPrompt = __webpack_exports__.findElementPrompt;
72
72
  exports.systemPromptToLocateElement = __webpack_exports__.systemPromptToLocateElement;
73
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
73
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
74
74
  "findElementPrompt",
75
75
  "systemPromptToLocateElement"
76
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
76
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
77
77
  Object.defineProperty(exports, '__esModule', {
78
78
  value: true
79
79
  });
@@ -27,12 +27,12 @@ __webpack_require__.d(__webpack_exports__, {
27
27
  descriptionForAction: ()=>descriptionForAction,
28
28
  systemPromptToTaskPlanning: ()=>systemPromptToTaskPlanning
29
29
  });
30
- const external_common_js_namespaceObject = require("../../common.js");
31
- const external_common_js_namespaceObject_1 = require("./common.js");
30
+ const zod_schema_utils_namespaceObject = require("@midscene/shared/zod-schema-utils");
31
+ const external_common_js_namespaceObject = require("./common.js");
32
32
  const commonOutputFields = `"error"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.
33
33
  "more_actions_needed_by_instruction": boolean, // Consider if there is still more action(s) to do after the action in "Log" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;
34
34
  const vlLocateParam = (vlMode)=>{
35
- if (vlMode) return `{bbox: [number, number, number, number], prompt: string } // ${(0, external_common_js_namespaceObject_1.bboxDescription)(vlMode)}`;
35
+ if (vlMode) return `{bbox: [number, number, number, number], prompt: string } // ${(0, external_common_js_namespaceObject.bboxDescription)(vlMode)}`;
36
36
  return "{ prompt: string /* description of the target element */ }";
37
37
  };
38
38
  const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
@@ -45,64 +45,11 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
45
45
  const isZodObject = schema._def?.typeName === 'ZodObject';
46
46
  if (isZodObject && schema.shape) {
47
47
  const shape = schema.shape;
48
- const getTypeName = (field)=>{
49
- const unwrapField = (f)=>{
50
- if (!f._def) return f;
51
- const typeName = f._def.typeName;
52
- if ('ZodOptional' === typeName || 'ZodNullable' === typeName || 'ZodDefault' === typeName) return unwrapField(f._def.innerType);
53
- if ('ZodEffects' === typeName) {
54
- if (f._def.schema) return unwrapField(f._def.schema);
55
- }
56
- return f;
57
- };
58
- const actualField = unwrapField(field);
59
- const fieldTypeName = actualField._def?.typeName;
60
- if ('ZodString' === fieldTypeName) return 'string';
61
- if ('ZodNumber' === fieldTypeName) return 'number';
62
- if ('ZodBoolean' === fieldTypeName) return 'boolean';
63
- if ('ZodArray' === fieldTypeName) return 'array';
64
- if ('ZodObject' === fieldTypeName) {
65
- if ((0, external_common_js_namespaceObject.ifMidsceneLocatorField)(actualField)) return locatorSchemaTypeDescription;
66
- return 'object';
67
- }
68
- if ('ZodEnum' === fieldTypeName) {
69
- const values = actualField._def?.values?.map((option)=>String(`'${option}'`)).join(', ') ?? 'enum';
70
- return `enum(${values})`;
71
- }
72
- if ('ZodUnion' === fieldTypeName) {
73
- const options = actualField._def?.options;
74
- if (options && options.length > 0) {
75
- const types = options.map((opt)=>getTypeName(opt));
76
- return types.join(' | ');
77
- }
78
- return 'union';
79
- }
80
- console.warn('failed to parse Zod type. This may lead to wrong params from the LLM.\n', actualField._def);
81
- return actualField.toString();
82
- };
83
- const getDescription = (field)=>{
84
- const unwrapField = (f)=>{
85
- if (!f._def) return f;
86
- const typeName = f._def.typeName;
87
- if ('ZodOptional' === typeName || 'ZodNullable' === typeName || 'ZodDefault' === typeName) return unwrapField(f._def.innerType);
88
- if ('ZodEffects' === typeName) {
89
- if (f._def.schema) return unwrapField(f._def.schema);
90
- }
91
- return f;
92
- };
93
- if ("description" in field) return field.description || null;
94
- const actualField = unwrapField(field);
95
- if ("description" in actualField) return actualField.description || null;
96
- if (actualField._def?.typeName === 'ZodObject') {
97
- if ('midscene_location_field_flag' in actualField._def.shape()) return 'Location information for the target element';
98
- }
99
- return null;
100
- };
101
48
  for (const [key, field] of Object.entries(shape))if (field && 'object' == typeof field) {
102
49
  const isOptional = 'function' == typeof field.isOptional && field.isOptional();
103
50
  const keyWithOptional = isOptional ? `${key}?` : key;
104
- const typeName = getTypeName(field);
105
- const description = getDescription(field);
51
+ const typeName = (0, zod_schema_utils_namespaceObject.getZodTypeName)(field, locatorSchemaTypeDescription);
52
+ const description = (0, zod_schema_utils_namespaceObject.getZodDescription)(field);
106
53
  let paramLine = `${keyWithOptional}: ${typeName}`;
107
54
  if (description) paramLine += ` // ${description}`;
108
55
  paramLines.push(paramLine);
@@ -114,12 +61,8 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
114
61
  });
115
62
  }
116
63
  } else {
117
- const schemaTypeName = schema._def?.typeName;
118
- let typeName = 'unknown';
119
- if ('ZodString' === schemaTypeName) typeName = 'string';
120
- else if ('ZodNumber' === schemaTypeName) typeName = 'number';
121
- else if ('ZodBoolean' === schemaTypeName) typeName = 'boolean';
122
- const description = "description" in schema ? schema.description : null;
64
+ const typeName = (0, zod_schema_utils_namespaceObject.getZodTypeName)(schema);
65
+ const description = (0, zod_schema_utils_namespaceObject.getZodDescription)(schema);
123
66
  let paramDescription = `- param: ${typeName}`;
124
67
  if (description) paramDescription += ` // ${description}`;
125
68
  paramDescription += ' (pass the value directly, not as an object)';
@@ -204,10 +147,10 @@ For example, if the instruction is to login and the form has already been filled
204
147
  }
205
148
  exports.descriptionForAction = __webpack_exports__.descriptionForAction;
206
149
  exports.systemPromptToTaskPlanning = __webpack_exports__.systemPromptToTaskPlanning;
207
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
150
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
208
151
  "descriptionForAction",
209
152
  "systemPromptToTaskPlanning"
210
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
153
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
211
154
  Object.defineProperty(exports, '__esModule', {
212
155
  value: true
213
156
  });
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] | undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' | ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n \"log\": \"Click the login button\",\n \"more_actions_needed_by_instruction\": false,\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${vlMode ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","getTypeName","field","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","values","option","String","options","types","opt","console","getDescription","isOptional","keyWithOptional","description","paramLine","line","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACEA,MAAMI,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,qCAAAA,eAAAA,AAAAA,EAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QACjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;gBAEnB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYD;gBAChC,MAAMK,gBAAgBD,YAAY,IAAI,EAAE;gBAExC,IAAIC,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,cACzB,OAAOZ;oBAET,OAAO;gBACT;gBACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAC/B,MAAME,SACHH,YAAY,IAAI,EAAE,QACf,IAAI,CAACI,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,KAAK,SAAS;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIF,AAAkB,eAAlBA,eAA8B;oBAChC,MAAMK,UAAUN,YAAY,IAAI,EAAE;oBAClC,IAAIM,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAab,YAAYa;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAT,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMU,iBAAiB,CAACd;gBAEtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBF,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMI,cAAcH,YAAYD;gBAGhC,IAAI,iBAAiBI,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIA,YAAY,IAAI,EAAE,aAAa,aACjC;oBAAA,IAAI,kCAAkCA,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACvB,KAAKmB,MAAM,IAAIlB,OAAO,OAAO,CAACgB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMe,aACJ,AAAqC,cAArC,OAAQf,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMgB,kBAAkBD,aAAa,GAAGlC,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMsB,WAAWJ,YAAYC;gBAG7B,MAAMiB,cAAcH,eAAed;gBAGnC,IAAIkB,YAAY,GAAGF,gBAAgB,EAAE,EAAEb,UAAU;gBACjD,IAAIc,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnCtB,WAAW,IAAI,CAACuB;YAClB;YAIF,IAAIvB,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACwB;oBAClBzB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEyB,MAAM;gBAC3B;YACF;QACF,OAAO;YAGL,MAAMC,iBAAiBxB,OAAO,IAAI,EAAE;YACpC,IAAIO,WAAW;YAEf,IAAIiB,AAAmB,gBAAnBA,gBAAgCjB,WAAW;iBAC1C,IAAIiB,AAAmB,gBAAnBA,gBAAgCjB,WAAW;iBAC/C,IAAIiB,AAAmB,iBAAnBA,gBAAiCjB,WAAW;YAGrD,MAAMc,cAAc,iBAAiBrB,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAIyB,mBAAmB,CAAC,SAAS,EAAElB,UAAU;YAC7C,IAAIc,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpB3B,OAAO,IAAI,CAAC2B;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAE9B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe6B,2BAA2B,EAC/CC,WAAW,EACXnC,MAAM,EACNoC,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACpC,QAClB,MAAM,IAAIqC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAAChC,SACtCD,qBACLC,QACAJ,cAAcqC,cAAcpC,SAASuC;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAE3C,mBAAmB;;;;;;;;;;;;;;;;;;;;;oCAqBa,EAAEE,SAAS,mCAAmC,GAAG;;;;AAIrF,CAAC;AACD"}
1
+ {"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n \"log\": \"Click the login button\",\n \"more_actions_needed_by_instruction\": false,\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${vlMode ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","field","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","paramLine","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;ACMA,MAAMI,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACf,KAAKkB,MAAM,IAAIjB,OAAO,OAAO,CAACgB,OACxC,IAAIC,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMC,aACJ,AACE,cADF,OAAQD,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAME,kBAAkBD,aAAa,GAAGnB,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMqB,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAeJ,OAAOP;gBAGvC,MAAMY,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBN;gBAGtC,IAAIO,YAAY,GAAGL,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,IAAIE,aACFE,aAAa,CAAC,IAAI,EAAEF,aAAa;gBAGnCT,WAAW,IAAI,CAACW;YAClB;YAIF,IAAIX,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACY;oBAClBb,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEa,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAML,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAeP;YAChC,MAAMQ,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBT;YAGtC,IAAIY,mBAAmB,CAAC,SAAS,EAAEN,UAAU;YAC7C,IAAIE,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpBd,OAAO,IAAI,CAACc;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEjB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAegB,2BAA2B,EAC/CC,WAAW,EACXtB,MAAM,EACNuB,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACvB,QAClB,MAAM,IAAIwB,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAACnB,SACtCD,qBACLC,QACAJ,cAAcwB,cAAcvB,SAAS0B;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAE9B,mBAAmB;;;;;;;;;;;;;;;;;;;;;oCAqBa,EAAEE,SAAS,mCAAmC,GAAG;;;;AAIrF,CAAC;AACD"}
@@ -68,10 +68,10 @@ If the description is "delete button on the second row with title 'Peter'", retu
68
68
  const sectionLocatorInstruction = (sectionDescription)=>`Find section containing: ${sectionDescription}`;
69
69
  exports.sectionLocatorInstruction = __webpack_exports__.sectionLocatorInstruction;
70
70
  exports.systemPromptToLocateSection = __webpack_exports__.systemPromptToLocateSection;
71
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
71
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
72
72
  "sectionLocatorInstruction",
73
73
  "systemPromptToLocateSection"
74
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
74
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
75
75
  Object.defineProperty(exports, '__esModule', {
76
76
  value: true
77
77
  });
@@ -61,10 +61,10 @@ Return true if the description is order-sensitive, false otherwise.
61
61
  const orderSensitiveJudgePrompt = (description)=>`Analyze this element description: "${description}"`;
62
62
  exports.orderSensitiveJudgePrompt = __webpack_exports__.orderSensitiveJudgePrompt;
63
63
  exports.systemPromptToJudgeOrderSensitive = __webpack_exports__.systemPromptToJudgeOrderSensitive;
64
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
64
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
65
65
  "orderSensitiveJudgePrompt",
66
66
  "systemPromptToJudgeOrderSensitive"
67
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
67
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
68
68
  Object.defineProperty(exports, '__esModule', {
69
69
  value: true
70
70
  });
@@ -159,7 +159,7 @@ exports.getScreenshotsForLLM = __webpack_exports__.getScreenshotsForLLM;
159
159
  exports.prepareEventSummary = __webpack_exports__.prepareEventSummary;
160
160
  exports.processEventsForLLM = __webpack_exports__.processEventsForLLM;
161
161
  exports.validateEvents = __webpack_exports__.validateEvents;
162
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
162
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
163
163
  "createEventCounts",
164
164
  "createMessageContent",
165
165
  "extractInputDescriptions",
@@ -170,7 +170,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
170
170
  "prepareEventSummary",
171
171
  "processEventsForLLM",
172
172
  "validateEvents"
173
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
173
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
174
174
  Object.defineProperty(exports, '__esModule', {
175
175
  value: true
176
176
  });
@@ -58,9 +58,9 @@ call_user() # Submit the task and call the user when the task is unsolvable, or
58
58
  `;
59
59
  }
60
60
  exports.systemPromptToLocateElementPosition = __webpack_exports__.systemPromptToLocateElementPosition;
61
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
61
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
62
62
  "systemPromptToLocateElementPosition"
63
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
63
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
64
64
  Object.defineProperty(exports, '__esModule', {
65
65
  value: true
66
66
  });
@@ -62,10 +62,10 @@ finished(content='xxx') # Use escape characters \\', \\", and \\n in content par
62
62
  const getSummary = (prediction)=>prediction.replace(/Reflection:[\s\S]*?(?=Action_Summary:|Action:|$)/g, '').trim();
63
63
  exports.getSummary = __webpack_exports__.getSummary;
64
64
  exports.getUiTarsPlanningPrompt = __webpack_exports__.getUiTarsPlanningPrompt;
65
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
65
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
66
66
  "getSummary",
67
67
  "getUiTarsPlanningPrompt"
68
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
68
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
69
69
  Object.defineProperty(exports, '__esModule', {
70
70
  value: true
71
71
  });
@@ -73,14 +73,14 @@ exports.describeUserPage = __webpack_exports__.describeUserPage;
73
73
  exports.distance = __webpack_exports__.distance;
74
74
  exports.distanceThreshold = __webpack_exports__.distanceThreshold;
75
75
  exports.samplePageDescription = __webpack_exports__.samplePageDescription;
76
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
76
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
77
77
  "describeElement",
78
78
  "describeSize",
79
79
  "describeUserPage",
80
80
  "distance",
81
81
  "distanceThreshold",
82
82
  "samplePageDescription"
83
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
83
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
84
84
  Object.defineProperty(exports, '__esModule', {
85
85
  value: true
86
86
  });
@@ -261,7 +261,7 @@ exports.getScreenshotsForLLM = __webpack_exports__.getScreenshotsForLLM;
261
261
  exports.prepareEventSummary = __webpack_exports__.prepareEventSummary;
262
262
  exports.processEventsForLLM = __webpack_exports__.processEventsForLLM;
263
263
  exports.validateEvents = __webpack_exports__.validateEvents;
264
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
264
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
265
265
  "createEventCounts",
266
266
  "createMessageContent",
267
267
  "extractInputDescriptions",
@@ -272,7 +272,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
272
272
  "prepareEventSummary",
273
273
  "processEventsForLLM",
274
274
  "validateEvents"
275
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
275
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
276
276
  Object.defineProperty(exports, '__esModule', {
277
277
  value: true
278
278
  });
@@ -340,14 +340,14 @@ exports.callAIWithStringResponse = __webpack_exports__.callAIWithStringResponse;
340
340
  exports.extractJSONFromCodeBlock = __webpack_exports__.extractJSONFromCodeBlock;
341
341
  exports.preprocessDoubaoBboxJson = __webpack_exports__.preprocessDoubaoBboxJson;
342
342
  exports.safeParseJson = __webpack_exports__.safeParseJson;
343
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
343
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
344
344
  "callAI",
345
345
  "callAIWithObjectResponse",
346
346
  "callAIWithStringResponse",
347
347
  "extractJSONFromCodeBlock",
348
348
  "preprocessDoubaoBboxJson",
349
349
  "safeParseJson"
350
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
350
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
351
351
  Object.defineProperty(exports, '__esModule', {
352
352
  value: true
353
353
  });
@@ -288,10 +288,10 @@ async function resizeImageForUiTars(imageBase64, size, uiTarsVersion) {
288
288
  }
289
289
  exports.resizeImageForUiTars = __webpack_exports__.resizeImageForUiTars;
290
290
  exports.uiTarsPlanning = __webpack_exports__.uiTarsPlanning;
291
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
291
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
292
292
  "resizeImageForUiTars",
293
293
  "uiTarsPlanning"
294
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
294
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
295
295
  Object.defineProperty(exports, '__esModule', {
296
296
  value: true
297
297
  });
@@ -413,7 +413,7 @@ exports.markupImageForLLM = __webpack_exports__.markupImageForLLM;
413
413
  exports.mergeRects = __webpack_exports__.mergeRects;
414
414
  exports.normalized01000 = __webpack_exports__.normalized01000;
415
415
  exports.parseActionParam = __webpack_exports__.parseActionParam;
416
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
416
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
417
417
  "AIActionType",
418
418
  "PointSchema",
419
419
  "RectSchema",
@@ -438,7 +438,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
438
438
  "mergeRects",
439
439
  "normalized01000",
440
440
  "parseActionParam"
441
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
441
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
442
442
  Object.defineProperty(exports, '__esModule', {
443
443
  value: true
444
444
  });
@@ -12,7 +12,7 @@ var __webpack_require__ = {};
12
12
  })();
13
13
  var __webpack_exports__ = {};
14
14
  __webpack_require__.r(__webpack_exports__);
15
- for(var __webpack_i__ in __webpack_exports__)exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
15
+ for(var __rspack_i in __webpack_exports__)exports[__rspack_i] = __webpack_exports__[__rspack_i];
16
16
  Object.defineProperty(exports, '__esModule', {
17
17
  value: true
18
18
  });
@@ -214,7 +214,7 @@ const defineActionAssert = ()=>defineAction({
214
214
  call: async (param)=>{
215
215
  if ('boolean' != typeof param?.result) throw new Error(`The result of the assertion must be a boolean, but got: ${typeof param?.result}. ${param.thought || '(no thought)'}`);
216
216
  (0, logger_namespaceObject.getDebug)('device:common-action')(`Assert: ${param.condition}, Thought: ${param.thought}, Result: ${param.result}`);
217
- if (!param.result) throw new Error(`Assertion failed: ${param.thought || '(no thought)'}. (Assertion = ${param.condition})`);
217
+ if (!param.result) throw new Error(`Assertion failed: ${param.thought || '(no thought)'} (Assertion = ${param.condition})`);
218
218
  }
219
219
  });
220
220
  exports.AbstractInterface = __webpack_exports__.AbstractInterface;
@@ -243,7 +243,7 @@ exports.defineActionRightClick = __webpack_exports__.defineActionRightClick;
243
243
  exports.defineActionScroll = __webpack_exports__.defineActionScroll;
244
244
  exports.defineActionSwipe = __webpack_exports__.defineActionSwipe;
245
245
  exports.defineActionTap = __webpack_exports__.defineActionTap;
246
- for(var __webpack_i__ in __webpack_exports__)if (-1 === [
246
+ for(var __rspack_i in __webpack_exports__)if (-1 === [
247
247
  "AbstractInterface",
248
248
  "ActionLongPressParamSchema",
249
249
  "ActionSwipeParamSchema",
@@ -270,7 +270,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
270
270
  "defineActionScroll",
271
271
  "defineActionSwipe",
272
272
  "defineActionTap"
273
- ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
273
+ ].indexOf(__rspack_i)) exports[__rspack_i] = __webpack_exports__[__rspack_i];
274
274
  Object.defineProperty(exports, '__esModule', {
275
275
  value: true
276
276
  });