@midscene/core 1.2.2-beta-20260119111553.0 → 1.2.2-beta-20260119114334.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs +47 -3
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +39 -45
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/device/index.mjs +3 -3
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/llm-planning.js +48 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +39 -45
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/device/index.js +3 -3
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/agent.d.ts +2 -2
- package/dist/types/ai-model/llm-planning.d.ts +5 -1
- package/dist/types/device/index.d.ts +4 -4
- package/package.json +2 -2
|
@@ -30,15 +30,6 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
30
30
|
const env_namespaceObject = require("@midscene/shared/env");
|
|
31
31
|
const zod_schema_utils_namespaceObject = require("@midscene/shared/zod-schema-utils");
|
|
32
32
|
const external_common_js_namespaceObject = require("./common.js");
|
|
33
|
-
const buildCommonOutputFields = (includeThought, preferredLanguage)=>{
|
|
34
|
-
const fields = [
|
|
35
|
-
`"note"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action. Use ${preferredLanguage}.`,
|
|
36
|
-
`"log": string, // a brief preamble to the user explaining what you're about to do. Use ${preferredLanguage}.`,
|
|
37
|
-
`"error"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use ${preferredLanguage}.`
|
|
38
|
-
];
|
|
39
|
-
if (includeThought) fields.unshift('"thought": string, // your thought process about the next action');
|
|
40
|
-
return fields.join('\n ');
|
|
41
|
-
};
|
|
42
33
|
const vlLocateParam = (modelFamily)=>{
|
|
43
34
|
if (modelFamily) return `{bbox: [number, number, number, number], prompt: string } // ${(0, external_common_js_namespaceObject.bboxDescription)(modelFamily)}`;
|
|
44
35
|
return "{ prompt: string /* description of the target element */ }";
|
|
@@ -106,6 +97,20 @@ async function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbo
|
|
|
106
97
|
if (includeBbox && !modelFamily) throw new Error('modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.');
|
|
107
98
|
const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, vlLocateParam(includeBbox ? modelFamily : void 0)));
|
|
108
99
|
const actionList = actionDescriptionList.join('\n');
|
|
100
|
+
const thoughtFieldInstruction = `
|
|
101
|
+
## About the \`thought\` field (reasoning process)
|
|
102
|
+
|
|
103
|
+
The \`thought\` field captures your structured reasoning about the next action. It should include:
|
|
104
|
+
|
|
105
|
+
1. **Overall task**: What is the overall instruction/goal?
|
|
106
|
+
2. **Current observation**: What do I see on the current screen?
|
|
107
|
+
3. **Progress**: What steps have been completed so far?
|
|
108
|
+
4. **Next step**: What should the next step be and why?
|
|
109
|
+
5. **Action selection**: Which Action Type should be used to accomplish this step?
|
|
110
|
+
|
|
111
|
+
**Example thought structure:**
|
|
112
|
+
"The overall task is to [task]. On the current screen, I can see [observations]. We have completed [steps]. The next step should be [next action] because [reason]. I will use [Action Type] to accomplish this."
|
|
113
|
+
`;
|
|
109
114
|
const logFieldInstruction = `
|
|
110
115
|
## About the \`log\` field (preamble message)
|
|
111
116
|
|
|
@@ -113,7 +118,7 @@ The \`log\` field is a brief preamble message to the user explaining what you're
|
|
|
113
118
|
|
|
114
119
|
- **Use ${preferredLanguage}**
|
|
115
120
|
- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).
|
|
116
|
-
- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what
|
|
121
|
+
- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.
|
|
117
122
|
- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.
|
|
118
123
|
|
|
119
124
|
**Examples:**
|
|
@@ -123,11 +128,6 @@ The \`log\` field is a brief preamble message to the user explaining what you're
|
|
|
123
128
|
- "Go back to find the login button"
|
|
124
129
|
`;
|
|
125
130
|
const shouldIncludeThought = includeThought ?? true;
|
|
126
|
-
const commonOutputFields = buildCommonOutputFields(shouldIncludeThought, preferredLanguage);
|
|
127
|
-
const exampleThoughtLine = shouldIncludeThought ? ` "thought": "The form has already been filled, I need to click the login button to login",
|
|
128
|
-
` : '';
|
|
129
|
-
const exampleThoughtLineWithNote = shouldIncludeThought ? ` "thought": "I need to note the titles in the current screenshot for further processing and scroll to find more titles",
|
|
130
|
-
` : '';
|
|
131
131
|
return `
|
|
132
132
|
Target: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.
|
|
133
133
|
|
|
@@ -145,51 +145,45 @@ Please tell what the next one action is (or null if no action should be done) to
|
|
|
145
145
|
## Supporting actions
|
|
146
146
|
${actionList}
|
|
147
147
|
|
|
148
|
+
${shouldIncludeThought ? thoughtFieldInstruction : ''}
|
|
149
|
+
|
|
148
150
|
${logFieldInstruction}
|
|
149
151
|
|
|
150
152
|
## Return format
|
|
151
153
|
|
|
152
|
-
Return in
|
|
153
|
-
{
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
// k-v style parameter fields
|
|
160
|
-
},
|
|
161
|
-
} | null
|
|
162
|
-
}
|
|
154
|
+
Return in XML format with the following structure:
|
|
155
|
+
${shouldIncludeThought ? '<thought>Structured reasoning: overall task, current observation, progress, next step, action selection</thought>' : ''}
|
|
156
|
+
<note>CRITICAL: If any information from the current screenshot will be needed in follow-up actions, you MUST record it here completely. The current screenshot will NOT be available in subsequent steps, so this note is your only way to preserve essential information for later use. Examples: extracted data, element states, content that needs to be referenced. Leave empty if no follow-up information is needed.</note>
|
|
157
|
+
<log>a brief preamble to the user</log>
|
|
158
|
+
<error>error messages (optional)</error>
|
|
159
|
+
<action-type>the type of the action, or null if no action</action-type>
|
|
160
|
+
<action-param-json>JSON object containing the action parameters</action-param-json>
|
|
163
161
|
|
|
164
162
|
For example, if the instruction is to login and the form has already been filled, this is a valid return value:
|
|
165
163
|
|
|
164
|
+
${shouldIncludeThought ? '<thought>The overall task is to login. On the current screen, I can see a login form with username and password fields already filled, and a login button. We have completed filling in the credentials. The next step should be to submit the login form by clicking the login button. I will use Tap action to click it.</thought>' : ''}<log>Click the login button</log>
|
|
165
|
+
<action-type>Tap</action-type>
|
|
166
|
+
<action-param-json>
|
|
166
167
|
{
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
"type": "Tap",
|
|
170
|
-
"param": {
|
|
171
|
-
"locate": {
|
|
172
|
-
"prompt": "The login button"${modelFamily && includeBbox ? ', "bbox": [100, 200, 300, 400]' : ''}
|
|
173
|
-
}
|
|
174
|
-
}
|
|
168
|
+
"locate": {
|
|
169
|
+
"prompt": "The login button"${modelFamily && includeBbox ? ', "bbox": [100, 200, 300, 400]' : ''}
|
|
175
170
|
}
|
|
176
171
|
}
|
|
172
|
+
</action-param-json>
|
|
177
173
|
|
|
178
174
|
For example, if the instruction is to find out every title in the screenshot, the return value should be:
|
|
179
175
|
|
|
176
|
+
${shouldIncludeThought ? '<thought>The overall task is to find out every title in the screenshot. On the current screen, I can see several article titles: \'Hello, world!\', \'Midscene 101\', and \'Model strategy\'. We have just started and observed the visible titles. The next step should be to scroll down to find more titles that may not be currently visible. I will use Scroll action to reveal more content.</thought>' : ''}<note>The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'</note>
|
|
177
|
+
<log>Scroll to find more titles</log>
|
|
178
|
+
<action-type>Scroll</action-type>
|
|
179
|
+
<action-param-json>
|
|
180
180
|
{
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
"param": {
|
|
186
|
-
"locate": {
|
|
187
|
-
"prompt": "The page content area"
|
|
188
|
-
},
|
|
189
|
-
"direction": "down"
|
|
190
|
-
}
|
|
191
|
-
}
|
|
181
|
+
"locate": {
|
|
182
|
+
"prompt": "The page content area"
|
|
183
|
+
},
|
|
184
|
+
"direction": "down"
|
|
192
185
|
}
|
|
186
|
+
</action-param-json>
|
|
193
187
|
`;
|
|
194
188
|
}
|
|
195
189
|
exports.descriptionForAction = __webpack_exports__.descriptionForAction;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst buildCommonOutputFields = (\n includeThought: boolean,\n preferredLanguage: string,\n) => {\n const fields = [\n `\"note\"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action. Use ${preferredLanguage}.`,\n `\"log\": string, // a brief preamble to the user explaining what you're about to do. Use ${preferredLanguage}.`,\n `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use ${preferredLanguage}.`,\n ];\n\n if (includeThought) {\n fields.unshift(\n `\"thought\": string, // your thought process about the next action`,\n );\n }\n\n return fields.join('\\n ');\n};\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n const shouldIncludeThought = includeThought ?? true;\n const commonOutputFields = buildCommonOutputFields(\n shouldIncludeThought,\n preferredLanguage,\n );\n const exampleThoughtLine = shouldIncludeThought\n ? ` \"thought\": \"The form has already been filled, I need to click the login button to login\",\n`\n : '';\n const exampleThoughtLineWithNote = shouldIncludeThought\n ? ` \"thought\": \"I need to note the titles in the current screenshot for further processing and scroll to find more titles\",\n`\n : '';\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n${exampleThoughtLine} \"log\": \"Click the login button\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${modelFamily && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n}\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n{\n${exampleThoughtLineWithNote} \"note\": \"The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'\",\n \"log\": \"Scroll to find more titles\",\n \"action\": {\n \"type\": \"Scroll\",\n \"param\": {\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n }\n }\n}\n`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","buildCommonOutputFields","includeThought","preferredLanguage","fields","vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","paramLines","schema","isZodObject","shape","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","getPreferredLanguage","Error","actionDescriptionList","actionList","logFieldInstruction","shouldIncludeThought","commonOutputFields","exampleThoughtLine","exampleThoughtLineWithNote"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACOA,MAAMI,0BAA0B,CAC9BC,gBACAC;IAEA,MAAMC,SAAS;QACb,CAAC,yRAAyR,EAAED,kBAAkB,CAAC,CAAC;QAChT,CAAC,uFAAuF,EAAEA,kBAAkB,CAAC,CAAC;QAC9G,CAAC,2KAA2K,EAAEA,kBAAkB,CAAC,CAAC;KACnM;IAED,IAAID,gBACFE,OAAO,OAAO,CACZ;IAIJ,OAAOA,OAAO,IAAI,CAAC;AACrB;AAEA,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMb,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEW,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMG,aAAuB,EAAE;QAG/B,MAAMC,SAASJ,OAAO,WAAW;QAIjC,MAAMK,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACvB,KAAKa,MAAM,IAAIZ,OAAO,OAAO,CAACwB,OACxC,IAAIZ,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMa,aACJ,AACE,cADF,OAAQb,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMc,kBAAkBD,aAAa,GAAG1B,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAM4B,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAehB,OAAOO;gBAGvC,MAAMU,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBlB;gBAGtC,MAAMmB,eAAepB,iBAAiBC;gBACtC,MAAMoB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3Cd,WAAW,IAAI,CAACa;YAClB;YAIF,IAAIb,WAAW,MAAM,GAAG,GAAG;gBACzBd,OAAO,IAAI,CAAC;gBACZc,WAAW,OAAO,CAAC,CAACiB;oBAClB/B,OAAO,IAAI,CAAC,CAAC,IAAI,EAAE+B,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAeN;YAChC,MAAMO,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBR;YAGtC,IAAIiB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBhC,OAAO,IAAI,CAACgC;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAErB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMb,OAAO,IAAI,CAAC,CAAC,EAAE,EAAEa,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeoB,2BAA2B,EAC/CC,WAAW,EACXhC,WAAW,EACXiC,WAAW,EACXrC,cAAc,EAMf;IACC,MAAMC,oBAAoBqC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAG1B,IAAID,eAAe,CAACjC,aAClB,MAAM,IAAImC,MACR;IAIJ,MAAMC,wBAAwBJ,YAAY,GAAG,CAAC,CAACvB,SACtCD,qBACLC,QACAV,cAAckC,cAAcjC,cAAcwB;IAG9C,MAAMa,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,sBAAsB,CAAC;;;;;QAKvB,EAAEzC,kBAAkB;;;;;;;;;;AAU5B,CAAC;IAEC,MAAM0C,uBAAuB3C,kBAAkB;IAC/C,MAAM4C,qBAAqB7C,wBACzB4C,sBACA1C;IAEF,MAAM4C,qBAAqBF,uBACvB,CAAC;AACP,CAAC,GACK;IACJ,MAAMG,6BAA6BH,uBAC/B,CAAC;AACP,CAAC,GACK;IAEJ,OAAO,CAAC;;;;;;;;;;;;;;;AAeV,EAAEF,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;EAMpB,EAAEE,mBAAmB;;;;;;;;;;;;;AAavB,EAAEC,mBAAmB;;;;;oCAKe,EAAEzC,eAAeiC,cAAc,mCAAmC,GAAG;;;;;;;;;AASzG,EAAES,2BAA2B;;;;;;;;;;;;AAY7B,CAAC;AACD"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const thoughtFieldInstruction = `\n## About the \\`thought\\` field (reasoning process)\n\nThe \\`thought\\` field captures your structured reasoning about the next action. It should include:\n\n1. **Overall task**: What is the overall instruction/goal?\n2. **Current observation**: What do I see on the current screen?\n3. **Progress**: What steps have been completed so far?\n4. **Next step**: What should the next step be and why?\n5. **Action selection**: Which Action Type should be used to accomplish this step?\n\n**Example thought structure:**\n\"The overall task is to [task]. On the current screen, I can see [observations]. We have completed [steps]. The next step should be [next action] because [reason]. I will use [Action Type] to accomplish this.\"\n`;\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n const shouldIncludeThought = includeThought ?? true;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${shouldIncludeThought ? thoughtFieldInstruction : ''}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in XML format with the following structure:\n${shouldIncludeThought ? '<thought>Structured reasoning: overall task, current observation, progress, next step, action selection</thought>' : ''}\n<note>CRITICAL: If any information from the current screenshot will be needed in follow-up actions, you MUST record it here completely. The current screenshot will NOT be available in subsequent steps, so this note is your only way to preserve essential information for later use. Examples: extracted data, element states, content that needs to be referenced. Leave empty if no follow-up information is needed.</note>\n<log>a brief preamble to the user</log>\n<error>error messages (optional)</error>\n<action-type>the type of the action, or null if no action</action-type>\n<action-param-json>JSON object containing the action parameters</action-param-json>\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n${shouldIncludeThought ? '<thought>The overall task is to login. On the current screen, I can see a login form with username and password fields already filled, and a login button. We have completed filling in the credentials. The next step should be to submit the login form by clicking the login button. I will use Tap action to click it.</thought>' : ''}<log>Click the login button</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": {\n \"prompt\": \"The login button\"${modelFamily && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n}\n</action-param-json>\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n${shouldIncludeThought ? '<thought>The overall task is to find out every title in the screenshot. On the current screen, I can see several article titles: \\'Hello, world!\\', \\'Midscene 101\\', and \\'Model strategy\\'. We have just started and observed the visible titles. The next step should be to scroll down to find more titles that may not be currently visible. I will use Scroll action to reveal more content.</thought>' : ''}<note>The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'</note>\n<log>Scroll to find more titles</log>\n<action-type>Scroll</action-type>\n<action-param-json>\n{\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n}\n</action-param-json>\n`;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","includeThought","preferredLanguage","getPreferredLanguage","Error","actionDescriptionList","actionList","thoughtFieldInstruction","logFieldInstruction","shouldIncludeThought"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;ACKA,MAAMI,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACpB,KAAKS,MAAM,IAAIR,OAAO,OAAO,CAACqB,OACxC,IAAIb,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMc,aACJ,AACE,cADF,OAAQd,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMe,kBAAkBD,aAAa,GAAGvB,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMyB,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAejB,OAAOO;gBAGvC,MAAMW,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBnB;gBAGtC,MAAMoB,eAAerB,iBAAiBC;gBACtC,MAAMqB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3Cd,WAAW,IAAI,CAACa;YAClB;YAIF,IAAIb,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACiB;oBAClBlB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEkB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,AAAAA,IAAAA,iCAAAA,cAAAA,AAAAA,EAAeN;YAChC,MAAMO,cAAcC,AAAAA,IAAAA,iCAAAA,iBAAAA,AAAAA,EAAkBR;YAGtC,IAAIiB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBnB,OAAO,IAAI,CAACmB;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEtB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeqB,2BAA2B,EAC/CC,WAAW,EACXjC,WAAW,EACXkC,WAAW,EACXC,cAAc,EAMf;IACC,MAAMC,oBAAoBC,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA;IAG1B,IAAIH,eAAe,CAAClC,aAClB,MAAM,IAAIsC,MACR;IAIJ,MAAMC,wBAAwBN,YAAY,GAAG,CAAC,CAACxB,SACtCD,qBACLC,QACAV,cAAcmC,cAAclC,cAAcyB;IAG9C,MAAMe,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,0BAA0B,CAAC;;;;;;;;;;;;;AAanC,CAAC;IAEC,MAAMC,sBAAsB,CAAC;;;;;QAKvB,EAAEN,kBAAkB;;;;;;;;;;AAU5B,CAAC;IAEC,MAAMO,uBAAuBR,kBAAkB;IAE/C,OAAO,CAAC;;;;;;;;;;;;;;;AAeV,EAAEK,WAAW;;AAEb,EAAEG,uBAAuBF,0BAA0B,GAAG;;AAEtD,EAAEC,oBAAoB;;;;;AAKtB,EAAEC,uBAAuB,sHAAsH,GAAG;;;;;;;;;AASlJ,EAAEA,uBAAuB,yUAAyU,GAAG;;;;;gCAKrU,EAAE3C,eAAekC,cAAc,mCAAmC,GAAG;;;;;;;AAOrG,EAAES,uBAAuB,iZAAiZ,GAAG;;;;;;;;;;;AAW7a,CAAC;AACD"}
|
package/dist/lib/device/index.js
CHANGED
|
@@ -108,11 +108,11 @@ const actionInputParamSchema = external_zod_namespaceObject.z.object({
|
|
|
108
108
|
external_zod_namespaceObject.z.number()
|
|
109
109
|
]).transform((val)=>String(val)).describe('The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.'),
|
|
110
110
|
locate: (0, external_common_js_namespaceObject.getMidsceneLocationSchema)().describe(inputLocateDescription).optional(),
|
|
111
|
-
mode: external_zod_namespaceObject.z
|
|
111
|
+
mode: external_zod_namespaceObject.z["enum"]([
|
|
112
112
|
'replace',
|
|
113
113
|
'clear',
|
|
114
|
-
'
|
|
115
|
-
]).default('replace').optional().describe('Input mode: "replace" (default) - clear the field and input the value; "
|
|
114
|
+
'append'
|
|
115
|
+
]).default('replace').optional().describe('Input mode: "replace" (default) - clear the field and input the value; "append" - append the value to existing content; "clear" - clear the field without inputting new text.')
|
|
116
116
|
});
|
|
117
117
|
const defineActionInput = (call)=>defineAction({
|
|
118
118
|
name: 'Input',
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"device/index.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/device/index.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { finalizeActionName, getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z.preprocess(\n (val) => (val === 'append' ? 'typeOnly' : val),\n z\n .enum(['replace', 'clear', 'typeOnly'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"typeOnly\" - type the value directly without clearing the field first; \"clear\" - clear the field without inputting new text.',\n ),\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'typeOnly' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling to the bottom, \"scrollToTop\" for scrolling to the top, \"scrollToRight\" for scrolling to the right, \"scrollToLeft\" for scrolling to the left',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Drag and drop (hold the mouse or finger down and move the mouse) ',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\n// Assert\nexport const actionAssertParamSchema = z.object({\n condition: z.string().describe('The condition of the assertion'),\n thought: z\n .string()\n .describe(\n 'The thought of the assertion, like \"I can see there are A, B, C elements on the page, which means ... , so the assertion is true\"',\n ),\n result: z.boolean().describe('The result of the assertion, true or false'),\n});\nexport type ActionAssertParam = {\n condition: string;\n thought: string;\n result: boolean;\n};\n\nexport const defineActionAssert = (): DeviceAction<ActionAssertParam> => {\n return defineAction<typeof actionAssertParamSchema, ActionAssertParam>({\n name: 'Print_Assert_Result',\n description:\n 'Print the result of the assertion. Use this only when the user asks for an assertion',\n paramSchema: actionAssertParamSchema,\n call: async (param) => {\n if (typeof param?.result !== 'boolean') {\n throw new Error(\n `The result of the assertion must be a boolean, but got: ${typeof param?.result}. ${param.thought || '(no thought)'}`,\n );\n }\n\n getDebug('device:common-action')(\n `Assert: ${param.condition}, Thought: ${param.thought}, Result: ${param.result}`,\n );\n\n if (!param.result) {\n throw new Error(\n `Assertion failed: ${param.thought || '(no thought)'} (Assertion = ${param.condition})`,\n );\n }\n },\n });\n};\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n millisecond: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n millisecond?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n call: async (param) => {\n const duration = param?.millisecond ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\n// Finalize\nexport const actionFinalizeParamSchema = z.object({\n message: z\n .string()\n .optional()\n .describe(\n 'The conclusion, data, or return value that the user needs. This message will be provided to the user when the task is finalized.',\n ),\n});\nexport type ActionFinalizeParam = {\n message?: string;\n};\n\nexport const defineActionFinalize = (): DeviceAction<ActionFinalizeParam> => {\n return defineAction<typeof actionFinalizeParamSchema, ActionFinalizeParam>({\n name: finalizeActionName,\n description:\n 'Finalize the task. You can provide the conclusion, data, or return value that the user needs in the message.',\n paramSchema: actionFinalizeParamSchema,\n call: async (param) => {\n getDebug('device:common-action')(\n `Task finalized${param?.message ? `: ${param.message}` : ''}`,\n );\n return param.message;\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n} from './device-options';\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput","actionAssertParamSchema","defineActionAssert","param","Error","getDebug","ActionSleepParamSchema","defineActionSleep","duration","Promise","resolve","setTimeout","actionFinalizeParamSchema","defineActionFinalize","finalizeActionName"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACWO,MAAeI;AAwCtB;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAIF,MAAMC,uBAAuBC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC3C,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAClD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACnD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIF,MAAMO,yBACJ;AACK,MAAMC,yBAAyBX,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,OAAOA,6BAAAA,CAAAA,CAAAA,KACC,CAAC;QAACA,6BAAAA,CAAAA,CAAAA,MAAQ;QAAIA,6BAAAA,CAAAA,CAAAA,MAAQ;KAAG,EAC9B,SAAS,CAAC,CAACY,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQX,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,CAACS,wBACT,QAAQ;IACX,MAAMV,6BAAAA,CAAAA,CAAAA,UAAY,CAChB,CAACY,MAASA,AAAQ,aAARA,MAAmB,aAAaA,KAC1CZ,6BAAAA,CAAAA,CAAAA,OACO,CAAC;QAAC;QAAW;QAAS;KAAW,EACrC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAGR;AAOO,MAAMc,oBAAoB,CAC/BX,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACbR;IACF;AAIK,MAAMY,iCAAiCf,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACrD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,6BAAAA,CAAAA,CAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMgB,4BAA4B,CACvCb,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAakB;QACbZ;IACF;AAIK,MAAMc,0BAA0BjB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC9C,YAAYA,6BAAAA,CAAAA,CAAAA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,6BAAAA,CAAAA,CAAAA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,6BAAAA,CAAAA,CAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMiB,qBAAqB,CAChCf,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaoB;QACbd;IACF;AAIK,MAAMgB,+BAA+BnB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACnD,MAAMC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;IAC3C,IAAIA,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMmB,0BAA0B,CACrCjB,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasB;QACbhB;IACF;AAGK,MAAMkB,6BAA6BrB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACjD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAC1C;IAEF,UAAUD,6BAAAA,CAAAA,CAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsB,wBAAwB,CACnCnB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAawB;QACblB;IACF;AAGK,MAAMoB,yBAAyBvB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,OAAOC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,6BAAAA,CAAAA,CAAAA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,6BAAAA,CAAAA,CAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,6BAAAA,CAAAA,CAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,6BAAAA,CAAAA,CAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMwB,oBAAoB,CAC/BrB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa0B;QACbpB;IACF;AAIK,MAAMsB,8BAA8BzB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAClD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMyB,yBAAyB,CACpCvB,OAEON,aAGL;QACA,MAAM;QACN,aAAaa;QACb,gBAAgB;QAChB,aAAae;QACbtB;IACF;AAIK,MAAMwB,0BAA0B3B,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC9C,WAAWA,6BAAAA,CAAAA,CAAAA,MAAQ,GAAG,QAAQ,CAAC;IAC/B,SAASA,6BAAAA,CAAAA,CAAAA,MACA,GACN,QAAQ,CACP;IAEJ,QAAQA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ,CAAC;AAC/B;AAOO,MAAM4B,qBAAqB,IACzB/B,aAAgE;QACrE,MAAM;QACN,aACE;QACF,aAAa8B;QACb,MAAM,OAAOE;YACX,IAAI,AAAyB,aAAzB,OAAOA,OAAO,QAChB,MAAM,IAAIC,MACR,CAAC,wDAAwD,EAAE,OAAOD,OAAO,OAAO,EAAE,EAAEA,MAAM,OAAO,IAAI,gBAAgB;YAIzHE,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,wBACP,CAAC,QAAQ,EAAEF,MAAM,SAAS,CAAC,WAAW,EAAEA,MAAM,OAAO,CAAC,UAAU,EAAEA,MAAM,MAAM,EAAE;YAGlF,IAAI,CAACA,MAAM,MAAM,EACf,MAAM,IAAIC,MACR,CAAC,kBAAkB,EAAED,MAAM,OAAO,IAAI,eAAe,cAAc,EAAEA,MAAM,SAAS,CAAC,CAAC,CAAC;QAG7F;IACF;AAIK,MAAMG,yBAAyBhC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,aAAaA,6BAAAA,CAAAA,CAAAA,MACJ,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMiC,oBAAoB,IACxBpC,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAamC;QACb,MAAM,OAAOH;YACX,MAAMK,WAAWL,OAAO,eAAe;YACvCE,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,wBAAwB,CAAC,aAAa,EAAEG,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIC,QAAQ,CAACC,UAAYC,WAAWD,SAASF;QACrD;IACF;AAIK,MAAMI,4BAA4BtC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAChD,SAASA,6BAAAA,CAAAA,CAAAA,MACA,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAKO,MAAMuC,uBAAuB,IAC3B1C,aAAoE;QACzE,MAAM2C,mCAAAA,kBAAkBA;QACxB,aACE;QACF,aAAaF;QACb,MAAM,OAAOT;YACXE,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,wBACP,CAAC,cAAc,EAAEF,OAAO,UAAU,CAAC,EAAE,EAAEA,MAAM,OAAO,EAAE,GAAG,IAAI;YAE/D,OAAOA,MAAM,OAAO;QACtB;IACF"}
|
|
1
|
+
{"version":3,"file":"device/index.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/device/index.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { finalizeActionName, getMidsceneLocationSchema } from '@/common';\nimport type {\n ActionScrollParam,\n DeviceAction,\n LocateResultElement,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport interface FileChooserHandler {\n accept(files: string[]): Promise<void>;\n}\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[];\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n options?: {\n targetDescription?: string;\n modelConfig?: IModelConfig;\n },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // for web only\n registerFileChooserListener?(\n handler: (chooser: FileChooserHandler) => Promise<void>,\n ): Promise<{ dispose: () => void; getError: () => Error | undefined }>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\n// TReturn allows specifying the return type of the action\nexport const defineAction = <\n TSchema extends z.ZodType | undefined = undefined,\n TRuntime = TSchema extends z.ZodType ? z.infer<TSchema> : undefined,\n TReturn = any,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema?: TSchema;\n call: (param: TRuntime) => Promise<TReturn> | TReturn;\n } & Partial<\n Omit<\n DeviceAction<TRuntime, TReturn>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime, TReturn> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nconst inputLocateDescription =\n 'the position of the placeholder or text content in the target input field. If there is no content, locate the center of the input field.';\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe(inputLocateDescription)\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'append'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"append\" - append the value to existing content; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n scrollType: z\n .enum([\n 'singleAction',\n 'scrollToBottom',\n 'scrollToTop',\n 'scrollToRight',\n 'scrollToLeft',\n ])\n .default('singleAction')\n .describe(\n 'The scroll behavior: \"singleAction\" for a single scroll action, \"scrollToBottom\" for scrolling to the bottom, \"scrollToTop\" for scrolling to the top, \"scrollToRight\" for scrolling to the right, \"scrollToLeft\" for scrolling to the left',\n ),\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe(\n 'The direction to scroll. Only effective when scrollType is \"singleAction\".',\n ),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Describe the target element to be scrolled on, like \"the table\" or \"the list\" or \"the content area\" or \"the scrollable area\". Do NOT provide a general intent like \"scroll to find some element\"',\n ),\n});\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description:\n 'Drag and drop (hold the mouse or finger down and move the mouse) ',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The input field to be cleared')\n .optional(),\n});\nexport type ActionClearInputParam = {\n locate?: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: inputLocateDescription,\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\n// Assert\nexport const actionAssertParamSchema = z.object({\n condition: z.string().describe('The condition of the assertion'),\n thought: z\n .string()\n .describe(\n 'The thought of the assertion, like \"I can see there are A, B, C elements on the page, which means ... , so the assertion is true\"',\n ),\n result: z.boolean().describe('The result of the assertion, true or false'),\n});\nexport type ActionAssertParam = {\n condition: string;\n thought: string;\n result: boolean;\n};\n\nexport const defineActionAssert = (): DeviceAction<ActionAssertParam> => {\n return defineAction<typeof actionAssertParamSchema, ActionAssertParam>({\n name: 'Print_Assert_Result',\n description:\n 'Print the result of the assertion. Use this only when the user asks for an assertion',\n paramSchema: actionAssertParamSchema,\n call: async (param) => {\n if (typeof param?.result !== 'boolean') {\n throw new Error(\n `The result of the assertion must be a boolean, but got: ${typeof param?.result}. ${param.thought || '(no thought)'}`,\n );\n }\n\n getDebug('device:common-action')(\n `Assert: ${param.condition}, Thought: ${param.thought}, Result: ${param.result}`,\n );\n\n if (!param.result) {\n throw new Error(\n `Assertion failed: ${param.thought || '(no thought)'} (Assertion = ${param.condition})`,\n );\n }\n },\n });\n};\n\n// Sleep\nexport const ActionSleepParamSchema = z.object({\n millisecond: z\n .number()\n .default(1000)\n .optional()\n .describe('Sleep duration in milliseconds, defaults to 1000ms (1 second)'),\n});\n\nexport type ActionSleepParam = {\n millisecond?: number;\n};\n\nexport const defineActionSleep = (): DeviceAction<ActionSleepParam> => {\n return defineAction<typeof ActionSleepParamSchema, ActionSleepParam>({\n name: 'Sleep',\n description:\n 'Wait for a specified duration before continuing. Defaults to 1 second (1000ms) if not specified.',\n paramSchema: ActionSleepParamSchema,\n call: async (param) => {\n const duration = param?.millisecond ?? 1000;\n getDebug('device:common-action')(`Sleeping for ${duration}ms`);\n await new Promise((resolve) => setTimeout(resolve, duration));\n },\n });\n};\n\n// Finalize\nexport const actionFinalizeParamSchema = z.object({\n message: z\n .string()\n .optional()\n .describe(\n 'The conclusion, data, or return value that the user needs. This message will be provided to the user when the task is finalized.',\n ),\n});\nexport type ActionFinalizeParam = {\n message?: string;\n};\n\nexport const defineActionFinalize = (): DeviceAction<ActionFinalizeParam> => {\n return defineAction<typeof actionFinalizeParamSchema, ActionFinalizeParam>({\n name: finalizeActionName,\n description:\n 'Finalize the task. You can provide the conclusion, data, or return value that the user needs in the message.',\n paramSchema: actionFinalizeParamSchema,\n call: async (param) => {\n getDebug('device:common-action')(\n `Task finalized${param?.message ? `: ${param.message}` : ''}`,\n );\n return param.message;\n },\n });\n};\n\nexport type { DeviceAction } from '../types';\nexport type {\n AndroidDeviceOpt,\n AndroidDeviceInputOpt,\n IOSDeviceOpt,\n IOSDeviceInputOpt,\n} from './device-options';\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","inputLocateDescription","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput","actionAssertParamSchema","defineActionAssert","param","Error","getDebug","ActionSleepParamSchema","defineActionSleep","duration","Promise","resolve","setTimeout","actionFinalizeParamSchema","defineActionFinalize","finalizeActionName"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACWO,MAAeI;AAwCtB;AAKO,MAAMC,eAAe,CAK1BC,SAaOA;AAIF,MAAMC,uBAAuBC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC3C,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAClD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACnD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIF,MAAMO,yBACJ;AACK,MAAMC,yBAAyBX,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,OAAOA,6BAAAA,CAAAA,CAAAA,KACC,CAAC;QAACA,6BAAAA,CAAAA,CAAAA,MAAQ;QAAIA,6BAAAA,CAAAA,CAAAA,MAAQ;KAAG,EAC9B,SAAS,CAAC,CAACY,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQX,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,CAACS,wBACT,QAAQ;IACX,MAAMV,6BAAAA,CAAAA,CAAAA,OACC,CAAC;QAAC;QAAW;QAAS;KAAS,EACnC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAEN;AAOO,MAAMc,oBAAoB,CAC/BX,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAac;QACbR;IACF;AAIK,MAAMY,iCAAiCf,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACrD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,6BAAAA,CAAAA,CAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMgB,4BAA4B,CACvCb,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAakB;QACbZ;IACF;AAIK,MAAMc,0BAA0BjB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC9C,YAAYA,6BAAAA,CAAAA,CAAAA,OACL,CAAC;QACJ;QACA;QACA;QACA;QACA;KACD,EACA,OAAO,CAAC,gBACR,QAAQ,CACP;IAEJ,WAAWA,6BAAAA,CAAAA,CAAAA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CACP;IAEJ,UAAUA,6BAAAA,CAAAA,CAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,GACR,QAAQ,CACP;AAEN;AAEO,MAAMiB,qBAAqB,CAChCf,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaoB;QACbd;IACF;AAIK,MAAMgB,+BAA+BnB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACnD,MAAMC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;IAC3C,IAAIA,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMmB,0BAA0B,CACrCjB,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAasB;QACbhB;IACF;AAGK,MAAMkB,6BAA6BrB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IACjD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IAA4B,QAAQ,CAC1C;IAEF,UAAUD,6BAAAA,CAAAA,CAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMsB,wBAAwB,CACnCnB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAawB;QACblB;IACF;AAGK,MAAMoB,yBAAyBvB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,OAAOC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,6BAAAA,CAAAA,CAAAA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,6BAAAA,CAAAA,CAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,6BAAAA,CAAAA,CAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,6BAAAA,CAAAA,CAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMwB,oBAAoB,CAC/BrB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAa0B;QACbpB;IACF;AAIK,MAAMsB,8BAA8BzB,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAClD,QAAQC,AAAAA,IAAAA,mCAAAA,yBAAAA,AAAAA,IACL,QAAQ,CAAC,iCACT,QAAQ;AACb;AAKO,MAAMyB,yBAAyB,CACpCvB,OAEON,aAGL;QACA,MAAM;QACN,aAAaa;QACb,gBAAgB;QAChB,aAAae;QACbtB;IACF;AAIK,MAAMwB,0BAA0B3B,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC9C,WAAWA,6BAAAA,CAAAA,CAAAA,MAAQ,GAAG,QAAQ,CAAC;IAC/B,SAASA,6BAAAA,CAAAA,CAAAA,MACA,GACN,QAAQ,CACP;IAEJ,QAAQA,6BAAAA,CAAAA,CAAAA,OAAS,GAAG,QAAQ,CAAC;AAC/B;AAOO,MAAM4B,qBAAqB,IACzB/B,aAAgE;QACrE,MAAM;QACN,aACE;QACF,aAAa8B;QACb,MAAM,OAAOE;YACX,IAAI,AAAyB,aAAzB,OAAOA,OAAO,QAChB,MAAM,IAAIC,MACR,CAAC,wDAAwD,EAAE,OAAOD,OAAO,OAAO,EAAE,EAAEA,MAAM,OAAO,IAAI,gBAAgB;YAIzHE,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,wBACP,CAAC,QAAQ,EAAEF,MAAM,SAAS,CAAC,WAAW,EAAEA,MAAM,OAAO,CAAC,UAAU,EAAEA,MAAM,MAAM,EAAE;YAGlF,IAAI,CAACA,MAAM,MAAM,EACf,MAAM,IAAIC,MACR,CAAC,kBAAkB,EAAED,MAAM,OAAO,IAAI,eAAe,cAAc,EAAEA,MAAM,SAAS,CAAC,CAAC,CAAC;QAG7F;IACF;AAIK,MAAMG,yBAAyBhC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAC7C,aAAaA,6BAAAA,CAAAA,CAAAA,MACJ,GACN,OAAO,CAAC,MACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMiC,oBAAoB,IACxBpC,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAamC;QACb,MAAM,OAAOH;YACX,MAAMK,WAAWL,OAAO,eAAe;YACvCE,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,wBAAwB,CAAC,aAAa,EAAEG,SAAS,EAAE,CAAC;YAC7D,MAAM,IAAIC,QAAQ,CAACC,UAAYC,WAAWD,SAASF;QACrD;IACF;AAIK,MAAMI,4BAA4BtC,6BAAAA,CAAAA,CAAAA,MAAQ,CAAC;IAChD,SAASA,6BAAAA,CAAAA,CAAAA,MACA,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAKO,MAAMuC,uBAAuB,IAC3B1C,aAAoE;QACzE,MAAM2C,mCAAAA,kBAAkBA;QACxB,aACE;QACF,aAAaF;QACb,MAAM,OAAOT;YACXE,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS,wBACP,CAAC,cAAc,EAAEF,OAAO,UAAU,CAAC,EAAE,EAAEA,MAAM,OAAO,EAAE,GAAG,IAAI;YAE/D,OAAOA,MAAM,OAAO;QACtB;IACF"}
|