@midscene/core 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +1 -3
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +1 -1
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +37 -6
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/service/index.mjs +10 -6
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/task-runner.mjs +6 -1
- package/dist/es/task-runner.mjs.map +1 -1
- package/dist/es/utils.mjs +27 -12
- package/dist/es/utils.mjs.map +1 -1
- package/dist/lib/agent/agent.js +1 -3
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +1 -1
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +37 -6
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/service/index.js +10 -6
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/task-runner.js +6 -1
- package/dist/lib/task-runner.js.map +1 -1
- package/dist/lib/utils.js +26 -11
- package/dist/lib/utils.js.map +1 -1
- package/dist/types/service/index.d.ts +1 -1
- package/dist/types/utils.d.ts +1 -1
- package/package.json +2 -2
|
@@ -77,7 +77,16 @@ async function systemPromptToTaskPlanning({ actionSpace, modelFamily, includeBbo
|
|
|
77
77
|
}`;
|
|
78
78
|
const step1Title = shouldIncludeSubGoals ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)' : '## Step 1: Observe (related tags: <thought>)';
|
|
79
79
|
const step1Description = shouldIncludeSubGoals ? "First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot." : 'First, observe the current screenshot and previous logs to understand the current state.';
|
|
80
|
-
const
|
|
80
|
+
const explicitInstructionRule = 'CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: "fill out the form" means only fill fields, do NOT submit; "click the button" means only click, do NOT wait for page load or verify results; "type \'hello\'" means only type, do NOT press Enter.';
|
|
81
|
+
const thoughtTagDescription = shouldIncludeSubGoals ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.
|
|
82
|
+
|
|
83
|
+
Include your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.
|
|
84
|
+
|
|
85
|
+
${explicitInstructionRule}` : `REQUIRED: You MUST always output the <thought> tag. Never skip it.
|
|
86
|
+
|
|
87
|
+
Include your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.
|
|
88
|
+
|
|
89
|
+
${explicitInstructionRule}`;
|
|
81
90
|
const subGoalTags = shouldIncludeSubGoals ? `
|
|
82
91
|
|
|
83
92
|
* <update-plan-content> tag
|
|
@@ -144,7 +153,7 @@ ${step1Title}
|
|
|
144
153
|
|
|
145
154
|
${step1Description}
|
|
146
155
|
|
|
147
|
-
* <thought> tag
|
|
156
|
+
* <thought> tag (REQUIRED)
|
|
148
157
|
|
|
149
158
|
${thoughtTagDescription}
|
|
150
159
|
${subGoalTags}
|
|
@@ -159,12 +168,34 @@ Don't use this tag if no information needs to be preserved.
|
|
|
159
168
|
|
|
160
169
|
Based on the current screenshot${shouldIncludeSubGoals ? ' and the status of all sub-goals' : ''}, determine if the entire task is completed.
|
|
161
170
|
|
|
171
|
+
### CRITICAL: The User's Instruction is the Supreme Authority
|
|
172
|
+
|
|
173
|
+
The user's instruction defines the EXACT scope of what you must accomplish. You MUST follow it precisely - nothing more, nothing less. Violating this rule may cause severe consequences such as data loss, unintended operations, or system failures.
|
|
174
|
+
|
|
175
|
+
**Explicit instructions vs. High-level goals:**
|
|
176
|
+
- If the user gives you **explicit operation steps** (e.g., "click X", "type Y", "fill out the form"), treat them as exact commands. Execute ONLY those steps, nothing more.
|
|
177
|
+
- If the user gives you a **high-level goal** (e.g., "log in to the system", "complete the purchase"), you may determine the necessary steps to achieve it.
|
|
178
|
+
|
|
179
|
+
**What "goal accomplished" means:**
|
|
180
|
+
- The goal is accomplished when you have done EXACTLY what the user asked - no extra steps, no assumptions.
|
|
181
|
+
- Do NOT perform any action beyond the explicit instruction, even if it seems logical or helpful.
|
|
182
|
+
|
|
183
|
+
**Examples - Explicit instructions (execute exactly, no extra steps):**
|
|
184
|
+
- "fill out the form" → Goal accomplished when all fields are filled. Do NOT submit the form.
|
|
185
|
+
- "click the login button" → Goal accomplished once clicked. Do NOT wait for page load or verify login success.
|
|
186
|
+
- "type 'hello' in the search box" → Goal accomplished when 'hello' is typed. Do NOT press Enter or trigger search.
|
|
187
|
+
- "select the first item" → Goal accomplished when selected. Do NOT proceed to checkout.
|
|
188
|
+
|
|
189
|
+
**Special case - Assertion instructions:**
|
|
190
|
+
- If the user's instruction includes an assertion (e.g., "verify that...", "check that...", "assert..."), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, mark the goal as failed (success="false").
|
|
191
|
+
|
|
192
|
+
### Output Rules
|
|
193
|
+
|
|
194
|
+
- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.
|
|
162
195
|
- Use the <complete-goal success="true|false">message</complete-goal> tag to output the result if the goal is accomplished or failed.
|
|
163
196
|
- the 'success' attribute is required. It means whether the expected goal is accomplished based on what you observe in the current screenshot. No matter what actions were executed or what errors occurred during execution, if the expected goal is accomplished, set success="true". If the expected goal is not accomplished and cannot be accomplished, set success="false".
|
|
164
197
|
- the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.
|
|
165
|
-
- If the user's instruction includes an assertion (e.g., "verify that...", "check that...", "assert..."), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, you should use <complete-goal success="false">reason</complete-goal> to indicate the checkpoint failed, and explain why the assertion failed in the message.
|
|
166
198
|
- If you output <complete-goal>, do NOT output <action-type> or <action-param-json>. The task ends here.
|
|
167
|
-
- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.
|
|
168
199
|
|
|
169
200
|
## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)
|
|
170
201
|
|
|
@@ -222,9 +253,9 @@ For example:
|
|
|
222
253
|
|
|
223
254
|
Return in XML format following this decision flow:
|
|
224
255
|
|
|
225
|
-
**Always include:**
|
|
256
|
+
**Always include (REQUIRED):**
|
|
226
257
|
<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->
|
|
227
|
-
<thought
|
|
258
|
+
<thought>Your thought process here. NEVER skip this tag.</thought>
|
|
228
259
|
${shouldIncludeSubGoals ? `
|
|
229
260
|
<!-- required when no update-plan-content is provided in the previous response -->
|
|
230
261
|
<update-plan-content>...</update-plan-content>
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n // Generate locate object examples based on includeBbox\n const locateExample1 = includeBbox\n ? `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\",\n \"bbox\": [345, 442, 458, 483]\n }`\n : `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\"\n }`;\n\n const thoughtTag = (content: string) =>\n shouldIncludeThought ? `<thought>${content}</thought>\\n` : '';\n\n // Sub-goals related content - only included when shouldIncludeSubGoals is true\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)'\n : '## Step 1: Observe (related tags: <thought>)';\n\n const step1Description = shouldIncludeSubGoals\n ? \"First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot.\"\n : 'First, observe the current screenshot and previous logs to understand the current state.';\n\n const thoughtTagDescription = shouldIncludeSubGoals\n ? \"Include your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.\"\n : 'Include your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.';\n\n const subGoalTags = shouldIncludeSubGoals\n ? `\n\n* <update-plan-content> tag\n\nUse this structure to give or update your plan:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">sub goal description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">sub goal description</sub-goal>\n ...\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\nUse this structure to mark a sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: You MUST only mark a sub-goal as \"finished\" AFTER you have confirmed the task is actually completed by observing the result in the screenshot. Do NOT mark a sub-goal as done just because you expect the next action will complete it. Wait until you see visual confirmation in the screenshot that the sub-goal has been achieved.\n\n* Note\n\nDuring execution, you can call <update-plan-content> at any time to update the plan based on the latest screenshot and completed sub-goals.\n\n### Example\n\nIf the user wants to \"log in to a system using username and password, complete all to-do items, and submit a registration form\", you can break it down into the following sub-goals:\n\n<thought>...</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Log in to the system</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Complete all to-do items</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Submit the registration form</sub-goal>\n</update-plan-content>\n\nAfter logging in and seeing the to-do items, you can mark the sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nAt this point, the status of all sub-goals is:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished\" />\n <sub-goal index=\"2\" status=\"pending\" />\n <sub-goal index=\"3\" status=\"pending\" />\n</update-plan-content>\n\nAfter some time, when the last sub-goal is also completed, you can mark it as done as well:\n\n<mark-sub-goal-done>\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>`\n : '';\n\n // Step numbering adjusts based on whether sub-goals are included\n const noteStepNumber = shouldIncludeSubGoals ? 2 : 2;\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 3;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 4;\n\n return `\nTarget: You are an expert to manipulate the UI to accomplish the user's instruction. User will give you an instruction, some screenshots, background knowledge and previous logs indicating what have been done. Your task is to accomplish the instruction by thinking through the path to complete the task and give the next action to execute.\n\n${step1Title}\n\n${step1Description}\n\n* <thought> tag\n\n${thoughtTagDescription}\n${subGoalTags}\n\n## Step ${noteStepNumber}: Note Data from Current Screenshot (related tags: <note>)\n\nWhile observing the current screenshot, if you notice any information that might be needed in follow-up actions, record it here. The current screenshot will NOT be available in subsequent steps, so this note is your only way to preserve essential information. Examples: extracted data, element states, content that needs to be referenced.\n\nDon't use this tag if no information needs to be preserved.\n\n## Step ${checkGoalStepNumber}: Check if Goal is Accomplished (related tags: <complete-goal>)\n\nBased on the current screenshot${shouldIncludeSubGoals ? ' and the status of all sub-goals' : ''}, determine if the entire task is completed.\n\n- Use the <complete-goal success=\"true|false\">message</complete-goal> tag to output the result if the goal is accomplished or failed.\n - the 'success' attribute is required. It means whether the expected goal is accomplished based on what you observe in the current screenshot. No matter what actions were executed or what errors occurred during execution, if the expected goal is accomplished, set success=\"true\". If the expected goal is not accomplished and cannot be accomplished, set success=\"false\".\n - the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.\n - If the user's instruction includes an assertion (e.g., \"verify that...\", \"check that...\", \"assert...\"), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, you should use <complete-goal success=\"false\">reason</complete-goal> to indicate the checkpoint failed, and explain why the assertion failed in the message.\n- If you output <complete-goal>, do NOT output <action-type> or <action-param-json>. The task ends here.\n- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.\n\n## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)\n\nONLY if the task is not complete: Think what the next action is according to the current screenshot${shouldIncludeSubGoals ? ' and the plan' : ''}.\n\n- Don't give extra actions or plans beyond the instruction or the plan. For example, don't try to submit the form if the instruction is only to fill something.\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully. Otherwise, retry or do something else to recover.\n- Give just the next ONE action you should do (if any)\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the \"error\" field to the error message.\n\n### Supporting actions list\n\n${actionList}\n\n### Log to give user feedback (preamble message)\n\nThe <log> tag is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button in popup</log>\n- <log>Previous actions failed to find the 'Yes' button, i will try again</log>\n- <log>Go back to find the login button</log>\n\n### If there is some action to do ...\n\n- Use the <action-type> and <action-param-json> tags to output the action to be executed.\n- The <action-type> MUST be one of the supporting actions. 'complete-goal' is NOT a valid action-type.\nFor example:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### If you think there is an error ...\n\n- Use the <error> tag to output the error message.\n\nFor example:\n<error>Unable to find the required element on the page</error>\n\n### If there is no action to do ...\n\n- Don't output <action-type> or <action-param-json> if there is no action to do.\n\n## Return Format\n\nReturn in XML format following this decision flow:\n\n**Always include:**\n<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->\n<thought>...</thought>\n${\n shouldIncludeSubGoals\n ? `\n<!-- required when no update-plan-content is provided in the previous response -->\n<update-plan-content>...</update-plan-content>\n\n<!-- required when any sub-goal is completed -->\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}\n<!-- Step ${noteStepNumber}: Note data from current screenshot if needed -->\n<note>...</note>\n\n**Then choose ONE of the following paths:**\n\n**Path A: If the goal is accomplished or failed (Step ${checkGoalStepNumber})**\n<complete-goal success=\"true|false\">...</complete-goal>\n\n**Path B: If the goal is NOT complete yet (Step ${actionStepNumber})**\n<!-- Determine next action -->\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n`;\n}\n"],"names":["vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","includeThought","includeSubGoals","preferredLanguage","getPreferredLanguage","Error","actionDescriptionList","actionList","shouldIncludeSubGoals","locateExample1","step1Title","step1Description","thoughtTagDescription","subGoalTags","noteStepNumber","checkGoalStepNumber","actionStepNumber"],"mappings":";;;AAUA,MAAMA,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKd,MAAM,IAAIe,OAAO,OAAO,CAACF,OACxC,IAAIb,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMgB,aACJ,AACE,cADF,OAAQhB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMiB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAenB,OAAOO;gBAGvC,MAAMa,cAAcC,kBAAkBrB;gBAGtC,MAAMsB,eAAevB,iBAAiBC;gBACtC,MAAMuB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAExB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeuB,2BAA2B,EAC/CC,WAAW,EACXnC,WAAW,EACXoC,WAAW,EACXC,cAAc,EACdC,eAAe,EAOhB;IACC,MAAMC,oBAAoBC;IAG1B,IAAIJ,eAAe,CAACpC,aAClB,MAAM,IAAIyC,MACR;IAIJ,MAAMC,wBAAwBP,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBACLC,QACAV,cAAcqC,cAAcpC,cAAc2B;IAG9C,MAAMgB,aAAaD,sBAAsB,IAAI,CAAC;IAG9C,MAAME,wBAAwBN,mBAAmB;IAGjD,MAAMO,iBAAiBT,cACnB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAMF,MAAMU,aAAaF,wBACf,uGACA;IAEJ,MAAMG,mBAAmBH,wBACrB,wNACA;IAEJ,MAAMI,wBAAwBJ,wBAC1B,qSACA;IAEJ,MAAMK,cAAcL,wBAChB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAuDc,CAAC,GAChB;IAGJ,MAAMM,iBAAiBN,wBAAwB,IAAI;IACnD,MAAMO,sBAAsBP,wBAAwB,IAAI;IACxD,MAAMQ,mBAAmBR,wBAAwB,IAAI;IAErD,OAAO,CAAC;;;AAGV,EAAEE,WAAW;;AAEb,EAAEC,iBAAiB;;;;AAInB,EAAEC,sBAAsB;AACxB,EAAEC,YAAY;;QAEN,EAAEC,eAAe;;;;;;QAMjB,EAAEC,oBAAoB;;+BAEC,EAAEP,wBAAwB,qCAAqC,GAAG;;;;;;;sEAO3B,EAAEQ,iBAAiB;;QAEjF,EAAEA,iBAAiB;;mGAEwE,EAAER,wBAAwB,kBAAkB,GAAG;;;;;;;;;;AAUlJ,EAAED,WAAW;;;;;;QAML,EAAEJ,kBAAkB;;;;;;;;;;;;;;;;;;;YAmBhB,EAAEM,eAAe;;;;;;;;;;;;;;;;;;;;oBAoBT,EAAED,wBAAwB,cAAc,GAAG;;AAE/D,EACEA,wBACI,CAAC;;;;;;;;AAQP,CAAC,GACK,GACL;UACS,EAAEM,eAAe;;;;;sDAK2B,EAAEC,oBAAoB;;;gDAG5B,EAAEC,iBAAiB;;;;;;;;AAQnE,CAAC;AACD"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n // Generate locate object examples based on includeBbox\n const locateExample1 = includeBbox\n ? `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\",\n \"bbox\": [345, 442, 458, 483]\n }`\n : `{\n \"prompt\": \"Add to cart button for Sauce Labs Backpack\"\n }`;\n\n const thoughtTag = (content: string) =>\n shouldIncludeThought ? `<thought>${content}</thought>\\n` : '';\n\n // Sub-goals related content - only included when shouldIncludeSubGoals is true\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)'\n : '## Step 1: Observe (related tags: <thought>)';\n\n const step1Description = shouldIncludeSubGoals\n ? \"First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot.\"\n : 'First, observe the current screenshot and previous logs to understand the current state.';\n\n const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: \"fill out the form\" means only fill fields, do NOT submit; \"click the button\" means only click, do NOT wait for page load or verify results; \"type 'hello'\" means only type, do NOT press Enter.`;\n\n const thoughtTagDescription = shouldIncludeSubGoals\n ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`\n : `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`;\n\n const subGoalTags = shouldIncludeSubGoals\n ? `\n\n* <update-plan-content> tag\n\nUse this structure to give or update your plan:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">sub goal description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">sub goal description</sub-goal>\n ...\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\nUse this structure to mark a sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: You MUST only mark a sub-goal as \"finished\" AFTER you have confirmed the task is actually completed by observing the result in the screenshot. Do NOT mark a sub-goal as done just because you expect the next action will complete it. Wait until you see visual confirmation in the screenshot that the sub-goal has been achieved.\n\n* Note\n\nDuring execution, you can call <update-plan-content> at any time to update the plan based on the latest screenshot and completed sub-goals.\n\n### Example\n\nIf the user wants to \"log in to a system using username and password, complete all to-do items, and submit a registration form\", you can break it down into the following sub-goals:\n\n<thought>...</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Log in to the system</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Complete all to-do items</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Submit the registration form</sub-goal>\n</update-plan-content>\n\nAfter logging in and seeing the to-do items, you can mark the sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nAt this point, the status of all sub-goals is:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished\" />\n <sub-goal index=\"2\" status=\"pending\" />\n <sub-goal index=\"3\" status=\"pending\" />\n</update-plan-content>\n\nAfter some time, when the last sub-goal is also completed, you can mark it as done as well:\n\n<mark-sub-goal-done>\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>`\n : '';\n\n // Step numbering adjusts based on whether sub-goals are included\n const noteStepNumber = shouldIncludeSubGoals ? 2 : 2;\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 3;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 4;\n\n return `\nTarget: You are an expert to manipulate the UI to accomplish the user's instruction. User will give you an instruction, some screenshots, background knowledge and previous logs indicating what have been done. Your task is to accomplish the instruction by thinking through the path to complete the task and give the next action to execute.\n\n${step1Title}\n\n${step1Description}\n\n* <thought> tag (REQUIRED)\n\n${thoughtTagDescription}\n${subGoalTags}\n\n## Step ${noteStepNumber}: Note Data from Current Screenshot (related tags: <note>)\n\nWhile observing the current screenshot, if you notice any information that might be needed in follow-up actions, record it here. The current screenshot will NOT be available in subsequent steps, so this note is your only way to preserve essential information. Examples: extracted data, element states, content that needs to be referenced.\n\nDon't use this tag if no information needs to be preserved.\n\n## Step ${checkGoalStepNumber}: Check if Goal is Accomplished (related tags: <complete-goal>)\n\nBased on the current screenshot${shouldIncludeSubGoals ? ' and the status of all sub-goals' : ''}, determine if the entire task is completed.\n\n### CRITICAL: The User's Instruction is the Supreme Authority\n\nThe user's instruction defines the EXACT scope of what you must accomplish. You MUST follow it precisely - nothing more, nothing less. Violating this rule may cause severe consequences such as data loss, unintended operations, or system failures.\n\n**Explicit instructions vs. High-level goals:**\n- If the user gives you **explicit operation steps** (e.g., \"click X\", \"type Y\", \"fill out the form\"), treat them as exact commands. Execute ONLY those steps, nothing more.\n- If the user gives you a **high-level goal** (e.g., \"log in to the system\", \"complete the purchase\"), you may determine the necessary steps to achieve it.\n\n**What \"goal accomplished\" means:**\n- The goal is accomplished when you have done EXACTLY what the user asked - no extra steps, no assumptions.\n- Do NOT perform any action beyond the explicit instruction, even if it seems logical or helpful.\n\n**Examples - Explicit instructions (execute exactly, no extra steps):**\n- \"fill out the form\" → Goal accomplished when all fields are filled. Do NOT submit the form.\n- \"click the login button\" → Goal accomplished once clicked. Do NOT wait for page load or verify login success.\n- \"type 'hello' in the search box\" → Goal accomplished when 'hello' is typed. Do NOT press Enter or trigger search.\n- \"select the first item\" → Goal accomplished when selected. Do NOT proceed to checkout.\n\n**Special case - Assertion instructions:**\n- If the user's instruction includes an assertion (e.g., \"verify that...\", \"check that...\", \"assert...\"), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, mark the goal as failed (success=\"false\").\n\n### Output Rules\n\n- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.\n- Use the <complete-goal success=\"true|false\">message</complete-goal> tag to output the result if the goal is accomplished or failed.\n - the 'success' attribute is required. It means whether the expected goal is accomplished based on what you observe in the current screenshot. No matter what actions were executed or what errors occurred during execution, if the expected goal is accomplished, set success=\"true\". If the expected goal is not accomplished and cannot be accomplished, set success=\"false\".\n - the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.\n- If you output <complete-goal>, do NOT output <action-type> or <action-param-json>. The task ends here.\n\n## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)\n\nONLY if the task is not complete: Think what the next action is according to the current screenshot${shouldIncludeSubGoals ? ' and the plan' : ''}.\n\n- Don't give extra actions or plans beyond the instruction or the plan. For example, don't try to submit the form if the instruction is only to fill something.\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully. Otherwise, retry or do something else to recover.\n- Give just the next ONE action you should do (if any)\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the \"error\" field to the error message.\n\n### Supporting actions list\n\n${actionList}\n\n### Log to give user feedback (preamble message)\n\nThe <log> tag is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button in popup</log>\n- <log>Previous actions failed to find the 'Yes' button, i will try again</log>\n- <log>Go back to find the login button</log>\n\n### If there is some action to do ...\n\n- Use the <action-type> and <action-param-json> tags to output the action to be executed.\n- The <action-type> MUST be one of the supporting actions. 'complete-goal' is NOT a valid action-type.\nFor example:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### If you think there is an error ...\n\n- Use the <error> tag to output the error message.\n\nFor example:\n<error>Unable to find the required element on the page</error>\n\n### If there is no action to do ...\n\n- Don't output <action-type> or <action-param-json> if there is no action to do.\n\n## Return Format\n\nReturn in XML format following this decision flow:\n\n**Always include (REQUIRED):**\n<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->\n<thought>Your thought process here. NEVER skip this tag.</thought>\n${\n shouldIncludeSubGoals\n ? `\n<!-- required when no update-plan-content is provided in the previous response -->\n<update-plan-content>...</update-plan-content>\n\n<!-- required when any sub-goal is completed -->\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}\n<!-- Step ${noteStepNumber}: Note data from current screenshot if needed -->\n<note>...</note>\n\n**Then choose ONE of the following paths:**\n\n**Path A: If the goal is accomplished or failed (Step ${checkGoalStepNumber})**\n<complete-goal success=\"true|false\">...</complete-goal>\n\n**Path B: If the goal is NOT complete yet (Step ${actionStepNumber})**\n<!-- Determine next action -->\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n`;\n}\n"],"names":["vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","includeThought","includeSubGoals","preferredLanguage","getPreferredLanguage","Error","actionDescriptionList","actionList","shouldIncludeSubGoals","locateExample1","step1Title","step1Description","explicitInstructionRule","thoughtTagDescription","subGoalTags","noteStepNumber","checkGoalStepNumber","actionStepNumber"],"mappings":";;;AAUA,MAAMA,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKd,MAAM,IAAIe,OAAO,OAAO,CAACF,OACxC,IAAIb,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMgB,aACJ,AACE,cADF,OAAQhB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMiB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAenB,OAAOO;gBAGvC,MAAMa,cAAcC,kBAAkBrB;gBAGtC,MAAMsB,eAAevB,iBAAiBC;gBACtC,MAAMuB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAExB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeuB,2BAA2B,EAC/CC,WAAW,EACXnC,WAAW,EACXoC,WAAW,EACXC,cAAc,EACdC,eAAe,EAOhB;IACC,MAAMC,oBAAoBC;IAG1B,IAAIJ,eAAe,CAACpC,aAClB,MAAM,IAAIyC,MACR;IAIJ,MAAMC,wBAAwBP,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBACLC,QACAV,cAAcqC,cAAcpC,cAAc2B;IAG9C,MAAMgB,aAAaD,sBAAsB,IAAI,CAAC;IAG9C,MAAME,wBAAwBN,mBAAmB;IAGjD,MAAMO,iBAAiBT,cACnB,CAAC;;;GAGJ,CAAC,GACE,CAAC;;GAEJ,CAAC;IAMF,MAAMU,aAAaF,wBACf,uGACA;IAEJ,MAAMG,mBAAmBH,wBACrB,wNACA;IAEJ,MAAMI,0BAA0B;IAEhC,MAAMC,wBAAwBL,wBAC1B,CAAC;;;;AAIP,EAAEI,yBAAyB,GACrB,CAAC;;;;AAIP,EAAEA,yBAAyB;IAEzB,MAAME,cAAcN,wBAChB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAuDc,CAAC,GAChB;IAGJ,MAAMO,iBAAiBP,wBAAwB,IAAI;IACnD,MAAMQ,sBAAsBR,wBAAwB,IAAI;IACxD,MAAMS,mBAAmBT,wBAAwB,IAAI;IAErD,OAAO,CAAC;;;AAGV,EAAEE,WAAW;;AAEb,EAAEC,iBAAiB;;;;AAInB,EAAEE,sBAAsB;AACxB,EAAEC,YAAY;;QAEN,EAAEC,eAAe;;;;;;QAMjB,EAAEC,oBAAoB;;+BAEC,EAAER,wBAAwB,qCAAqC,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;sEAyB3B,EAAES,iBAAiB;;;;;;QAMjF,EAAEA,iBAAiB;;mGAEwE,EAAET,wBAAwB,kBAAkB,GAAG;;;;;;;;;;AAUlJ,EAAED,WAAW;;;;;;QAML,EAAEJ,kBAAkB;;;;;;;;;;;;;;;;;;;YAmBhB,EAAEM,eAAe;;;;;;;;;;;;;;;;;;;;oBAoBT,EAAED,wBAAwB,cAAc,GAAG;;AAE/D,EACEA,wBACI,CAAC;;;;;;;;AAQP,CAAC,GACK,GACL;UACS,EAAEO,eAAe;;;;;sDAK2B,EAAEC,oBAAoB;;;gDAG5B,EAAEC,iBAAiB;;;;;;;;AAQnE,CAAC;AACD"}
|
|
@@ -110,9 +110,9 @@ class Service {
|
|
|
110
110
|
dump
|
|
111
111
|
};
|
|
112
112
|
}
|
|
113
|
-
async extract(dataDemand, modelConfig, opt, pageDescription, multimodalPrompt) {
|
|
113
|
+
async extract(dataDemand, modelConfig, opt, pageDescription, multimodalPrompt, context) {
|
|
114
|
+
assert(context, 'context is required for extract');
|
|
114
115
|
assert('object' == typeof dataDemand || 'string' == typeof dataDemand, `dataDemand should be object or string, but get ${typeof dataDemand}`);
|
|
115
|
-
const context = await this.contextRetrieverFn();
|
|
116
116
|
const startTime = Date.now();
|
|
117
117
|
let parseResult;
|
|
118
118
|
let rawResponse;
|
|
@@ -215,10 +215,14 @@ class Service {
|
|
|
215
215
|
borderThickness: 3
|
|
216
216
|
});
|
|
217
217
|
if (opt?.deepThink) {
|
|
218
|
-
const searchArea = expandSearchArea(targetRect,
|
|
219
|
-
|
|
220
|
-
const
|
|
221
|
-
|
|
218
|
+
const searchArea = expandSearchArea(targetRect, size, modelFamily);
|
|
219
|
+
const widthRatio = searchArea.width / size.width;
|
|
220
|
+
const heightRatio = searchArea.height / size.height;
|
|
221
|
+
if (widthRatio >= 0.5 && heightRatio >= 0.5) {
|
|
222
|
+
debug('describe: cropping to searchArea', searchArea);
|
|
223
|
+
const croppedResult = await cropByRect(imagePayload, searchArea, 'qwen2.5-vl' === modelFamily);
|
|
224
|
+
imagePayload = croppedResult.imageBase64;
|
|
225
|
+
} else debug('describe: skip cropping, search area too small (%dx%d on %dx%d)', searchArea.width, searchArea.height, size.width, size.height);
|
|
222
226
|
}
|
|
223
227
|
const msgs = [
|
|
224
228
|
{
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"service/index.mjs","sources":["../../../src/service/index.ts"],"sourcesContent":["import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n AIResponseParseError,\n AiExtractElementInfo,\n AiLocateElement,\n callAIWithObjectResponse,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport { type AIArgs, expandSearchArea } from '@/common';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n DetailedLocateParam,\n LocateResultWithDump,\n PartialServiceDumpFromSDK,\n Rect,\n ServiceExtractOption,\n ServiceExtractParam,\n ServiceExtractResult,\n ServiceTaskInfo,\n UIContext,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_FORCE_DEEP_THINK,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../common';\nimport { createServiceDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface ServiceOptions {\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n aiVendorFn?: typeof callAIWithObjectResponse;\n}\n\nconst debug = getDebug('ai:service');\nexport default class Service {\n contextRetrieverFn: () => Promise<UIContext> | UIContext;\n\n aiVendorFn: Exclude<ServiceOptions['aiVendorFn'], undefined> =\n callAIWithObjectResponse;\n\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n\n constructor(\n context: UIContext | (() => Promise<UIContext> | UIContext),\n opt?: ServiceOptions,\n ) {\n assert(context, 'context is required for Service');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n // just for unit test, aiVendorFn is callAIWithObjectResponse by default\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n ): Promise<LocateResultWithDump> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = globalConfigManager.getEnvConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n const { modelFamily } = modelConfig;\n\n if (searchAreaPrompt && !modelFamily) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config',\n );\n searchAreaPrompt = undefined;\n }\n\n if (searchAreaPrompt && isAutoGLM(modelFamily)) {\n console.warn('The \"deepThink\" feature is not supported with AutoGLM.');\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn());\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, rawResponse, usage, reasoning_content } =\n await AiLocateElement({\n callAIFn: this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `failed to locate element: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements = parseResult.elements || [];\n\n const dump = createServiceDump({\n ...dumpData,\n matchedElement: elements,\n });\n\n if (errorLog) {\n throw new ServiceError(errorLog, dump);\n }\n\n if (elements.length > 1) {\n throw new ServiceError(\n `locate: multiple elements found, length = ${elements.length}`,\n dump,\n );\n }\n\n if (elements.length === 1) {\n return {\n element: {\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n description: elements[0]!.description,\n },\n rect,\n dump,\n };\n }\n\n return {\n element: null,\n rect,\n dump,\n };\n }\n\n async extract<T>(\n dataDemand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n pageDescription?: string,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ServiceExtractResult<T>> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const context = await this.contextRetrieverFn();\n\n const startTime = Date.now();\n\n let parseResult: Awaited<\n ReturnType<typeof AiExtractElementInfo<T>>\n >['parseResult'];\n let rawResponse: string;\n let usage: Awaited<ReturnType<typeof AiExtractElementInfo<T>>>['usage'];\n let reasoning_content: string | undefined;\n\n try {\n const result = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n pageDescription,\n });\n parseResult = result.parseResult;\n rawResponse = result.rawResponse;\n usage = result.usage;\n reasoning_content = result.reasoning_content;\n } catch (error) {\n if (error instanceof AIResponseParseError) {\n // Create dump with usage and rawResponse from the error\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: error.rawResponse,\n usage: error.usage,\n };\n const dump = createServiceDump({\n type: 'extract',\n userQuery: { dataDemand },\n matchedElement: [],\n data: null,\n taskInfo,\n error: error.message,\n });\n throw new ServiceError(error.message, dump);\n }\n throw error;\n }\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse,\n formatResponse: JSON.stringify(parseResult),\n usage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n const dump = createServiceDump({\n ...dumpData,\n data,\n });\n\n if (errorLog && !data) {\n throw new ServiceError(errorLog, dump);\n }\n\n return {\n data,\n thought,\n usage,\n reasoning_content,\n dump,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for service.describe');\n const context = await this.contextRetrieverFn();\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n assert(screenshotBase64, 'screenshot is required for service.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { modelFamily } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(\n targetRect,\n context.size,\n modelFamily,\n );\n debug('describe: set searchArea', searchArea);\n const croppedResult = await cropByRect(\n imagePayload,\n searchArea,\n modelFamily === 'qwen2.5-vl',\n );\n imagePayload = croppedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn = this\n .aiVendorFn as typeof callAIWithObjectResponse<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, modelConfig);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Service","query","opt","modelConfig","queryPrompt","assert","globalDeepThinkSwitch","globalConfigManager","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelFamily","console","undefined","isAutoGLM","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","rawResponse","usage","reasoning_content","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","dump","createServiceDump","ServiceError","dataDemand","pageDescription","multimodalPrompt","result","AiExtractElementInfo","error","AIResponseParseError","data","thought","target","size","screenshotBase64","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","croppedResult","cropByRect","msgs","callAIFn","res","content","callAIWithObjectResponse","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAgDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IA4BnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACM;QAC/B,MAAMC,cAAc,AAAiB,YAAjB,OAAOH,QAAqBA,QAAQA,MAAM,MAAM;QACpEI,OAAOD,aAAa;QAEpBC,OAAO,AAAiB,YAAjB,OAAOJ,OAAoB;QAElC,MAAMK,wBAAwBC,oBAAoB,qBAAqB,CACrEC;QAEF,IAAIF,uBACFR,MAAM,yBAAyBQ;QAEjC,IAAIG;QACJ,IAAIR,MAAM,SAAS,IAAIK,uBACrBG,mBAAmBR,MAAM,MAAM;QAGjC,MAAM,EAAES,WAAW,EAAE,GAAGP;QAExB,IAAIM,oBAAoB,CAACC,aAAa;YACpCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBG;QACrB;QAEA,IAAIH,oBAAoBI,UAAUH,cAAc;YAC9CC,QAAQ,IAAI,CAAC;YACbF,mBAAmBG;QACrB;QAEA,MAAME,UAAUZ,KAAK,WAAY,MAAM,IAAI,CAAC,kBAAkB;QAE9D,IAAIa;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIT,kBAAkB;YACpBS,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBL;gBACpBN;YACF;YACAE,OACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAET,iBAAiB,CAAC,EAChDS,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEC,IAAI,EAAEC,WAAW,EAAEC,KAAK,EAAEC,iBAAiB,EAAE,GAChE,MAAMC,gBAAgB;YACpB,UAAU,IAAI,CAAC,UAAU;YACzBb;YACA,0BAA0BV;YAC1B,cAAcc;YACdf;QACF;QAEF,MAAMyB,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAV;YACAC;YACAC;YACAS;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,4BAA4B,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG3E,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS5B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNM;YACA,WAAW,CAAC,CAACd;YACb,OAAOgB;QACT;QAEA,MAAME,WAAWX,YAAY,QAAQ,IAAI,EAAE;QAE3C,MAAMY,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACX,gBAAgBC;QAClB;QAEA,IAAIF,UACF,MAAM,IAAIK,aAAaL,UAAUG;QAGnC,IAAID,SAAS,MAAM,GAAG,GACpB,MAAM,IAAIG,aACR,CAAC,0CAA0C,EAAEH,SAAS,MAAM,EAAE,EAC9DC;QAIJ,IAAID,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,aAAaA,QAAQ,CAAC,EAAE,CAAE,WAAW;YACvC;YACAV;YACAW;QACF;QAGF,OAAO;YACL,SAAS;YACTX;YACAW;QACF;IACF;IAEA,MAAM,QACJG,UAA+B,EAC/BlC,WAAyB,EACzBD,GAA0B,EAC1BoC,eAAwB,EACxBC,gBAAoC,EACF;QAClClC,OACE,AAAsB,YAAtB,OAAOgC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMvB,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAE7C,MAAMM,YAAYC,KAAK,GAAG;QAE1B,IAAIC;QAGJ,IAAIE;QACJ,IAAIC;QACJ,IAAIC;QAEJ,IAAI;YACF,MAAMc,SAAS,MAAMC,qBAAwB;gBAC3C3B;gBACA,WAAWuB;gBACXE;gBACA,eAAerC;gBACfC;gBACAmC;YACF;YACAhB,cAAckB,OAAO,WAAW;YAChChB,cAAcgB,OAAO,WAAW;YAChCf,QAAQe,OAAO,KAAK;YACpBd,oBAAoBc,OAAO,iBAAiB;QAC9C,EAAE,OAAOE,OAAO;YACd,IAAIA,iBAAiBC,sBAAsB;gBAEzC,MAAMf,WAAWP,KAAK,GAAG,KAAKD;gBAC9B,MAAMS,WAA4B;oBAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;oBACtC,YAAYD;oBACZ,aAAac,MAAM,WAAW;oBAC9B,OAAOA,MAAM,KAAK;gBACpB;gBACA,MAAMR,OAAOC,kBAAkB;oBAC7B,MAAM;oBACN,WAAW;wBAAEE;oBAAW;oBACxB,gBAAgB,EAAE;oBAClB,MAAM;oBACNR;oBACA,OAAOa,MAAM,OAAO;gBACtB;gBACA,MAAM,IAAIN,aAAaM,MAAM,OAAO,EAAER;YACxC;YACA,MAAMQ;QACR;QAEA,MAAMd,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZJ;YACA,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAC;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,qBAAqB,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTK;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNR;YACA,OAAOE;QACT;QAEA,MAAM,EAAEa,IAAI,EAAEC,OAAO,EAAE,GAAGvB,eAAe,CAAC;QAE1C,MAAMY,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACXY;QACF;QAEA,IAAIb,YAAY,CAACa,MACf,MAAM,IAAIR,aAAaL,UAAUG;QAGnC,OAAO;YACLU;YACAC;YACApB;YACAC;YACAQ;QACF;IACF;IAEA,MAAM,SACJY,MAA+B,EAC/B3C,WAAyB,EACzBD,GAEC,EACwD;QACzDG,OAAOyC,QAAQ;QACf,MAAMhC,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAC7C,MAAM,EAAEiC,IAAI,EAAE,GAAGjC;QACjB,MAAMkC,mBAAmBlC,QAAQ,UAAU,CAAC,MAAM;QAClDT,OAAO2C,kBAAkB;QAEzB,MAAM,EAAEtC,WAAW,EAAE,GAAGP;QACxB,MAAM8C,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBR;YAChBD;YACA,sBAAsB;gBACpB;oBACE,MAAMK;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIlD,KAAK,WAAW;YAClB,MAAMa,aAAa0C,iBACjBL,YACAtC,QAAQ,IAAI,EACZJ;YAEFZ,MAAM,4BAA4BiB;YAClC,MAAM2C,gBAAgB,MAAMC,WAC1BJ,cACAxC,YACAL,AAAgB,iBAAhBA;YAEF6C,eAAeG,cAAc,WAAW;QAC1C;QAEA,MAAME,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WAAW,IAAI,CAClB,UAAU;QAEb,MAAMC,MAAM,MAAMD,SAASD,MAAMzD;QAEjC,MAAM,EAAE4D,OAAO,EAAE,GAAGD;QACpBzD,OAAO,CAAC0D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D1D,OAAO0D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IApVA,YACEjD,OAA2D,EAC3DZ,GAAoB,CACpB;QAVF;QAEA,qCACE8D;QAEF;QAME3D,OAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMmD,QAAQ,OAAO,CAACnD;QAIlD,IAAI,AAA2B,WAApBZ,KAAK,YACd,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,KAAK,UACd,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAmUF"}
|
|
1
|
+
{"version":3,"file":"service/index.mjs","sources":["../../../src/service/index.ts"],"sourcesContent":["import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n AIResponseParseError,\n AiExtractElementInfo,\n AiLocateElement,\n callAIWithObjectResponse,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport { type AIArgs, expandSearchArea } from '@/common';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n DetailedLocateParam,\n LocateResultWithDump,\n PartialServiceDumpFromSDK,\n Rect,\n ServiceExtractOption,\n ServiceExtractParam,\n ServiceExtractResult,\n ServiceTaskInfo,\n UIContext,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_FORCE_DEEP_THINK,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../common';\nimport { createServiceDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface ServiceOptions {\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n aiVendorFn?: typeof callAIWithObjectResponse;\n}\n\nconst debug = getDebug('ai:service');\nexport default class Service {\n contextRetrieverFn: () => Promise<UIContext> | UIContext;\n\n aiVendorFn: Exclude<ServiceOptions['aiVendorFn'], undefined> =\n callAIWithObjectResponse;\n\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n\n constructor(\n context: UIContext | (() => Promise<UIContext> | UIContext),\n opt?: ServiceOptions,\n ) {\n assert(context, 'context is required for Service');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n // just for unit test, aiVendorFn is callAIWithObjectResponse by default\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n ): Promise<LocateResultWithDump> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = globalConfigManager.getEnvConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n const { modelFamily } = modelConfig;\n\n if (searchAreaPrompt && !modelFamily) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config',\n );\n searchAreaPrompt = undefined;\n }\n\n if (searchAreaPrompt && isAutoGLM(modelFamily)) {\n console.warn('The \"deepThink\" feature is not supported with AutoGLM.');\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn());\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, rawResponse, usage, reasoning_content } =\n await AiLocateElement({\n callAIFn: this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `failed to locate element: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements = parseResult.elements || [];\n\n const dump = createServiceDump({\n ...dumpData,\n matchedElement: elements,\n });\n\n if (errorLog) {\n throw new ServiceError(errorLog, dump);\n }\n\n if (elements.length > 1) {\n throw new ServiceError(\n `locate: multiple elements found, length = ${elements.length}`,\n dump,\n );\n }\n\n if (elements.length === 1) {\n return {\n element: {\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n description: elements[0]!.description,\n },\n rect,\n dump,\n };\n }\n\n return {\n element: null,\n rect,\n dump,\n };\n }\n\n async extract<T>(\n dataDemand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n pageDescription?: string,\n multimodalPrompt?: TMultimodalPrompt,\n context?: UIContext,\n ): Promise<ServiceExtractResult<T>> {\n assert(context, 'context is required for extract');\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n\n const startTime = Date.now();\n\n let parseResult: Awaited<\n ReturnType<typeof AiExtractElementInfo<T>>\n >['parseResult'];\n let rawResponse: string;\n let usage: Awaited<ReturnType<typeof AiExtractElementInfo<T>>>['usage'];\n let reasoning_content: string | undefined;\n\n try {\n const result = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n pageDescription,\n });\n parseResult = result.parseResult;\n rawResponse = result.rawResponse;\n usage = result.usage;\n reasoning_content = result.reasoning_content;\n } catch (error) {\n if (error instanceof AIResponseParseError) {\n // Create dump with usage and rawResponse from the error\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: error.rawResponse,\n usage: error.usage,\n };\n const dump = createServiceDump({\n type: 'extract',\n userQuery: { dataDemand },\n matchedElement: [],\n data: null,\n taskInfo,\n error: error.message,\n });\n throw new ServiceError(error.message, dump);\n }\n throw error;\n }\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse,\n formatResponse: JSON.stringify(parseResult),\n usage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n const dump = createServiceDump({\n ...dumpData,\n data,\n });\n\n if (errorLog && !data) {\n throw new ServiceError(errorLog, dump);\n }\n\n return {\n data,\n thought,\n usage,\n reasoning_content,\n dump,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for service.describe');\n const context = await this.contextRetrieverFn();\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n assert(screenshotBase64, 'screenshot is required for service.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { modelFamily } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(targetRect, size, modelFamily);\n // Only crop when the search area covers at least 50% of the screen\n // in both dimensions. Small crops (e.g., 500px on 1920x1080) lose\n // too much context and cause model hallucinations.\n const widthRatio = searchArea.width / size.width;\n const heightRatio = searchArea.height / size.height;\n if (widthRatio >= 0.5 && heightRatio >= 0.5) {\n debug('describe: cropping to searchArea', searchArea);\n const croppedResult = await cropByRect(\n imagePayload,\n searchArea,\n modelFamily === 'qwen2.5-vl',\n );\n imagePayload = croppedResult.imageBase64;\n } else {\n debug(\n 'describe: skip cropping, search area too small (%dx%d on %dx%d)',\n searchArea.width,\n searchArea.height,\n size.width,\n size.height,\n );\n }\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn = this\n .aiVendorFn as typeof callAIWithObjectResponse<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, modelConfig);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Service","query","opt","modelConfig","queryPrompt","assert","globalDeepThinkSwitch","globalConfigManager","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelFamily","console","undefined","isAutoGLM","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","rawResponse","usage","reasoning_content","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","dump","createServiceDump","ServiceError","dataDemand","pageDescription","multimodalPrompt","result","AiExtractElementInfo","error","AIResponseParseError","data","thought","target","size","screenshotBase64","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","widthRatio","heightRatio","croppedResult","cropByRect","msgs","callAIFn","res","content","callAIWithObjectResponse","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAgDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IA4BnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACM;QAC/B,MAAMC,cAAc,AAAiB,YAAjB,OAAOH,QAAqBA,QAAQA,MAAM,MAAM;QACpEI,OAAOD,aAAa;QAEpBC,OAAO,AAAiB,YAAjB,OAAOJ,OAAoB;QAElC,MAAMK,wBAAwBC,oBAAoB,qBAAqB,CACrEC;QAEF,IAAIF,uBACFR,MAAM,yBAAyBQ;QAEjC,IAAIG;QACJ,IAAIR,MAAM,SAAS,IAAIK,uBACrBG,mBAAmBR,MAAM,MAAM;QAGjC,MAAM,EAAES,WAAW,EAAE,GAAGP;QAExB,IAAIM,oBAAoB,CAACC,aAAa;YACpCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBG;QACrB;QAEA,IAAIH,oBAAoBI,UAAUH,cAAc;YAC9CC,QAAQ,IAAI,CAAC;YACbF,mBAAmBG;QACrB;QAEA,MAAME,UAAUZ,KAAK,WAAY,MAAM,IAAI,CAAC,kBAAkB;QAE9D,IAAIa;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIT,kBAAkB;YACpBS,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBL;gBACpBN;YACF;YACAE,OACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAET,iBAAiB,CAAC,EAChDS,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEC,IAAI,EAAEC,WAAW,EAAEC,KAAK,EAAEC,iBAAiB,EAAE,GAChE,MAAMC,gBAAgB;YACpB,UAAU,IAAI,CAAC,UAAU;YACzBb;YACA,0BAA0BV;YAC1B,cAAcc;YACdf;QACF;QAEF,MAAMyB,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAV;YACAC;YACAC;YACAS;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,4BAA4B,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG3E,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS5B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNM;YACA,WAAW,CAAC,CAACd;YACb,OAAOgB;QACT;QAEA,MAAME,WAAWX,YAAY,QAAQ,IAAI,EAAE;QAE3C,MAAMY,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACX,gBAAgBC;QAClB;QAEA,IAAIF,UACF,MAAM,IAAIK,aAAaL,UAAUG;QAGnC,IAAID,SAAS,MAAM,GAAG,GACpB,MAAM,IAAIG,aACR,CAAC,0CAA0C,EAAEH,SAAS,MAAM,EAAE,EAC9DC;QAIJ,IAAID,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,aAAaA,QAAQ,CAAC,EAAE,CAAE,WAAW;YACvC;YACAV;YACAW;QACF;QAGF,OAAO;YACL,SAAS;YACTX;YACAW;QACF;IACF;IAEA,MAAM,QACJG,UAA+B,EAC/BlC,WAAyB,EACzBD,GAA0B,EAC1BoC,eAAwB,EACxBC,gBAAoC,EACpCzB,OAAmB,EACe;QAClCT,OAAOS,SAAS;QAChBT,OACE,AAAsB,YAAtB,OAAOgC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAGvE,MAAMjB,YAAYC,KAAK,GAAG;QAE1B,IAAIC;QAGJ,IAAIE;QACJ,IAAIC;QACJ,IAAIC;QAEJ,IAAI;YACF,MAAMc,SAAS,MAAMC,qBAAwB;gBAC3C3B;gBACA,WAAWuB;gBACXE;gBACA,eAAerC;gBACfC;gBACAmC;YACF;YACAhB,cAAckB,OAAO,WAAW;YAChChB,cAAcgB,OAAO,WAAW;YAChCf,QAAQe,OAAO,KAAK;YACpBd,oBAAoBc,OAAO,iBAAiB;QAC9C,EAAE,OAAOE,OAAO;YACd,IAAIA,iBAAiBC,sBAAsB;gBAEzC,MAAMf,WAAWP,KAAK,GAAG,KAAKD;gBAC9B,MAAMS,WAA4B;oBAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;oBACtC,YAAYD;oBACZ,aAAac,MAAM,WAAW;oBAC9B,OAAOA,MAAM,KAAK;gBACpB;gBACA,MAAMR,OAAOC,kBAAkB;oBAC7B,MAAM;oBACN,WAAW;wBAAEE;oBAAW;oBACxB,gBAAgB,EAAE;oBAClB,MAAM;oBACNR;oBACA,OAAOa,MAAM,OAAO;gBACtB;gBACA,MAAM,IAAIN,aAAaM,MAAM,OAAO,EAAER;YACxC;YACA,MAAMQ;QACR;QAEA,MAAMd,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZJ;YACA,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAC;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,qBAAqB,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTK;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNR;YACA,OAAOE;QACT;QAEA,MAAM,EAAEa,IAAI,EAAEC,OAAO,EAAE,GAAGvB,eAAe,CAAC;QAE1C,MAAMY,OAAOC,kBAAkB;YAC7B,GAAGH,QAAQ;YACXY;QACF;QAEA,IAAIb,YAAY,CAACa,MACf,MAAM,IAAIR,aAAaL,UAAUG;QAGnC,OAAO;YACLU;YACAC;YACApB;YACAC;YACAQ;QACF;IACF;IAEA,MAAM,SACJY,MAA+B,EAC/B3C,WAAyB,EACzBD,GAEC,EACwD;QACzDG,OAAOyC,QAAQ;QACf,MAAMhC,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAC7C,MAAM,EAAEiC,IAAI,EAAE,GAAGjC;QACjB,MAAMkC,mBAAmBlC,QAAQ,UAAU,CAAC,MAAM;QAClDT,OAAO2C,kBAAkB;QAEzB,MAAM,EAAEtC,WAAW,EAAE,GAAGP;QACxB,MAAM8C,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBR;YAChBD;YACA,sBAAsB;gBACpB;oBACE,MAAMK;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIlD,KAAK,WAAW;YAClB,MAAMa,aAAa0C,iBAAiBL,YAAYL,MAAMrC;YAItD,MAAMgD,aAAa3C,WAAW,KAAK,GAAGgC,KAAK,KAAK;YAChD,MAAMY,cAAc5C,WAAW,MAAM,GAAGgC,KAAK,MAAM;YACnD,IAAIW,cAAc,OAAOC,eAAe,KAAK;gBAC3C7D,MAAM,oCAAoCiB;gBAC1C,MAAM6C,gBAAgB,MAAMC,WAC1BN,cACAxC,YACAL,AAAgB,iBAAhBA;gBAEF6C,eAAeK,cAAc,WAAW;YAC1C,OACE9D,MACE,mEACAiB,WAAW,KAAK,EAChBA,WAAW,MAAM,EACjBgC,KAAK,KAAK,EACVA,KAAK,MAAM;QAGjB;QAEA,MAAMe,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASb;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMQ,WAAW,IAAI,CAClB,UAAU;QAEb,MAAMC,MAAM,MAAMD,SAASD,MAAM3D;QAEjC,MAAM,EAAE8D,OAAO,EAAE,GAAGD;QACpB3D,OAAO,CAAC4D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D5D,OAAO4D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAhWA,YACEnD,OAA2D,EAC3DZ,GAAoB,CACpB;QAVF;QAEA,qCACEgE;QAEF;QAME7D,OAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMqD,QAAQ,OAAO,CAACrD;QAIlD,IAAI,AAA2B,WAApBZ,KAAK,YACd,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,KAAK,UACd,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AA+UF"}
|
package/dist/es/task-runner.mjs
CHANGED
|
@@ -132,7 +132,12 @@ class TaskRunner {
|
|
|
132
132
|
if (task.subTask) {
|
|
133
133
|
uiContext = this.findPreviousNonSubTaskUIContext(taskIndex);
|
|
134
134
|
assert(uiContext, 'subTask requires uiContext from previous non-subTask task');
|
|
135
|
-
} else
|
|
135
|
+
} else {
|
|
136
|
+
const forceRefresh = 'Insight' === task.type;
|
|
137
|
+
uiContext = await this.getUiContext({
|
|
138
|
+
forceRefresh
|
|
139
|
+
});
|
|
140
|
+
}
|
|
136
141
|
task.uiContext = uiContext;
|
|
137
142
|
const executorContext = {
|
|
138
143
|
task,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"task-runner.mjs","sources":["../../src/task-runner.ts"],"sourcesContent":["import type { ScreenshotItem } from '@/screenshot-item';\nimport {\n ExecutionDump,\n type ExecutionRecorderItem,\n type ExecutionTask,\n type ExecutionTaskActionApply,\n type ExecutionTaskApply,\n type ExecutionTaskPlanningLocateOutput,\n type ExecutionTaskProgressOptions,\n type ExecutionTaskReturn,\n type ExecutorContext,\n type PlanningActionParamError,\n type UIContext,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\n\nconst debug = getDebug('task-runner');\nconst UI_CONTEXT_CACHE_TTL_MS = 300;\n\ntype TaskRunnerInitOptions = ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n};\n\ntype TaskRunnerOperationOptions = {\n allowWhenError?: boolean;\n};\n\nexport class TaskRunner {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of runner\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly uiContextBuilder: () => Promise<UIContext>;\n\n private readonly onTaskUpdate?:\n | ((runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void)\n | undefined;\n\n constructor(\n name: string,\n uiContextBuilder: () => Promise<UIContext>,\n options?: TaskRunnerInitOptions,\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n this.uiContextBuilder = uiContextBuilder;\n this.onTaskUpdate = options?.onTaskUpdate;\n }\n\n private async emitOnTaskUpdate(error?: TaskExecutionError): Promise<void> {\n if (!this.onTaskUpdate) {\n return;\n }\n await this.onTaskUpdate(this, error);\n }\n\n private lastUiContext?: {\n context: UIContext;\n capturedAt: number;\n };\n\n private async getUiContext(options?: { forceRefresh?: boolean }): Promise<\n UIContext | undefined\n > {\n const now = Date.now();\n const shouldReuse =\n !options?.forceRefresh &&\n this.lastUiContext &&\n now - this.lastUiContext.capturedAt <= UI_CONTEXT_CACHE_TTL_MS;\n\n if (shouldReuse && this.lastUiContext?.context) {\n debug(\n `reuse cached uiContext captured ${now - this.lastUiContext.capturedAt}ms ago`,\n );\n return this.lastUiContext?.context;\n }\n\n try {\n const uiContext = await this.uiContextBuilder();\n if (uiContext) {\n this.lastUiContext = {\n context: uiContext,\n capturedAt: Date.now(),\n };\n } else {\n this.lastUiContext = undefined;\n }\n return uiContext;\n } catch (error) {\n this.lastUiContext = undefined;\n throw error;\n }\n }\n\n private async captureScreenshot(): Promise<ScreenshotItem | undefined> {\n try {\n const uiContext = await this.getUiContext({ forceRefresh: true });\n return uiContext?.screenshot;\n } catch (error) {\n console.error('error while capturing screenshot', error);\n }\n return undefined;\n }\n\n private attachRecorderItem(\n task: ExecutionTask,\n screenshot: ScreenshotItem | undefined,\n phase: 'after-calling',\n ): void {\n if (!phase || !screenshot) {\n return;\n }\n\n const recorderItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: Date.now(),\n screenshot,\n timing: phase,\n };\n\n if (!task.recorder) {\n task.recorder = [recorderItem];\n return;\n }\n task.recorder.push(recorderItem);\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n private normalizeStatusFromError(\n options?: TaskRunnerOperationOptions,\n errorMessage?: string,\n ): void {\n if (this.status !== 'error') {\n return;\n }\n assert(\n options?.allowWhenError,\n errorMessage ||\n `task runner is in error state, cannot proceed\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n // reset runner state so new tasks can run\n this.status = this.tasks.length > 0 ? 'pending' : 'init';\n }\n\n private findPreviousNonSubTaskUIContext(\n currentIndex: number,\n ): UIContext | undefined {\n for (let i = currentIndex - 1; i >= 0; i--) {\n const candidate = this.tasks[i];\n if (!candidate || candidate.subTask) {\n continue;\n }\n if (candidate.uiContext) {\n return candidate.uiContext;\n }\n }\n return undefined;\n }\n\n async append(\n task: ExecutionTaskApply[] | ExecutionTaskApply,\n options?: TaskRunnerOperationOptions,\n ): Promise<void> {\n this.normalizeStatusFromError(\n options,\n `task runner is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n await this.emitOnTaskUpdate();\n }\n\n async appendAndFlush(\n task: ExecutionTaskApply[] | ExecutionTaskApply,\n options?: TaskRunnerOperationOptions,\n ): Promise<{ output: any; thought?: string } | undefined> {\n await this.append(task, options);\n return this.flush(options);\n }\n\n async flush(\n options?: TaskRunnerOperationOptions,\n ): Promise<{ output: any; thought?: string } | undefined> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for task runner, status is init but tasks are not empty',\n );\n }\n\n this.normalizeStatusFromError(options, 'task runner is in error state');\n assert(this.status !== 'running', 'task runner is already running');\n assert(this.status !== 'completed', 'task runner is already completed');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n await this.emitOnTaskUpdate();\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskPlanningLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n await this.emitOnTaskUpdate();\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action Space', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n let uiContext: UIContext | undefined;\n if (task.subTask) {\n uiContext = this.findPreviousNonSubTaskUIContext(taskIndex);\n assert(\n uiContext,\n 'subTask requires uiContext from previous non-subTask task',\n );\n } else {\n uiContext = await this.getUiContext();\n }\n task.uiContext = uiContext;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n uiContext,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'WaitFor' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported service subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n } else if (task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskPlanningLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action Space') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n const isLastTask = taskIndex === this.tasks.length - 1;\n\n if (isLastTask) {\n const screenshot = await this.captureScreenshot();\n this.attachRecorderItem(task, screenshot, 'after-calling');\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n await this.emitOnTaskUpdate();\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error = e;\n task.errorMessage =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n await this.emitOnTaskUpdate();\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n if (taskIndex + 1 < this.tasks.length) {\n await this.emitOnTaskUpdate();\n }\n\n let finalizeError: TaskExecutionError | undefined;\n if (!successfullyCompleted) {\n this.status = 'error';\n const errorTask = this.latestErrorTask();\n const messageBase =\n errorTask?.errorMessage ||\n (errorTask?.error ? String(errorTask.error) : 'Task execution failed');\n const stack = errorTask?.errorStack;\n const message = stack ? `${messageBase}\\n${stack}` : messageBase;\n finalizeError = new TaskExecutionError(message, this, errorTask, {\n cause: errorTask?.error,\n });\n await this.emitOnTaskUpdate(finalizeError);\n } else {\n this.status = 'completed';\n await this.emitOnTaskUpdate();\n }\n\n if (finalizeError) {\n throw finalizeError;\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n const { thought, output } = this.tasks[outputIndex];\n return {\n thought,\n output,\n };\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n // Find the LAST failed task (not the first one)\n // This is important when using allowWhenError to continue after errors\n for (let i = this.tasks.length - 1; i >= 0; i--) {\n if (this.tasks[i].status === 'failed') {\n return this.tasks[i];\n }\n }\n return null;\n }\n\n dump(): ExecutionDump {\n return new ExecutionDump({\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n });\n }\n\n async appendErrorPlan(errorMsg: string): Promise<{\n output: undefined;\n runner: TaskRunner;\n }> {\n const errorTask: ExecutionTaskActionApply<PlanningActionParamError> = {\n type: 'Action Space',\n subType: 'Error',\n param: {\n thought: errorMsg,\n },\n thought: errorMsg,\n executor: async () => {\n throw new Error(errorMsg || 'error without thought');\n },\n };\n await this.appendAndFlush(errorTask);\n\n return {\n output: undefined,\n runner: this,\n };\n }\n}\n\nexport class TaskExecutionError extends Error {\n runner: TaskRunner;\n\n errorTask: ExecutionTask | null;\n\n constructor(\n message: string,\n runner: TaskRunner,\n errorTask: ExecutionTask | null,\n options?: { cause?: unknown },\n ) {\n super(message, options);\n this.runner = runner;\n this.errorTask = errorTask;\n }\n}\n"],"names":["debug","getDebug","UI_CONTEXT_CACHE_TTL_MS","TaskRunner","error","options","now","Date","shouldReuse","uiContext","undefined","console","task","screenshot","phase","recorderItem","errorMessage","assert","currentIndex","i","candidate","Array","item","nextPendingIndex","taskIndex","successfullyCompleted","previousFindOutput","e","executor","param","returnValue","executorContext","isLastTask","Object","finalizeError","errorTask","messageBase","String","stack","message","TaskExecutionError","outputIndex","Math","thought","output","ExecutionDump","errorMsg","Error","name","uiContextBuilder","runner"],"mappings":";;;;;;;;;;;;;AAiBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,0BAA0B;AAczB,MAAMC;IAgCX,MAAc,iBAAiBC,KAA0B,EAAiB;QACxE,IAAI,CAAC,IAAI,CAAC,YAAY,EACpB;QAEF,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,EAAEA;IAChC;IAOA,MAAc,aAAaC,OAAoC,EAE7D;QACA,MAAMC,MAAMC,KAAK,GAAG;QACpB,MAAMC,cACJ,CAACH,SAAS,gBACV,IAAI,CAAC,aAAa,IAClBC,MAAM,IAAI,CAAC,aAAa,CAAC,UAAU,IAAIJ;QAEzC,IAAIM,eAAe,IAAI,CAAC,aAAa,EAAE,SAAS;YAC9CR,MACE,CAAC,gCAAgC,EAAEM,MAAM,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,MAAM,CAAC;YAEhF,OAAO,IAAI,CAAC,aAAa,EAAE;QAC7B;QAEA,IAAI;YACF,MAAMG,YAAY,MAAM,IAAI,CAAC,gBAAgB;YAC7C,IAAIA,WACF,IAAI,CAAC,aAAa,GAAG;gBACnB,SAASA;gBACT,YAAYF,KAAK,GAAG;YACtB;iBAEA,IAAI,CAAC,aAAa,GAAGG;YAEvB,OAAOD;QACT,EAAE,OAAOL,OAAO;YACd,IAAI,CAAC,aAAa,GAAGM;YACrB,MAAMN;QACR;IACF;IAEA,MAAc,oBAAyD;QACrE,IAAI;YACF,MAAMK,YAAY,MAAM,IAAI,CAAC,YAAY,CAAC;gBAAE,cAAc;YAAK;YAC/D,OAAOA,WAAW;QACpB,EAAE,OAAOL,OAAO;YACdO,QAAQ,KAAK,CAAC,oCAAoCP;QACpD;IAEF;IAEQ,mBACNQ,IAAmB,EACnBC,UAAsC,EACtCC,KAAsB,EAChB;QACN,IAAI,CAACA,SAAS,CAACD,YACb;QAGF,MAAME,eAAsC;YAC1C,MAAM;YACN,IAAIR,KAAK,GAAG;YACZM;YACA,QAAQC;QACV;QAEA,IAAI,CAACF,KAAK,QAAQ,EAAE;YAClBA,KAAK,QAAQ,GAAG;gBAACG;aAAa;YAC9B;QACF;QACAH,KAAK,QAAQ,CAAC,IAAI,CAACG;IACrB;IAEQ,kBAAkBH,IAAwB,EAAiB;QACjE,OAAO;YACL,QAAQ;YACR,GAAGA,IAAI;QACT;IACF;IAEQ,yBACNP,OAAoC,EACpCW,YAAqB,EACf;QACN,IAAI,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACb;QAEFC,OACEZ,SAAS,gBACTW,gBACE,CAAC,qDAAqD,EAAE,IAAI,CAAC,eAAe,IAAI,MAAM,EAAE,EAAE,IAAI,CAAC,eAAe,IAAI,YAAY;QAGlI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,YAAY;IACpD;IAEQ,gCACNE,YAAoB,EACG;QACvB,IAAK,IAAIC,IAAID,eAAe,GAAGC,KAAK,GAAGA,IAAK;YAC1C,MAAMC,YAAY,IAAI,CAAC,KAAK,CAACD,EAAE;YAC/B,IAAI,AAACC,cAAaA,UAAU,OAAO,EAGnC;gBAAA,IAAIA,UAAU,SAAS,EACrB,OAAOA,UAAU,SAAS;YAC5B;QACF;IAEF;IAEA,MAAM,OACJR,IAA+C,EAC/CP,OAAoC,EACrB;QACf,IAAI,CAAC,wBAAwB,CAC3BA,SACA,CAAC,yDAAyD,EAAE,IAAI,CAAC,eAAe,IAAI,MAAM,EAAE,EAAE,IAAI,CAAC,eAAe,IAAI,YAAY;QAEpI,IAAIgB,MAAM,OAAO,CAACT,OAChB,IAAI,CAAC,KAAK,CAAC,IAAI,IAAIA,KAAK,GAAG,CAAC,CAACU,OAAS,IAAI,CAAC,iBAAiB,CAACA;aAE7D,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAACV;QAEzC,IAAI,AAAgB,cAAhB,IAAI,CAAC,MAAM,EACb,IAAI,CAAC,MAAM,GAAG;QAEhB,MAAM,IAAI,CAAC,gBAAgB;IAC7B;IAEA,MAAM,eACJA,IAA+C,EAC/CP,OAAoC,EACoB;QACxD,MAAM,IAAI,CAAC,MAAM,CAACO,MAAMP;QACxB,OAAO,IAAI,CAAC,KAAK,CAACA;IACpB;IAEA,MAAM,MACJA,OAAoC,EACoB;QACxD,IAAI,AAAgB,WAAhB,IAAI,CAAC,MAAM,IAAe,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAChDM,QAAQ,IAAI,CACV;QAIJ,IAAI,CAAC,wBAAwB,CAACN,SAAS;QACvCY,OAAO,AAAgB,cAAhB,IAAI,CAAC,MAAM,EAAgB;QAClCA,OAAO,AAAgB,gBAAhB,IAAI,CAAC,MAAM,EAAkB;QAEpC,MAAMM,mBAAmB,IAAI,CAAC,KAAK,CAAC,SAAS,CAC3C,CAACX,OAASA,AAAgB,cAAhBA,KAAK,MAAM;QAEvB,IAAIW,mBAAmB,GAErB;QAGF,IAAI,CAAC,MAAM,GAAG;QACd,MAAM,IAAI,CAAC,gBAAgB;QAC3B,IAAIC,YAAYD;QAChB,IAAIE,wBAAwB;QAE5B,IAAIC;QAEJ,MAAOF,YAAY,IAAI,CAAC,KAAK,CAAC,MAAM,CAAE;YACpC,MAAMZ,OAAO,IAAI,CAAC,KAAK,CAACY,UAAU;YAClCP,OACEL,AAAgB,cAAhBA,KAAK,MAAM,EACX,CAAC,wCAAwC,EAAEA,KAAK,MAAM,EAAE;YAE1DA,KAAK,MAAM,GAAG;gBACZ,OAAOL,KAAK,GAAG;YACjB;YACA,IAAI;gBACFK,KAAK,MAAM,GAAG;gBACd,MAAM,IAAI,CAAC,gBAAgB;gBAC3B,IAAI;oBACF,IAAI,IAAI,CAAC,WAAW,EAClB,MAAM,IAAI,CAAC,WAAW,CAACA;gBAE3B,EAAE,OAAOe,GAAG;oBACVhB,QAAQ,KAAK,CAAC,wBAAwBgB;gBACxC;gBACAV,OACE;oBAAC;oBAAW;oBAAgB;iBAAW,CAAC,OAAO,CAACL,KAAK,IAAI,KAAK,GAC9D,CAAC,uBAAuB,EAAEA,KAAK,IAAI,EAAE;gBAGvC,MAAM,EAAEgB,QAAQ,EAAEC,KAAK,EAAE,GAAGjB;gBAC5BK,OAAOW,UAAU,CAAC,oCAAoC,EAAEhB,KAAK,IAAI,EAAE;gBAEnE,IAAIkB;gBACJ,IAAIrB;gBACJ,IAAIG,KAAK,OAAO,EAAE;oBAChBH,YAAY,IAAI,CAAC,+BAA+B,CAACe;oBACjDP,OACER,WACA;gBAEJ,OACEA,YAAY,MAAM,IAAI,CAAC,YAAY;gBAErCG,KAAK,SAAS,GAAGH;gBACjB,MAAMsB,kBAAmC;oBACvCnB;oBACA,SAASc,oBAAoB;oBAC7BjB;gBACF;gBAEA,IAAIG,AAAc,cAAdA,KAAK,IAAI,EAAgB;oBAC3BK,OACEL,AAAiB,YAAjBA,KAAK,OAAO,IACVA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,cAAjBA,KAAK,OAAO,IACZA,AAAiB,cAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,EACd,CAAC,6BAA6B,EAAEA,KAAK,OAAO,EAAE;oBAEhDkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOE;gBAC3C,OAAO,IAAInB,AAAc,eAAdA,KAAK,IAAI,EAAiB;oBACnCkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOE;oBACzC,IAAInB,AAAiB,aAAjBA,KAAK,OAAO,EACdc,qBACEI,aACC;gBAEP,OAAO,IAAIlB,AAAc,mBAAdA,KAAK,IAAI,EAClBkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOE;qBACpC;oBACLpB,QAAQ,IAAI,CACV,CAAC,uBAAuB,EAAEC,KAAK,IAAI,CAAC,iCAAiC,CAAC;oBAExEkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOE;gBAC3C;gBAEA,MAAMC,aAAaR,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG;gBAErD,IAAIQ,YAAY;oBACd,MAAMnB,aAAa,MAAM,IAAI,CAAC,iBAAiB;oBAC/C,IAAI,CAAC,kBAAkB,CAACD,MAAMC,YAAY;gBAC5C;gBAEAoB,OAAO,MAAM,CAACrB,MAAMkB;gBACpBlB,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGL,KAAK,GAAG;gBAC1BK,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtD,MAAM,IAAI,CAAC,gBAAgB;gBAC3BY;YACF,EAAE,OAAOG,GAAQ;gBACfF,wBAAwB;gBACxBb,KAAK,KAAK,GAAGe;gBACbf,KAAK,YAAY,GACfe,GAAG,WAAY,CAAa,YAAb,OAAOA,IAAiBA,IAAI,uBAAsB;gBACnEf,KAAK,UAAU,GAAGe,EAAE,KAAK;gBAEzBf,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGL,KAAK,GAAG;gBAC1BK,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtD,MAAM,IAAI,CAAC,gBAAgB;gBAC3B;YACF;QACF;QAGA,IAAK,IAAIO,IAAIK,YAAY,GAAGL,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAEA,IACjD,IAAI,CAAC,KAAK,CAACA,EAAE,CAAC,MAAM,GAAG;QAEzB,IAAIK,YAAY,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EACnC,MAAM,IAAI,CAAC,gBAAgB;QAG7B,IAAIU;QACJ,IAAKT,uBAYE;YACL,IAAI,CAAC,MAAM,GAAG;YACd,MAAM,IAAI,CAAC,gBAAgB;QAC7B,OAf4B;YAC1B,IAAI,CAAC,MAAM,GAAG;YACd,MAAMU,YAAY,IAAI,CAAC,eAAe;YACtC,MAAMC,cACJD,WAAW,gBACVA,CAAAA,WAAW,QAAQE,OAAOF,UAAU,KAAK,IAAI,uBAAsB;YACtE,MAAMG,QAAQH,WAAW;YACzB,MAAMI,UAAUD,QAAQ,GAAGF,YAAY,EAAE,EAAEE,OAAO,GAAGF;YACrDF,gBAAgB,IAAIM,mBAAmBD,SAAS,IAAI,EAAEJ,WAAW;gBAC/D,OAAOA,WAAW;YACpB;YACA,MAAM,IAAI,CAAC,gBAAgB,CAACD;QAC9B;QAKA,IAAIA,eACF,MAAMA;QAGR,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAErB,MAAMO,cAAcC,KAAK,GAAG,CAAClB,WAAW,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG;YAC5D,MAAM,EAAEmB,OAAO,EAAEC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAACH,YAAY;YACnD,OAAO;gBACLE;gBACAC;YACF;QACF;IACF;IAEA,iBAA0B;QACxB,OAAO,AAAgB,YAAhB,IAAI,CAAC,MAAM;IACpB;IAEA,kBAAwC;QACtC,IAAI,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACb,OAAO;QAIT,IAAK,IAAIzB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAAGA,KAAK,GAAGA,IAC1C,IAAI,AAAyB,aAAzB,IAAI,CAAC,KAAK,CAACA,EAAE,CAAC,MAAM,EACtB,OAAO,IAAI,CAAC,KAAK,CAACA,EAAE;QAGxB,OAAO;IACT;IAEA,OAAsB;QACpB,OAAO,IAAI0B,cAAc;YACvB,SAAStC,KAAK,GAAG;YACjB,MAAM,IAAI,CAAC,IAAI;YACf,OAAO,IAAI,CAAC,KAAK;QACnB;IACF;IAEA,MAAM,gBAAgBuC,QAAgB,EAGnC;QACD,MAAMX,YAAgE;YACpE,MAAM;YACN,SAAS;YACT,OAAO;gBACL,SAASW;YACX;YACA,SAASA;YACT,UAAU;gBACR,MAAM,IAAIC,MAAMD,YAAY;YAC9B;QACF;QACA,MAAM,IAAI,CAAC,cAAc,CAACX;QAE1B,OAAO;YACL,QAAQzB;YACR,QAAQ,IAAI;QACd;IACF;IAvXA,YACEsC,IAAY,EACZC,gBAA0C,EAC1C5C,OAA+B,CAC/B;QAnBF;QAEA;QAGA;QAEA;QAEA,uBAAiB,oBAAjB;QAEA,uBAAiB,gBAAjB;QA2BA,uBAAQ,iBAAR;QAlBE,IAAI,CAAC,MAAM,GACTA,SAAS,SAASA,QAAQ,KAAK,CAAC,MAAM,GAAG,IAAI,YAAY;QAC3D,IAAI,CAAC,IAAI,GAAG2C;QACZ,IAAI,CAAC,KAAK,GAAI3C,AAAAA,CAAAA,SAAS,SAAS,EAAC,EAAG,GAAG,CAAC,CAACiB,OACvC,IAAI,CAAC,iBAAiB,CAACA;QAEzB,IAAI,CAAC,WAAW,GAAGjB,SAAS;QAC5B,IAAI,CAAC,gBAAgB,GAAG4C;QACxB,IAAI,CAAC,YAAY,GAAG5C,SAAS;IAC/B;AA0WF;AAEO,MAAMmC,2BAA2BO;IAKtC,YACER,OAAe,EACfW,MAAkB,EAClBf,SAA+B,EAC/B9B,OAA6B,CAC7B;QACA,KAAK,CAACkC,SAASlC,UAVjB,0CAEA;QASE,IAAI,CAAC,MAAM,GAAG6C;QACd,IAAI,CAAC,SAAS,GAAGf;IACnB;AACF"}
|
|
1
|
+
{"version":3,"file":"task-runner.mjs","sources":["../../src/task-runner.ts"],"sourcesContent":["import type { ScreenshotItem } from '@/screenshot-item';\nimport {\n ExecutionDump,\n type ExecutionRecorderItem,\n type ExecutionTask,\n type ExecutionTaskActionApply,\n type ExecutionTaskApply,\n type ExecutionTaskPlanningLocateOutput,\n type ExecutionTaskProgressOptions,\n type ExecutionTaskReturn,\n type ExecutorContext,\n type PlanningActionParamError,\n type UIContext,\n} from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\n\nconst debug = getDebug('task-runner');\nconst UI_CONTEXT_CACHE_TTL_MS = 300;\n\ntype TaskRunnerInitOptions = ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n};\n\ntype TaskRunnerOperationOptions = {\n allowWhenError?: boolean;\n};\n\nexport class TaskRunner {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of runner\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly uiContextBuilder: () => Promise<UIContext>;\n\n private readonly onTaskUpdate?:\n | ((runner: TaskRunner, error?: TaskExecutionError) => Promise<void> | void)\n | undefined;\n\n constructor(\n name: string,\n uiContextBuilder: () => Promise<UIContext>,\n options?: TaskRunnerInitOptions,\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n this.uiContextBuilder = uiContextBuilder;\n this.onTaskUpdate = options?.onTaskUpdate;\n }\n\n private async emitOnTaskUpdate(error?: TaskExecutionError): Promise<void> {\n if (!this.onTaskUpdate) {\n return;\n }\n await this.onTaskUpdate(this, error);\n }\n\n private lastUiContext?: {\n context: UIContext;\n capturedAt: number;\n };\n\n private async getUiContext(options?: { forceRefresh?: boolean }): Promise<\n UIContext | undefined\n > {\n const now = Date.now();\n const shouldReuse =\n !options?.forceRefresh &&\n this.lastUiContext &&\n now - this.lastUiContext.capturedAt <= UI_CONTEXT_CACHE_TTL_MS;\n\n if (shouldReuse && this.lastUiContext?.context) {\n debug(\n `reuse cached uiContext captured ${now - this.lastUiContext.capturedAt}ms ago`,\n );\n return this.lastUiContext?.context;\n }\n\n try {\n const uiContext = await this.uiContextBuilder();\n if (uiContext) {\n this.lastUiContext = {\n context: uiContext,\n capturedAt: Date.now(),\n };\n } else {\n this.lastUiContext = undefined;\n }\n return uiContext;\n } catch (error) {\n this.lastUiContext = undefined;\n throw error;\n }\n }\n\n private async captureScreenshot(): Promise<ScreenshotItem | undefined> {\n try {\n const uiContext = await this.getUiContext({ forceRefresh: true });\n return uiContext?.screenshot;\n } catch (error) {\n console.error('error while capturing screenshot', error);\n }\n return undefined;\n }\n\n private attachRecorderItem(\n task: ExecutionTask,\n screenshot: ScreenshotItem | undefined,\n phase: 'after-calling',\n ): void {\n if (!phase || !screenshot) {\n return;\n }\n\n const recorderItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: Date.now(),\n screenshot,\n timing: phase,\n };\n\n if (!task.recorder) {\n task.recorder = [recorderItem];\n return;\n }\n task.recorder.push(recorderItem);\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n private normalizeStatusFromError(\n options?: TaskRunnerOperationOptions,\n errorMessage?: string,\n ): void {\n if (this.status !== 'error') {\n return;\n }\n assert(\n options?.allowWhenError,\n errorMessage ||\n `task runner is in error state, cannot proceed\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n // reset runner state so new tasks can run\n this.status = this.tasks.length > 0 ? 'pending' : 'init';\n }\n\n private findPreviousNonSubTaskUIContext(\n currentIndex: number,\n ): UIContext | undefined {\n for (let i = currentIndex - 1; i >= 0; i--) {\n const candidate = this.tasks[i];\n if (!candidate || candidate.subTask) {\n continue;\n }\n if (candidate.uiContext) {\n return candidate.uiContext;\n }\n }\n return undefined;\n }\n\n async append(\n task: ExecutionTaskApply[] | ExecutionTaskApply,\n options?: TaskRunnerOperationOptions,\n ): Promise<void> {\n this.normalizeStatusFromError(\n options,\n `task runner is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n await this.emitOnTaskUpdate();\n }\n\n async appendAndFlush(\n task: ExecutionTaskApply[] | ExecutionTaskApply,\n options?: TaskRunnerOperationOptions,\n ): Promise<{ output: any; thought?: string } | undefined> {\n await this.append(task, options);\n return this.flush(options);\n }\n\n async flush(\n options?: TaskRunnerOperationOptions,\n ): Promise<{ output: any; thought?: string } | undefined> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for task runner, status is init but tasks are not empty',\n );\n }\n\n this.normalizeStatusFromError(options, 'task runner is in error state');\n assert(this.status !== 'running', 'task runner is already running');\n assert(this.status !== 'completed', 'task runner is already completed');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n await this.emitOnTaskUpdate();\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskPlanningLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n await this.emitOnTaskUpdate();\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action Space', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n let uiContext: UIContext | undefined;\n if (task.subTask) {\n uiContext = this.findPreviousNonSubTaskUIContext(taskIndex);\n assert(\n uiContext,\n 'subTask requires uiContext from previous non-subTask task',\n );\n } else {\n // For Insight tasks (Query/Assert/WaitFor), always get fresh context\n // to ensure we have the latest UI state after any preceding actions\n const forceRefresh = task.type === 'Insight';\n uiContext = await this.getUiContext({ forceRefresh });\n }\n task.uiContext = uiContext;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n uiContext,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'WaitFor' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported service subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n } else if (task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskPlanningLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action Space') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n const isLastTask = taskIndex === this.tasks.length - 1;\n\n if (isLastTask) {\n const screenshot = await this.captureScreenshot();\n this.attachRecorderItem(task, screenshot, 'after-calling');\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n await this.emitOnTaskUpdate();\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error = e;\n task.errorMessage =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n await this.emitOnTaskUpdate();\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n if (taskIndex + 1 < this.tasks.length) {\n await this.emitOnTaskUpdate();\n }\n\n let finalizeError: TaskExecutionError | undefined;\n if (!successfullyCompleted) {\n this.status = 'error';\n const errorTask = this.latestErrorTask();\n const messageBase =\n errorTask?.errorMessage ||\n (errorTask?.error ? String(errorTask.error) : 'Task execution failed');\n const stack = errorTask?.errorStack;\n const message = stack ? `${messageBase}\\n${stack}` : messageBase;\n finalizeError = new TaskExecutionError(message, this, errorTask, {\n cause: errorTask?.error,\n });\n await this.emitOnTaskUpdate(finalizeError);\n } else {\n this.status = 'completed';\n await this.emitOnTaskUpdate();\n }\n\n if (finalizeError) {\n throw finalizeError;\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n const { thought, output } = this.tasks[outputIndex];\n return {\n thought,\n output,\n };\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n // Find the LAST failed task (not the first one)\n // This is important when using allowWhenError to continue after errors\n for (let i = this.tasks.length - 1; i >= 0; i--) {\n if (this.tasks[i].status === 'failed') {\n return this.tasks[i];\n }\n }\n return null;\n }\n\n dump(): ExecutionDump {\n return new ExecutionDump({\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n });\n }\n\n async appendErrorPlan(errorMsg: string): Promise<{\n output: undefined;\n runner: TaskRunner;\n }> {\n const errorTask: ExecutionTaskActionApply<PlanningActionParamError> = {\n type: 'Action Space',\n subType: 'Error',\n param: {\n thought: errorMsg,\n },\n thought: errorMsg,\n executor: async () => {\n throw new Error(errorMsg || 'error without thought');\n },\n };\n await this.appendAndFlush(errorTask);\n\n return {\n output: undefined,\n runner: this,\n };\n }\n}\n\nexport class TaskExecutionError extends Error {\n runner: TaskRunner;\n\n errorTask: ExecutionTask | null;\n\n constructor(\n message: string,\n runner: TaskRunner,\n errorTask: ExecutionTask | null,\n options?: { cause?: unknown },\n ) {\n super(message, options);\n this.runner = runner;\n this.errorTask = errorTask;\n }\n}\n"],"names":["debug","getDebug","UI_CONTEXT_CACHE_TTL_MS","TaskRunner","error","options","now","Date","shouldReuse","uiContext","undefined","console","task","screenshot","phase","recorderItem","errorMessage","assert","currentIndex","i","candidate","Array","item","nextPendingIndex","taskIndex","successfullyCompleted","previousFindOutput","e","executor","param","returnValue","forceRefresh","executorContext","isLastTask","Object","finalizeError","errorTask","messageBase","String","stack","message","TaskExecutionError","outputIndex","Math","thought","output","ExecutionDump","errorMsg","Error","name","uiContextBuilder","runner"],"mappings":";;;;;;;;;;;;;AAiBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,0BAA0B;AAczB,MAAMC;IAgCX,MAAc,iBAAiBC,KAA0B,EAAiB;QACxE,IAAI,CAAC,IAAI,CAAC,YAAY,EACpB;QAEF,MAAM,IAAI,CAAC,YAAY,CAAC,IAAI,EAAEA;IAChC;IAOA,MAAc,aAAaC,OAAoC,EAE7D;QACA,MAAMC,MAAMC,KAAK,GAAG;QACpB,MAAMC,cACJ,CAACH,SAAS,gBACV,IAAI,CAAC,aAAa,IAClBC,MAAM,IAAI,CAAC,aAAa,CAAC,UAAU,IAAIJ;QAEzC,IAAIM,eAAe,IAAI,CAAC,aAAa,EAAE,SAAS;YAC9CR,MACE,CAAC,gCAAgC,EAAEM,MAAM,IAAI,CAAC,aAAa,CAAC,UAAU,CAAC,MAAM,CAAC;YAEhF,OAAO,IAAI,CAAC,aAAa,EAAE;QAC7B;QAEA,IAAI;YACF,MAAMG,YAAY,MAAM,IAAI,CAAC,gBAAgB;YAC7C,IAAIA,WACF,IAAI,CAAC,aAAa,GAAG;gBACnB,SAASA;gBACT,YAAYF,KAAK,GAAG;YACtB;iBAEA,IAAI,CAAC,aAAa,GAAGG;YAEvB,OAAOD;QACT,EAAE,OAAOL,OAAO;YACd,IAAI,CAAC,aAAa,GAAGM;YACrB,MAAMN;QACR;IACF;IAEA,MAAc,oBAAyD;QACrE,IAAI;YACF,MAAMK,YAAY,MAAM,IAAI,CAAC,YAAY,CAAC;gBAAE,cAAc;YAAK;YAC/D,OAAOA,WAAW;QACpB,EAAE,OAAOL,OAAO;YACdO,QAAQ,KAAK,CAAC,oCAAoCP;QACpD;IAEF;IAEQ,mBACNQ,IAAmB,EACnBC,UAAsC,EACtCC,KAAsB,EAChB;QACN,IAAI,CAACA,SAAS,CAACD,YACb;QAGF,MAAME,eAAsC;YAC1C,MAAM;YACN,IAAIR,KAAK,GAAG;YACZM;YACA,QAAQC;QACV;QAEA,IAAI,CAACF,KAAK,QAAQ,EAAE;YAClBA,KAAK,QAAQ,GAAG;gBAACG;aAAa;YAC9B;QACF;QACAH,KAAK,QAAQ,CAAC,IAAI,CAACG;IACrB;IAEQ,kBAAkBH,IAAwB,EAAiB;QACjE,OAAO;YACL,QAAQ;YACR,GAAGA,IAAI;QACT;IACF;IAEQ,yBACNP,OAAoC,EACpCW,YAAqB,EACf;QACN,IAAI,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACb;QAEFC,OACEZ,SAAS,gBACTW,gBACE,CAAC,qDAAqD,EAAE,IAAI,CAAC,eAAe,IAAI,MAAM,EAAE,EAAE,IAAI,CAAC,eAAe,IAAI,YAAY;QAGlI,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,YAAY;IACpD;IAEQ,gCACNE,YAAoB,EACG;QACvB,IAAK,IAAIC,IAAID,eAAe,GAAGC,KAAK,GAAGA,IAAK;YAC1C,MAAMC,YAAY,IAAI,CAAC,KAAK,CAACD,EAAE;YAC/B,IAAI,AAACC,cAAaA,UAAU,OAAO,EAGnC;gBAAA,IAAIA,UAAU,SAAS,EACrB,OAAOA,UAAU,SAAS;YAC5B;QACF;IAEF;IAEA,MAAM,OACJR,IAA+C,EAC/CP,OAAoC,EACrB;QACf,IAAI,CAAC,wBAAwB,CAC3BA,SACA,CAAC,yDAAyD,EAAE,IAAI,CAAC,eAAe,IAAI,MAAM,EAAE,EAAE,IAAI,CAAC,eAAe,IAAI,YAAY;QAEpI,IAAIgB,MAAM,OAAO,CAACT,OAChB,IAAI,CAAC,KAAK,CAAC,IAAI,IAAIA,KAAK,GAAG,CAAC,CAACU,OAAS,IAAI,CAAC,iBAAiB,CAACA;aAE7D,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAACV;QAEzC,IAAI,AAAgB,cAAhB,IAAI,CAAC,MAAM,EACb,IAAI,CAAC,MAAM,GAAG;QAEhB,MAAM,IAAI,CAAC,gBAAgB;IAC7B;IAEA,MAAM,eACJA,IAA+C,EAC/CP,OAAoC,EACoB;QACxD,MAAM,IAAI,CAAC,MAAM,CAACO,MAAMP;QACxB,OAAO,IAAI,CAAC,KAAK,CAACA;IACpB;IAEA,MAAM,MACJA,OAAoC,EACoB;QACxD,IAAI,AAAgB,WAAhB,IAAI,CAAC,MAAM,IAAe,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAChDM,QAAQ,IAAI,CACV;QAIJ,IAAI,CAAC,wBAAwB,CAACN,SAAS;QACvCY,OAAO,AAAgB,cAAhB,IAAI,CAAC,MAAM,EAAgB;QAClCA,OAAO,AAAgB,gBAAhB,IAAI,CAAC,MAAM,EAAkB;QAEpC,MAAMM,mBAAmB,IAAI,CAAC,KAAK,CAAC,SAAS,CAC3C,CAACX,OAASA,AAAgB,cAAhBA,KAAK,MAAM;QAEvB,IAAIW,mBAAmB,GAErB;QAGF,IAAI,CAAC,MAAM,GAAG;QACd,MAAM,IAAI,CAAC,gBAAgB;QAC3B,IAAIC,YAAYD;QAChB,IAAIE,wBAAwB;QAE5B,IAAIC;QAEJ,MAAOF,YAAY,IAAI,CAAC,KAAK,CAAC,MAAM,CAAE;YACpC,MAAMZ,OAAO,IAAI,CAAC,KAAK,CAACY,UAAU;YAClCP,OACEL,AAAgB,cAAhBA,KAAK,MAAM,EACX,CAAC,wCAAwC,EAAEA,KAAK,MAAM,EAAE;YAE1DA,KAAK,MAAM,GAAG;gBACZ,OAAOL,KAAK,GAAG;YACjB;YACA,IAAI;gBACFK,KAAK,MAAM,GAAG;gBACd,MAAM,IAAI,CAAC,gBAAgB;gBAC3B,IAAI;oBACF,IAAI,IAAI,CAAC,WAAW,EAClB,MAAM,IAAI,CAAC,WAAW,CAACA;gBAE3B,EAAE,OAAOe,GAAG;oBACVhB,QAAQ,KAAK,CAAC,wBAAwBgB;gBACxC;gBACAV,OACE;oBAAC;oBAAW;oBAAgB;iBAAW,CAAC,OAAO,CAACL,KAAK,IAAI,KAAK,GAC9D,CAAC,uBAAuB,EAAEA,KAAK,IAAI,EAAE;gBAGvC,MAAM,EAAEgB,QAAQ,EAAEC,KAAK,EAAE,GAAGjB;gBAC5BK,OAAOW,UAAU,CAAC,oCAAoC,EAAEhB,KAAK,IAAI,EAAE;gBAEnE,IAAIkB;gBACJ,IAAIrB;gBACJ,IAAIG,KAAK,OAAO,EAAE;oBAChBH,YAAY,IAAI,CAAC,+BAA+B,CAACe;oBACjDP,OACER,WACA;gBAEJ,OAAO;oBAGL,MAAMsB,eAAenB,AAAc,cAAdA,KAAK,IAAI;oBAC9BH,YAAY,MAAM,IAAI,CAAC,YAAY,CAAC;wBAAEsB;oBAAa;gBACrD;gBACAnB,KAAK,SAAS,GAAGH;gBACjB,MAAMuB,kBAAmC;oBACvCpB;oBACA,SAASc,oBAAoB;oBAC7BjB;gBACF;gBAEA,IAAIG,AAAc,cAAdA,KAAK,IAAI,EAAgB;oBAC3BK,OACEL,AAAiB,YAAjBA,KAAK,OAAO,IACVA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,cAAjBA,KAAK,OAAO,IACZA,AAAiB,cAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,EACd,CAAC,6BAA6B,EAAEA,KAAK,OAAO,EAAE;oBAEhDkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOG;gBAC3C,OAAO,IAAIpB,AAAc,eAAdA,KAAK,IAAI,EAAiB;oBACnCkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOG;oBACzC,IAAIpB,AAAiB,aAAjBA,KAAK,OAAO,EACdc,qBACEI,aACC;gBAEP,OAAO,IAAIlB,AAAc,mBAAdA,KAAK,IAAI,EAClBkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOG;qBACpC;oBACLrB,QAAQ,IAAI,CACV,CAAC,uBAAuB,EAAEC,KAAK,IAAI,CAAC,iCAAiC,CAAC;oBAExEkB,cAAc,MAAMlB,KAAK,QAAQ,CAACiB,OAAOG;gBAC3C;gBAEA,MAAMC,aAAaT,cAAc,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG;gBAErD,IAAIS,YAAY;oBACd,MAAMpB,aAAa,MAAM,IAAI,CAAC,iBAAiB;oBAC/C,IAAI,CAAC,kBAAkB,CAACD,MAAMC,YAAY;gBAC5C;gBAEAqB,OAAO,MAAM,CAACtB,MAAMkB;gBACpBlB,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGL,KAAK,GAAG;gBAC1BK,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtD,MAAM,IAAI,CAAC,gBAAgB;gBAC3BY;YACF,EAAE,OAAOG,GAAQ;gBACfF,wBAAwB;gBACxBb,KAAK,KAAK,GAAGe;gBACbf,KAAK,YAAY,GACfe,GAAG,WAAY,CAAa,YAAb,OAAOA,IAAiBA,IAAI,uBAAsB;gBACnEf,KAAK,UAAU,GAAGe,EAAE,KAAK;gBAEzBf,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGL,KAAK,GAAG;gBAC1BK,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtD,MAAM,IAAI,CAAC,gBAAgB;gBAC3B;YACF;QACF;QAGA,IAAK,IAAIO,IAAIK,YAAY,GAAGL,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAEA,IACjD,IAAI,CAAC,KAAK,CAACA,EAAE,CAAC,MAAM,GAAG;QAEzB,IAAIK,YAAY,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EACnC,MAAM,IAAI,CAAC,gBAAgB;QAG7B,IAAIW;QACJ,IAAKV,uBAYE;YACL,IAAI,CAAC,MAAM,GAAG;YACd,MAAM,IAAI,CAAC,gBAAgB;QAC7B,OAf4B;YAC1B,IAAI,CAAC,MAAM,GAAG;YACd,MAAMW,YAAY,IAAI,CAAC,eAAe;YACtC,MAAMC,cACJD,WAAW,gBACVA,CAAAA,WAAW,QAAQE,OAAOF,UAAU,KAAK,IAAI,uBAAsB;YACtE,MAAMG,QAAQH,WAAW;YACzB,MAAMI,UAAUD,QAAQ,GAAGF,YAAY,EAAE,EAAEE,OAAO,GAAGF;YACrDF,gBAAgB,IAAIM,mBAAmBD,SAAS,IAAI,EAAEJ,WAAW;gBAC/D,OAAOA,WAAW;YACpB;YACA,MAAM,IAAI,CAAC,gBAAgB,CAACD;QAC9B;QAKA,IAAIA,eACF,MAAMA;QAGR,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAErB,MAAMO,cAAcC,KAAK,GAAG,CAACnB,WAAW,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG;YAC5D,MAAM,EAAEoB,OAAO,EAAEC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAACH,YAAY;YACnD,OAAO;gBACLE;gBACAC;YACF;QACF;IACF;IAEA,iBAA0B;QACxB,OAAO,AAAgB,YAAhB,IAAI,CAAC,MAAM;IACpB;IAEA,kBAAwC;QACtC,IAAI,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACb,OAAO;QAIT,IAAK,IAAI1B,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAAGA,KAAK,GAAGA,IAC1C,IAAI,AAAyB,aAAzB,IAAI,CAAC,KAAK,CAACA,EAAE,CAAC,MAAM,EACtB,OAAO,IAAI,CAAC,KAAK,CAACA,EAAE;QAGxB,OAAO;IACT;IAEA,OAAsB;QACpB,OAAO,IAAI2B,cAAc;YACvB,SAASvC,KAAK,GAAG;YACjB,MAAM,IAAI,CAAC,IAAI;YACf,OAAO,IAAI,CAAC,KAAK;QACnB;IACF;IAEA,MAAM,gBAAgBwC,QAAgB,EAGnC;QACD,MAAMX,YAAgE;YACpE,MAAM;YACN,SAAS;YACT,OAAO;gBACL,SAASW;YACX;YACA,SAASA;YACT,UAAU;gBACR,MAAM,IAAIC,MAAMD,YAAY;YAC9B;QACF;QACA,MAAM,IAAI,CAAC,cAAc,CAACX;QAE1B,OAAO;YACL,QAAQ1B;YACR,QAAQ,IAAI;QACd;IACF;IA1XA,YACEuC,IAAY,EACZC,gBAA0C,EAC1C7C,OAA+B,CAC/B;QAnBF;QAEA;QAGA;QAEA;QAEA,uBAAiB,oBAAjB;QAEA,uBAAiB,gBAAjB;QA2BA,uBAAQ,iBAAR;QAlBE,IAAI,CAAC,MAAM,GACTA,SAAS,SAASA,QAAQ,KAAK,CAAC,MAAM,GAAG,IAAI,YAAY;QAC3D,IAAI,CAAC,IAAI,GAAG4C;QACZ,IAAI,CAAC,KAAK,GAAI5C,AAAAA,CAAAA,SAAS,SAAS,EAAC,EAAG,GAAG,CAAC,CAACiB,OACvC,IAAI,CAAC,iBAAiB,CAACA;QAEzB,IAAI,CAAC,WAAW,GAAGjB,SAAS;QAC5B,IAAI,CAAC,gBAAgB,GAAG6C;QACxB,IAAI,CAAC,YAAY,GAAG7C,SAAS;IAC/B;AA6WF;AAEO,MAAMoC,2BAA2BO;IAKtC,YACER,OAAe,EACfW,MAAkB,EAClBf,SAA+B,EAC/B/B,OAA6B,CAC7B;QACA,KAAK,CAACmC,SAASnC,UAVjB,0CAEA;QASE,IAAI,CAAC,MAAM,GAAG8C;QACd,IAAI,CAAC,SAAS,GAAGf;IACnB;AACF"}
|