@midscene/core 1.2.2-beta-20260120025423.0 → 1.2.2-beta-20260120030311.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +1 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +1 -0
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/package.json +2 -2
package/dist/es/agent/utils.mjs
CHANGED
|
@@ -103,7 +103,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
103
103
|
return;
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
|
-
const getMidsceneVersion = ()=>"1.2.2-beta-20260120025423.0";
|
|
106
|
+
const getMidsceneVersion = ()=>"1.2.2-beta-20260120030311.0";
|
|
107
107
|
const parsePrompt = (prompt)=>{
|
|
108
108
|
if ('string' == typeof prompt) return {
|
|
109
109
|
textPrompt: prompt,
|
|
@@ -112,6 +112,7 @@ Please tell what the next one action is (or null if no action should be done) to
|
|
|
112
112
|
- Make sure the previous actions are completed successfully before performing the next step
|
|
113
113
|
- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the "error" field to the error message.
|
|
114
114
|
- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the "Print_Assert_Result" action.
|
|
115
|
+
- Call the "Finalize" action when the task is completed and no more actions should be done.
|
|
115
116
|
|
|
116
117
|
## Supporting actions
|
|
117
118
|
${actionList}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst buildCommonOutputFields = (\n includeThought: boolean,\n preferredLanguage: string,\n) => {\n const fields = [\n `\"note\"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action. Use ${preferredLanguage}.`,\n `\"log\": string, // a brief preamble to the user explaining what you're about to do. Use ${preferredLanguage}.`,\n `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. 
Use ${preferredLanguage}.`,\n ];\n\n if (includeThought) {\n fields.unshift(\n `\"thought\": string, // your thought process about the next action`,\n );\n }\n\n return fields.join('\\n ');\n};\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const 
[key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description 
provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n const shouldIncludeThought = includeThought ?? 
true;\n const commonOutputFields = buildCommonOutputFields(\n shouldIncludeThought,\n preferredLanguage,\n );\n const exampleThoughtLine = shouldIncludeThought\n ? ` \"thought\": \"The form has already been filled, I need to click the login button to login\",\n`\n : '';\n const exampleThoughtLineWithNote = shouldIncludeThought\n ? ` \"thought\": \"I need to note the titles in the current screenshot for further processing and scroll to find more titles\",\n`\n : '';\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. 
You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n${exampleThoughtLine} \"log\": \"Click the login button\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${modelFamily && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n}\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n{\n${exampleThoughtLineWithNote} \"note\": \"The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'\",\n \"log\": \"Scroll to find more titles\",\n \"action\": {\n \"type\": \"Scroll\",\n \"param\": {\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n }\n 
}\n}\n`;\n}\n"],"names":["buildCommonOutputFields","includeThought","preferredLanguage","fields","vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","getPreferredLanguage","Error","actionDescriptionList","actionList","logFieldInstruction","shouldIncludeThought","commonOutputFields","exampleThoughtLine","exampleThoughtLineWithNote"],"mappings":";;;AAaA,MAAMA,0BAA0B,CAC9BC,gBACAC;IAEA,MAAMC,SAAS;QACb,CAAC,yRAAyR,EAAED,kBAAkB,CAAC,CAAC;QAChT,CAAC,uFAAuF,EAAEA,kBAAkB,CAAC,CAAC;QAC9G,CAAC,2KAA2K,EAAEA,kBAAkB,CAAC,CAAC;KACnM;IAED,IAAID,gBACFE,OAAO,OAAO,CACZ;IAIJ,OAAOA,OAAO,IAAI,CAAC;AACrB;AAEA,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMb,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEW,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMG,aAAuB,EAAE;QAG/B,MAAMC,SAASJ,OAAO,WAAW;QAIjC,MAAMK,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKb,MAAM,IAAIc,OAAO,OAAO,CAACF,OACxC,IAAIZ,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMe,aACJ,AACE,cADF,OAAQf,MAAyC,UAAU,IAE1DA,MAAwC,
UAAU;gBACrD,MAAMgB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAelB,OAAOO;gBAGvC,MAAMY,cAAcC,kBAAkBpB;gBAGtC,MAAMqB,eAAetB,iBAAiBC;gBACtC,MAAMsB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBd,OAAO,IAAI,CAAC;gBACZc,WAAW,OAAO,CAAC,CAACmB;oBAClBjC,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEiC,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBlC,OAAO,IAAI,CAACkC;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEvB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMb,OAAO,IAAI,CAAC,CAAC,EAAE,EAAEa,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAesB,2BAA2B,EAC/CC,WAAW,EACXlC,WAAW,EACXmC,WAAW,EACXvC,cAAc,EAMf;IACC,MAAMC,oBAAoBuC;IAG1B,IAAID,eAAe,CAACnC,aAClB,MAAM,IAAIqC,MACR;IAIJ,MAAMC,wBAAwBJ,YAAY,GAAG,CAAC,CAACzB,SACtCD,qBACLC,QACAV,cAAcoC,cAAcnC,cAAc0B;IAG9C,MAAMa,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,sBAAsB,CAAC;;;;;QAKvB,EAAE3C,kBAAkB;;;;;;;;;;AAU5B,CAAC;IAEC,MAAM4C,uBAAuB7C,kBAAkB;IAC/C,MAAM8C,qBAAqB/C,wBACzB8C,sBACA5C;IAEF,MAAM8C,qBAAqBF,uBACvB,CAAC;AACP,CAAC,GACK;IACJ,MAAMG,6BAA6BH,uBAC/B,CAAC;AACP,CAAC,GACK;IAEJ,OAAO,CAAC;;;;;;;;;;;;;;;AAeV,EAAEF,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;EAMpB,EAAEE,mBAAmB;;;;;;;;;;;;;AAavB,EAAEC,mBAAmB;;;;;oCAKe,EAAE3C,eAAemC,cAAc,mCAAmC,GAAG;;;;;;;;;AASzG,EAAES,2BAA2B;;;;;;;;;;;;AAY7B,CAAC;AACD"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TModelFamily } from '@midscene/shared/env';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst buildCommonOutputFields = (\n includeThought: boolean,\n preferredLanguage: string,\n) => {\n const fields = [\n `\"note\"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action. Use ${preferredLanguage}.`,\n `\"log\": string, // a brief preamble to the user explaining what you're about to do. Use ${preferredLanguage}.`,\n `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. 
Use ${preferredLanguage}.`,\n ];\n\n if (includeThought) {\n fields.unshift(\n `\"thought\": string, // your thought process about the next action`,\n );\n }\n\n return fields.join('\\n ');\n};\n\nconst vlLocateParam = (modelFamily: TModelFamily | undefined) => {\n if (modelFamily) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(modelFamily)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const 
[key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description 
provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n modelFamily,\n includeBbox,\n includeThought,\n}: {\n actionSpace: DeviceAction<any>[];\n modelFamily: TModelFamily | undefined;\n includeBbox: boolean;\n includeThought?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n // Validate parameters: if includeBbox is true, modelFamily must be defined\n if (includeBbox && !modelFamily) {\n throw new Error(\n 'modelFamily cannot be undefined when includeBbox is true. A valid modelFamily is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? modelFamily : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n const shouldIncludeThought = includeThought ?? 
true;\n const commonOutputFields = buildCommonOutputFields(\n shouldIncludeThought,\n preferredLanguage,\n );\n const exampleThoughtLine = shouldIncludeThought\n ? ` \"thought\": \"The form has already been filled, I need to click the login button to login\",\n`\n : '';\n const exampleThoughtLineWithNote = shouldIncludeThought\n ? ` \"thought\": \"I need to note the titles in the current screenshot for further processing and scroll to find more titles\",\n`\n : '';\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. 
You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n- Call the \"Finalize\" action when the task is completed and no more actions should be done.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n${exampleThoughtLine} \"log\": \"Click the login button\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${modelFamily && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n}\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n{\n${exampleThoughtLineWithNote} \"note\": \"The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'\",\n \"log\": \"Scroll to find more titles\",\n \"action\": {\n \"type\": \"Scroll\",\n \"param\": {\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n }\n 
}\n}\n`;\n}\n"],"names":["buildCommonOutputFields","includeThought","preferredLanguage","fields","vlLocateParam","modelFamily","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","getPreferredLanguage","Error","actionDescriptionList","actionList","logFieldInstruction","shouldIncludeThought","commonOutputFields","exampleThoughtLine","exampleThoughtLineWithNote"],"mappings":";;;AAaA,MAAMA,0BAA0B,CAC9BC,gBACAC;IAEA,MAAMC,SAAS;QACb,CAAC,yRAAyR,EAAED,kBAAkB,CAAC,CAAC;QAChT,CAAC,uFAAuF,EAAEA,kBAAkB,CAAC,CAAC;QAC9G,CAAC,2KAA2K,EAAEA,kBAAkB,CAAC,CAAC;KACnM;IAED,IAAID,gBACFE,OAAO,OAAO,CACZ;IAIJ,OAAOA,OAAO,IAAI,CAAC;AACrB;AAEA,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,aACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,cAAc;IAEvG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMb,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEW,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMG,aAAuB,EAAE;QAG/B,MAAMC,SAASJ,OAAO,WAAW;QAIjC,MAAMK,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKb,MAAM,IAAIc,OAAO,OAAO,CAACF,OACxC,IAAIZ,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMe,aACJ,AACE,cADF,OAAQf,MAAyC,UAAU,IAE1DA,MAAwC,
UAAU;gBACrD,MAAMgB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAelB,OAAOO;gBAGvC,MAAMY,cAAcC,kBAAkBpB;gBAGtC,MAAMqB,eAAetB,iBAAiBC;gBACtC,MAAMsB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBd,OAAO,IAAI,CAAC;gBACZc,WAAW,OAAO,CAAC,CAACmB;oBAClBjC,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEiC,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBlC,OAAO,IAAI,CAACkC;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEvB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMb,OAAO,IAAI,CAAC,CAAC,EAAE,EAAEa,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAesB,2BAA2B,EAC/CC,WAAW,EACXlC,WAAW,EACXmC,WAAW,EACXvC,cAAc,EAMf;IACC,MAAMC,oBAAoBuC;IAG1B,IAAID,eAAe,CAACnC,aAClB,MAAM,IAAIqC,MACR;IAIJ,MAAMC,wBAAwBJ,YAAY,GAAG,CAAC,CAACzB,SACtCD,qBACLC,QACAV,cAAcoC,cAAcnC,cAAc0B;IAG9C,MAAMa,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,sBAAsB,CAAC;;;;;QAKvB,EAAE3C,kBAAkB;;;;;;;;;;AAU5B,CAAC;IAEC,MAAM4C,uBAAuB7C,kBAAkB;IAC/C,MAAM8C,qBAAqB/C,wBACzB8C,sBACA5C;IAEF,MAAM8C,qBAAqBF,uBACvB,CAAC;AACP,CAAC,GACK;IACJ,MAAMG,6BAA6BH,uBAC/B,CAAC;AACP,CAAC,GACK;IAEJ,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAEF,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;EAMpB,EAAEE,mBAAmB;;;;;;;;;;;;;AAavB,EAAEC,mBAAmB;;;;;oCAKe,EAAE3C,eAAemC,cAAc,mCAAmC,GAAG;;;;;;;;;AASzG,EAAES,2BAA2B;;;;;;;;;;;;AAY7B,CAAC;AACD"}
|