@midscene/core 0.30.6-beta-20251022112352.0 → 0.30.6-beta-20251023092723.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -540,8 +540,10 @@ class TaskExecutor {
540
540
  const { data, usage, thought } = await this.insight.extract(demandInput, modelConfig, opt, multimodalPrompt);
541
541
  let outputResult = data;
542
542
  if (ifTypeRestricted) if ('string' == typeof data) outputResult = data;
543
+ else if ('WaitFor' === type) outputResult = null == data ? false : data[keyOfResult];
544
+ else if (null == data) outputResult = null;
543
545
  else {
544
- assert('WaitFor' !== type ? (null == data ? void 0 : data[keyOfResult]) !== void 0 : true, 'No result in query data');
546
+ assert((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
545
547
  outputResult = data[keyOfResult];
546
548
  }
547
549
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"agent/tasks.mjs","sources":["webpack://@midscene/core/./src/agent/tasks.ts"],"sourcesContent":["import {\n ConversationHistory,\n findAllMidsceneLocatorField,\n parseActionParam,\n plan,\n uiTarsPlanning,\n} from '@/ai-model';\nimport { Executor } from '@/ai-model/action-executor';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';\nimport type { AbstractInterface } from '@/device';\nimport type Insight from '@/insight';\nimport type {\n AIUsageInfo,\n DetailedLocateParam,\n DumpSubscriber,\n ElementCacheFeature,\n ExecutionRecorderItem,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskInsightLocateApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanning,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n ExecutorContext,\n InsightDump,\n InsightExtractOption,\n InsightExtractParam,\n InterfaceType,\n LocateResultElement,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamError,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n PlanningLocateParam,\n} from '@/types';\nimport { sleep } from '@/utils';\nimport {\n type IModelConfig,\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport { taskTitleStr } from './ui-utils';\nimport {\n matchElementFromCache,\n matchElementFromPlan,\n parsePrompt,\n} from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n executor: Executor;\n}\n\nconst debug = getDebug('device-task-executor');\nconst defaultReplanningCycleLimit = 10;\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n locate,\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n insight: Insight;\n\n taskCache?: TaskCache;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n insight: Insight,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n },\n ) {\n this.interface = interfaceInstance;\n this.insight = insight;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.conversationHistory = new ConversationHistory();\n }\n\n private async recordScreenshot(timing: ExecutionRecorderItem['timing']) {\n const base64 = await this.interface.screenshotBase64();\n const item: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: Date.now(),\n screenshot: base64,\n timing,\n };\n return item;\n }\n\n private prependExecutorWithScreenshot(\n taskApply: ExecutionTaskApply,\n appendAfterExecution = false,\n ): ExecutionTaskApply {\n const taskWithScreenshot: ExecutionTaskApply = {\n ...taskApply,\n executor: async (param, context, ...args) => {\n const recorder: ExecutionRecorderItem[] = [];\n const { task } = context;\n // set the recorder before executor in case of error\n task.recorder = recorder;\n const shot = await this.recordScreenshot(`before ${task.type}`);\n recorder.push(shot);\n\n const result = await taskApply.executor(param, context, ...args);\n\n if (appendAfterExecution) {\n const shot2 = await this.recordScreenshot('after Action');\n recorder.push(shot2);\n }\n return result;\n },\n };\n return taskWithScreenshot;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n cacheable?: boolean,\n ) {\n const tasks: ExecutionTaskApply[] = [];\n\n const taskForLocatePlan = (\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskInsightLocateApply => {\n if (typeof detailedLocateParam === 'string') {\n detailedLocateParam = {\n prompt: detailedLocateParam,\n };\n }\n // Apply cacheable option from convertPlanToExecutable if it was explicitly set\n if (cacheable !== undefined) {\n detailedLocateParam = {\n ...detailedLocateParam,\n cacheable,\n };\n }\n const taskFind: ExecutionTaskInsightLocateApply = {\n type: 'Insight',\n subType: 'Locate',\n param: detailedLocateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n assert(\n param?.prompt || param?.id || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n let insightDump: InsightDump | undefined;\n let usage: AIUsageInfo | undefined;\n const dumpCollector: DumpSubscriber = (dump) => {\n insightDump = dump;\n usage = dump?.taskInfo?.usage;\n\n task.log = {\n dump: insightDump,\n };\n\n task.usage = usage;\n\n // Store searchAreaUsage in task metadata\n if (dump?.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n };\n this.insight.onceDumpUpdatedFn = dumpCollector;\n const shotTime = Date.now();\n\n // Get context through contextRetrieverFn which handles frozen context\n const uiContext = await this.insight.contextRetrieverFn('locate');\n task.uiContext = uiContext;\n\n const recordItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: shotTime,\n screenshot: uiContext.screenshotBase64,\n timing: 'before Insight',\n };\n task.recorder = [recordItem];\n\n // try matching xpath\n const elementFromXpath =\n param.xpath && (this.interface as any).getElementInfoByXpath\n ? await (this.interface as any).getElementInfoByXpath(param.xpath)\n : undefined;\n const userExpectedPathHitFlag = !!elementFromXpath;\n\n // try matching cache\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n const elementFromCache = userExpectedPathHitFlag\n ? null\n : await matchElementFromCache(\n this,\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n const cacheHitFlag = !!elementFromCache;\n\n // try matching plan\n const elementFromPlan =\n !userExpectedPathHitFlag && !cacheHitFlag\n ? matchElementFromPlan(param, uiContext.tree)\n : undefined;\n const planHitFlag = !!elementFromPlan;\n\n // try ai locate\n const elementFromAiLocate =\n !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag\n ? (\n await this.insight.locate(\n param,\n {\n // fallback to ai locate\n context: uiContext,\n },\n modelConfig,\n )\n ).element\n : undefined;\n const aiLocateHitFlag = !!elementFromAiLocate;\n\n const element =\n elementFromXpath || // highest priority\n elementFromCache || // second priority\n elementFromPlan || // third priority\n elementFromAiLocate;\n\n // update cache\n let currentCacheEntry: ElementCacheFeature | undefined;\n if (\n element &&\n this.taskCache &&\n !cacheHitFlag &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForRect) {\n try {\n const feature = await this.interface.cacheFeatureForRect(\n element.rect,\n element.isOrderSensitive !== undefined\n ? { _orderSensitive: element.isOrderSensitive }\n : undefined,\n );\n if (feature && Object.keys(feature).length > 0) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForRect failed: %s', error);\n }\n } else {\n debug('cacheFeatureForRect is not supported, skip cache update');\n }\n }\n if (!element) {\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (userExpectedPathHitFlag) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (cacheHitFlag) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n } else if (planHitFlag) {\n hitBy = {\n from: 'Planning',\n context: {\n id: elementFromPlan?.id,\n bbox: elementFromPlan?.bbox,\n },\n };\n } else if (aiLocateHitFlag) {\n hitBy = {\n from: 'AI model',\n context: {\n prompt: param.prompt,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element,\n },\n uiContext,\n hitBy,\n };\n },\n };\n return taskFind;\n };\n\n for (const plan of plans) {\n if (plan.type === 'Locate') {\n if (\n !plan.locate ||\n plan.locate === null ||\n plan.locate?.id === null ||\n plan.locate?.id === 'null'\n ) {\n debug('Locate action with id is null, will be ignored', plan);\n continue;\n }\n const taskLocate = taskForLocatePlan(plan, plan.locate);\n\n tasks.push(taskLocate);\n } else if (plan.type === 'Error') {\n const taskActionError: ExecutionTaskActionApply<PlanningActionParamError> =\n {\n type: 'Action',\n subType: 'Error',\n param: plan.param,\n thought: plan.thought || plan.param?.thought,\n locate: plan.locate,\n executor: async () => {\n throw new Error(\n plan?.thought || plan.param?.thought || 'error without thought',\n );\n },\n };\n tasks.push(taskActionError);\n } else if (plan.type === 'Finished') {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n locate: plan.locate,\n executor: async (param) => {},\n };\n tasks.push(taskActionFinished);\n } else if (plan.type === 'Sleep') {\n const taskActionSleep: ExecutionTaskActionApply<PlanningActionParamSleep> =\n {\n type: 'Action',\n subType: 'Sleep',\n param: plan.param,\n thought: plan.thought,\n locate: plan.locate,\n executor: async (taskParam) => {\n await sleep(taskParam?.timeMs || 3000);\n },\n };\n tasks.push(taskActionSleep);\n } else {\n // action in action space\n const planType = plan.type;\n const actionSpace = await this.interface.actionSpace();\n const action = actionSpace.find((action) => action.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n // find all params that needs location\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n );\n const locateTask = taskForLocatePlan(\n locatePlan,\n param[field],\n (result) => {\n param[field] = result;\n },\n );\n tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, context) => {\n debug(\n 'executing action',\n planType,\n param,\n `context.element.center: ${context.element?.center}`,\n );\n\n // Get context for actionSpace operations to ensure size info is available\n const uiContext = await this.insight.contextRetrieverFn('locate');\n context.task.uiContext = uiContext;\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`,\n );\n });\n\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug('will call \"beforeInvokeAction\" for interface');\n await this.interface.beforeInvokeAction(action.name, param);\n debug('called \"beforeInvokeAction\" for interface');\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n // Validate and parse parameters with defaults\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema);\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n await actionFn(param, context);\n debug('called action', action.name);\n\n try {\n if (this.interface.afterInvokeAction) {\n debug('will call \"afterInvokeAction\" for interface');\n await this.interface.afterInvokeAction(action.name, param);\n debug('called \"afterInvokeAction\" for interface');\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n // Return a proper result for report generation\n return {\n output: {\n success: true,\n action: planType,\n param: param,\n },\n };\n },\n };\n tasks.push(task);\n }\n }\n\n const wrappedTasks = tasks.map(\n (task: ExecutionTaskApply, index: number) => {\n if (task.type === 'Action') {\n return this.prependExecutorWithScreenshot(\n task,\n index === tasks.length - 1,\n );\n }\n return task;\n },\n );\n\n return {\n tasks: wrappedTasks,\n };\n }\n\n private async setupPlanningContext(executorContext: ExecutorContext) {\n const shotTime = Date.now();\n const uiContext = await this.insight.contextRetrieverFn('locate');\n const recordItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: shotTime,\n screenshot: uiContext.screenshotBase64,\n timing: 'before Planning',\n };\n\n executorContext.task.recorder = [recordItem];\n (executorContext.task as ExecutionTaskPlanning).uiContext = uiContext;\n\n return {\n uiContext,\n };\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const taskExecutor = new Executor(taskTitleStr('Action', userInstruction), {\n onTaskStart: this.onTaskStartCallback,\n });\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n await this.setupPlanningContext(executorContext);\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n\n await taskExecutor.append(task);\n await taskExecutor.flush();\n\n return {\n executor: taskExecutor,\n };\n }\n\n private createPlanningTask(\n userInstruction: string,\n actionContext: string | undefined,\n modelConfig: IModelConfig,\n ): ExecutionTaskPlanningApply {\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'Plan',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = await this.setupPlanningContext(executorContext);\n const { vlMode } = modelConfig;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars' ? modelConfig.uiTarsModelVersion : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(\n param.userInstruction,\n {\n context: uiContext,\n actionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig,\n conversationHistory: this.conversationHistory,\n },\n );\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n\n const finalActions = actions || [];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push({\n type: 'Sleep',\n param: {\n timeMs: timeRemaining,\n },\n locate: null,\n } as PlanningAction<PlanningActionParamSleep>);\n }\n }\n\n if (finalActions.length === 0) {\n assert(\n !more_actions_needed_by_instruction || sleep,\n error ? `Failed to plan: ${error}` : 'No plan found',\n );\n }\n\n return {\n output: {\n actions: finalActions,\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n },\n cache: {\n hit: false,\n },\n uiContext,\n };\n },\n };\n\n return task;\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult> {\n const taskExecutor = new Executor(title, {\n onTaskStart: this.onTaskStartCallback,\n });\n const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);\n await taskExecutor.append(tasks);\n const result = await taskExecutor.flush();\n const { output } = result!;\n return {\n output,\n executor: taskExecutor,\n };\n }\n\n private getReplanningCycleLimit(isVlmUiTars: boolean) {\n return (\n this.replanningCycleLimit ||\n globalConfigManager.getEnvConfigInNumber(\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n ) ||\n (isVlmUiTars\n ? defaultVlmUiTarsReplanningCycleLimit\n : defaultReplanningCycleLimit)\n );\n }\n\n async action(\n userPrompt: string,\n modelConfig: IModelConfig,\n actionContext?: string,\n cacheable?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const taskExecutor = new Executor(taskTitleStr('Action', userPrompt), {\n onTaskStart: this.onTaskStartCallback,\n });\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit = this.getReplanningCycleLimit(\n modelConfig.vlMode === 'vlm-ui-tars',\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;\n\n return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);\n }\n\n // Create planning task (automatically includes execution history if available)\n const planningTask = this.createPlanningTask(\n userPrompt,\n actionContext,\n modelConfig,\n );\n\n await taskExecutor.append(planningTask);\n const result = await taskExecutor.flush();\n const planResult: PlanningAIResponse = result?.output;\n if (taskExecutor.isInErrorState()) {\n return {\n output: planResult,\n executor: taskExecutor,\n };\n }\n\n // Execute planned actions\n const plans = planResult.actions || [];\n yamlFlow.push(...(planResult.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfig,\n cacheable,\n );\n taskExecutor.append(executables.tasks);\n } catch (error) {\n return this.appendErrorPlan(\n taskExecutor,\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n modelConfig,\n );\n }\n\n await taskExecutor.flush();\n if (taskExecutor.isInErrorState()) {\n return {\n output: undefined,\n executor: taskExecutor,\n };\n }\n\n // Check if task is complete\n if (!planResult.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n replanCount++;\n }\n\n return {\n output: {\n yamlFlow,\n },\n executor: taskExecutor,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: InsightExtractParam,\n modelConfig: IModelConfig,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n locate: null,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let insightDump: InsightDump | undefined;\n const dumpCollector: DumpSubscriber = (dump) => {\n insightDump = dump;\n };\n this.insight.onceDumpUpdatedFn = dumpCollector;\n\n // Get context for query operations\n const shotTime = Date.now();\n const uiContext = await this.insight.contextRetrieverFn('extract');\n task.uiContext = uiContext;\n\n const recordItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: shotTime,\n screenshot: uiContext.screenshotBase64,\n timing: 'before Extract',\n };\n task.recorder = [recordItem];\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n const { data, usage, thought } = await this.insight.extract<any>(\n demandInput,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else {\n assert(\n type !== 'WaitFor' ? data?.[keyOfResult] !== undefined : true,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n return {\n output: outputResult,\n log: insightDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: InsightExtractParam,\n modelConfig: IModelConfig,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const taskExecutor = new Executor(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n {\n onTaskStart: this.onTaskStartCallback,\n },\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));\n const result = await taskExecutor.flush();\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n executor: taskExecutor,\n };\n }\n\n private async appendErrorPlan(\n taskExecutor: Executor,\n errorMsg: string,\n modelConfig: IModelConfig,\n ) {\n const errorPlan: PlanningAction<PlanningActionParamError> = {\n type: 'Error',\n param: {\n thought: errorMsg,\n },\n locate: null,\n };\n const { tasks } = await this.convertPlanToExecutable(\n [errorPlan],\n modelConfig,\n );\n await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));\n await taskExecutor.flush();\n\n return {\n output: undefined,\n executor: taskExecutor,\n };\n }\n\n async taskForSleep(timeMs: number, modelConfig: IModelConfig) {\n const sleepPlan: PlanningAction<PlanningActionParamSleep> = {\n type: 'Sleep',\n param: {\n timeMs,\n },\n locate: null,\n };\n // The convertPlanToExecutable requires modelConfig as a parameter but will not consume it when type is Sleep\n const { tasks: sleepTasks } = await this.convertPlanToExecutable(\n [sleepPlan],\n modelConfig,\n );\n\n return this.prependExecutorWithScreenshot(sleepTasks[0]);\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const taskExecutor = new Executor(taskTitleStr('WaitFor', description), {\n onTaskStart: this.onTaskStartCallback,\n });\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let startTime = Date.now();\n let errorThought = '';\n while (Date.now() - overallStartTime < timeoutMs) {\n startTime = Date.now();\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n {\n doNotThrowError: true,\n },\n multimodalPrompt,\n );\n\n await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));\n const result = (await taskExecutor.flush()) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n executor: taskExecutor,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - startTime < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - startTime);\n const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);\n await taskExecutor.append(sleepTask);\n }\n }\n\n return this.appendErrorPlan(\n taskExecutor,\n `waitFor timeout: ${errorThought}`,\n modelConfig,\n );\n }\n}\n"],"names":["debug","getDebug","defaultReplanningCycleLimit","defaultVlmUiTarsReplanningCycleLimit","locatePlanForLocate","param","locate","locatePlan","TaskExecutor","timing","base64","item","Date","taskApply","appendAfterExecution","taskWithScreenshot","context","args","recorder","task","shot","result","shot2","plans","modelConfig","cacheable","tasks","taskForLocatePlan","plan","detailedLocateParam","onResult","undefined","taskFind","taskContext","_this_taskCache","_locateCacheRecord_cacheContent","assert","JSON","insightDump","usage","dumpCollector","dump","_dump_taskInfo","_dump_taskInfo1","shotTime","uiContext","recordItem","elementFromXpath","userExpectedPathHitFlag","cachePrompt","locateCacheRecord","cacheEntry","elementFromCache","matchElementFromCache","cacheHitFlag","elementFromPlan","matchElementFromPlan","planHitFlag","elementFromAiLocate","aiLocateHitFlag","element","currentCacheEntry","feature","Object","error","Error","hitBy","_plan_locate","_plan_locate1","taskLocate","_plan_param","taskActionError","taskActionFinished","taskActionSleep","taskParam","sleep","planType","actionSpace","action","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","locateTask","_context_element","Promise","originalError","originalMessage","String","parseActionParam","actionFn","wrappedTasks","index","executorContext","userInstruction","yamlString","taskExecutor","Executor","taskTitleStr","actionContext","startTime","vlMode","uiTarsModelVersion","Array","console","planResult","uiTarsPlanning","actions","log","more_actions_needed_by_instruction","rawResponse","finalActions","timeNow","timeRemaining","title","output","isVlmUiTars","globalConfigManager","MIDSCENE_REPLANNING_CYCLE_LIMIT","userPrompt","replanCount","yamlFlow","replanningCycleLimit","errorMsg","planningTask","executables","type","demand","opt","multimodalPrompt","queryTask","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","data","thought","outputResult","errorPlan","timeMs","sleepPlan","sleepTasks","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","errorThought","now","sleepTask","interfaceInstance","insight","opts","ConversationHistory"],"mappings":";;;;;;;;;;;;;;;;;;AA6DA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,8BAA8B;AACpC,MAAMC,uCAAuC;AAEtC,SAASC,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACND;QACA,OAAOA;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAEO,MAAMC;IAcX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAmBA,MAAc,iBAAiBC,MAAuC,EAAE;QACtE,MAAMC,SAAS,MAAM,IAAI,CAAC,SAAS,CAAC,gBAAgB;QACpD,MAAMC,OAA8B;YAClC,MAAM;YACN,IAAIC,KAAK,GAAG;YACZ,YAAYF;YACZD;QACF;QACA,OAAOE;IACT;IAEQ,8BACNE,SAA6B,EAC7BC,uBAAuB,KAAK,EACR;QACpB,MAAMC,qBAAyC;YAC7C,GAAGF,SAAS;YACZ,UAAU,OAAOR,OAAOW,SAAS,GAAGC;gBAClC,MAAMC,WAAoC,EAAE;gBAC5C,MAAM,EAAEC,IAAI,EAAE,GAAGH;gBAEjBG,KAAK,QAAQ,GAAGD;gBAChB,MAAME,OAAO,MAAM,IAAI,CAAC,gBAAgB,CAAC,CAAC,OAAO,EAAED,KAAK,IAAI,EAAE;gBAC9DD,SAAS,IAAI,CAACE;gBAEd,MAAMC,SAAS,MAAMR,UAAU,QAAQ,CAACR,OAAOW,YAAYC;gBAE3D,IAAIH,sBAAsB;oBACxB,MAAMQ,QAAQ,MAAM,IAAI,CAAC,gBAAgB,CAAC;oBAC1CJ,SAAS,IAAI,CAACI;gBAChB;gBACA,OAAOD;YACT;QACF;QACA,OAAON;IACT;IAEA,MAAa,wBACXQ,KAAuB,EACvBC,WAAyB,EACzBC,SAAmB,EACnB;QACA,MAAMC,QAA8B,EAAE;QAEtC,MAAMC,oBAAoB,CACxBC,MACAC,qBACAC;YAEA,IAAI,AAA+B,YAA/B,OAAOD,qBACTA,sBAAsB;gBACpB,QAAQA;YACV;YAGF,IAAIJ,AAAcM,WAAdN,WACFI,sBAAsB;gBACpB,GAAGA,mBAAmB;gBACtBJ;YACF;YAEF,MAAMO,WAA4C;gBAChD,MAAM;gBACN,SAAS;gBACT,OAAOH;gBACP,SAASD,KAAK,OAAO;gBACrB,UAAU,OAAOvB,OAAO4B;wBAkDpBC,iBACiBC;oBAlDnB,MAAM,EAAEhB,IAAI,EAAE,GAAGc;oBACjBG,OACE/B,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,MAAM,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,EAAE,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,IAAI,AAAD,GACxC,CAAC,qDAAqD,EAAEgC,KAAK,SAAS,CACpEhC,QACC;oBAEL,IAAIiC;oBACJ,IAAIC;oBACJ,MAAMC,gBAAgC,CAACC;4BAE7BC,gBASJC;wBAVJL,cAAcG;wBACdF,QAAQG,QAAAA,OAAAA,KAAAA,IAAAA,QAAAA,CAAAA,iBAAAA,KAAM,QAAQ,AAAD,IAAbA,KAAAA,IAAAA,eAAgB,KAAK;wBAE7BvB,KAAK,GAAG,GAAG;4BACT,MAAMmB;wBACR;wBAEAnB,KAAK,KAAK,GAAGoB;wBAGb,IAAII,QAAAA,OAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,KAAM,QAAQ,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,eAAe,EACjCxB,KAAK,eAAe,GAAGsB,KAAK,QAAQ,CAAC,eAAe;oBAExD;oBACA,IAAI,CAAC,OAAO,CAAC,iBAAiB,GAAGD;oBACjC,MAAMI,WAAWhC,KAAK,GAAG;oBAGzB,MAAMiC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;oBACxD1B,KAAK,SAAS,GAAG0B;oBAEjB,MAAMC,aAAoC;wBACxC,MAAM;wBACN,IAAIF;wBACJ,YAAYC,UAAU,gBAAgB;wBACtC,QAAQ;oBACV;oBACA1B,KAAK,QAAQ,GAAG;wBAAC2B;qBAAW;oBAG5B,MAAMC,mBACJ1C,MAAM,KAAK,IAAK,IAAI,CAAC,SAAS,CAAS,qBAAqB,GACxD,MAAO,IAAI,CAAC,SAAS,CAAS,qBAAqB,CAACA,MAAM,KAAK,IAC/D0B;oBACN,MAAMiB,0BAA0B,CAAC,CAACD;oBAGlC,MAAME,cAAc5C,MAAM,MAAM;oBAChC,MAAM6C,oBAAAA,QACJhB,CAAAA,kBAAAA,IAAI,CAAC,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,gBAAgB,CAACe;oBACnC,MAAME,aAAahB,QAAAA,oBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kCAAAA,kBAAmB,YAAY,AAAD,IAA9BA,KAAAA,IAAAA,gCAAiC,KAAK;oBACzD,MAAMiB,mBAAmBJ,0BACrB,OACA,MAAMK,sBACJ,IAAI,EACJF,YACAF,aACA5C,MAAM,SAAS;oBAErB,MAAMiD,eAAe,CAAC,CAACF;oBAGvB,MAAMG,kBACJ,AAACP,2BAA4BM,eAEzBvB,SADAyB,qBAAqBnD,OAAOwC,UAAU,IAAI;oBAEhD,MAAMY,cAAc,CAAC,CAACF;oBAGtB,MAAMG,sBACJ,AAACV,2BAA4BM,gBAAiBG,cAW1C1B,SATE,OAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACvB1B,OACA;wBAEE,SAASwC;oBACX,GACArB,YAAW,EAEb,OAAO;oBAEf,MAAMmC,kBAAkB,CAAC,CAACD;oBAE1B,MAAME,UACJb,oBACAK,oBACAG,mBACAG;oBAGF,IAAIG;oBACJ,IACED,WACA,IAAI,CAAC,SAAS,IACd,CAACN,gBACDjD,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,SAAS,AAAD,MAAM,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,mBAAmB,EACpC,IAAI;wBACF,MAAMyD,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB,CACtDF,QAAQ,IAAI,EACZA,AAA6B7B,WAA7B6B,QAAQ,gBAAgB,GACpB;4BAAE,iBAAiBA,QAAQ,gBAAgB;wBAAC,IAC5C7B;wBAEN,IAAI+B,WAAWC,OAAO,IAAI,CAACD,SAAS,MAAM,GAAG,GAAG;4BAC9C9D,MACE,uCACAiD,aACAa;4BAEFD,oBAAoBC;4BACpB,IAAI,CAAC,SAAS,CAAC,yBAAyB,CACtC;gCACE,MAAM;gCACN,QAAQb;gCACR,OAAOa;4BACT,GACAZ;wBAEJ,OACElD,MACE,yDACAiD;oBAGN,EAAE,OAAOe,OAAO;wBACdhE,MAAM,kCAAkCgE;oBAC1C;yBAEAhE,MAAM;oBAGV,IAAI,CAAC4D,SACH,MAAM,IAAIK,MAAM,CAAC,mBAAmB,EAAE5D,MAAM,MAAM,EAAE;oBAGtD,IAAI6D;oBAEJ,IAAIlB,yBACFkB,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACP,OAAO7D,MAAM,KAAK;wBACpB;oBACF;yBACK,IAAIiD,cACTY,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACPf;4BACA,aAAaU;wBACf;oBACF;yBACK,IAAIJ,aACTS,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACP,IAAIX,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,EAAE;4BACvB,MAAMA,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,IAAI;wBAC7B;oBACF;yBACK,IAAII,iBACTO,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACP,QAAQ7D,MAAM,MAAM;wBACtB;oBACF;oBAGFyB,QAAAA,YAAAA,SAAW8B;oBAEX,OAAO;wBACL,QAAQ;4BACNA;wBACF;wBACAf;wBACAqB;oBACF;gBACF;YACF;YACA,OAAOlC;QACT;QAEA,KAAK,MAAMJ,QAAQL,MACjB,IAAIK,AAAc,aAAdA,KAAK,IAAI,EAAe;gBAIxBuC,cACAC;YAJF,IACE,CAACxC,KAAK,MAAM,IACZA,AAAgB,SAAhBA,KAAK,MAAM,IACXuC,AAAAA,SAAAA,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,EAAE,AAAD,MAAM,QACpBC,AAAAA,SAAAA,CAAAA,gBAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,cAAa,EAAE,AAAD,MAAM,QACpB;gBACApE,MAAM,kDAAkD4B;gBACxD;YACF;YACA,MAAMyC,aAAa1C,kBAAkBC,MAAMA,KAAK,MAAM;YAEtDF,MAAM,IAAI,CAAC2C;QACb,OAAO,IAAIzC,AAAc,YAAdA,KAAK,IAAI,EAAc;gBAMH0C;YAL7B,MAAMC,kBACJ;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO3C,KAAK,KAAK;gBACjB,SAASA,KAAK,OAAO,aAAI0C,CAAAA,cAAAA,KAAK,KAAK,AAAD,IAATA,KAAAA,IAAAA,YAAY,OAAO,AAAD;gBAC3C,QAAQ1C,KAAK,MAAM;gBACnB,UAAU;wBAEW0C;oBADnB,MAAM,IAAIL,MACRrC,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,OAAO,AAAD,KAAC,SAAI0C,CAAAA,cAAAA,KAAK,KAAK,AAAD,IAATA,KAAAA,IAAAA,YAAY,OAAO,AAAD,KAAK;gBAE5C;YACF;YACF5C,MAAM,IAAI,CAAC6C;QACb,OAAO,IAAI3C,AAAc,eAAdA,KAAK,IAAI,EAAiB;YACnC,MAAM4C,qBAAqD;gBACzD,MAAM;gBACN,SAAS;gBACT,OAAO;gBACP,SAAS5C,KAAK,OAAO;gBACrB,QAAQA,KAAK,MAAM;gBACnB,UAAU,OAAOvB,SAAW;YAC9B;YACAqB,MAAM,IAAI,CAAC8C;QACb,OAAO,IAAI5C,AAAc,YAAdA,KAAK,IAAI,EAAc;YAChC,MAAM6C,kBACJ;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO7C,KAAK,KAAK;gBACjB,SAASA,KAAK,OAAO;gBACrB,QAAQA,KAAK,MAAM;gBACnB,UAAU,OAAO8C;oBACf,MAAMC,yBAAMD,AAAAA,CAAAA,QAAAA,YAAAA,KAAAA,IAAAA,UAAW,MAAM,AAAD,KAAK;gBACnC;YACF;YACFhD,MAAM,IAAI,CAAC+C;QACb,OAAO;YAEL,MAAMG,WAAWhD,KAAK,IAAI;YAC1B,MAAMiD,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;YACpD,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACC,SAAWA,OAAO,IAAI,KAAKF;YAC5D,MAAMvE,QAAQuB,KAAK,KAAK;YAExB,IAAI,CAACkD,QACH,MAAM,IAAIb,MAAM,CAAC,aAAa,EAAEW,SAAS,WAAW,CAAC;YAIvD,MAAMG,eAAeD,SACjBE,4BAA4BF,OAAO,WAAW,IAC9C,EAAE;YAEN,MAAMG,uBAAuBH,SACzBE,4BAA4BF,OAAO,WAAW,EAAE,QAChD,EAAE;YAENC,aAAa,OAAO,CAAC,CAACG;gBACpB,IAAI7E,KAAK,CAAC6E,MAAM,EAAE;oBAChB,MAAM3E,aAAaH,oBAAoBC,KAAK,CAAC6E,MAAM;oBACnDlF,MACE,uCACA,CAAC,YAAY,EAAE4E,UAAU,EACzB,CAAC,MAAM,EAAEvC,KAAK,SAAS,CAAChC,KAAK,CAAC6E,MAAM,GAAG,EACvC,CAAC,WAAW,EAAE7C,KAAK,SAAS,CAAC9B,aAAa;oBAE5C,MAAM4E,aAAaxD,kBACjBpB,YACAF,KAAK,CAAC6E,MAAM,EACZ,CAAC7D;wBACChB,KAAK,CAAC6E,MAAM,GAAG7D;oBACjB;oBAEFK,MAAM,IAAI,CAACyD;gBACb,OAAO;oBACL/C,OACE,CAAC6C,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAEN,UAAU;oBAE3E5E,MAAM,CAAC,OAAO,EAAEkF,MAAM,6BAA6B,EAAEN,UAAU;gBACjE;YACF;YAEA,MAAMzD,OAKF;gBACF,MAAM;gBACN,SAASyD;gBACT,SAAShD,KAAK,OAAO;gBACrB,OAAOA,KAAK,KAAK;gBACjB,UAAU,OAAOvB,OAAOW;wBAKOoE;oBAJ7BpF,MACE,oBACA4E,UACAvE,OACA,CAAC,wBAAwB,EAAE,QAAA+E,CAAAA,mBAAAA,QAAQ,OAAO,AAAD,IAAdA,KAAAA,IAAAA,iBAAiB,MAAM,EAAE;oBAItD,MAAMvC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;oBACxD7B,QAAQ,IAAI,CAAC,SAAS,GAAG6B;oBAEzBoC,qBAAqB,OAAO,CAAC,CAACC;wBAC5B9C,OACE/B,KAAK,CAAC6E,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAEN,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;oBAE9G;oBAEA,IAAI;wBACF,MAAMS,QAAQ,GAAG,CAAC;4BACf;gCACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;oCACrCrF,MAAM;oCACN,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC8E,OAAO,IAAI,EAAEzE;oCACrDL,MAAM;gCACR;4BACF;4BACA2E,yBAAM;yBACP;oBACH,EAAE,OAAOW,eAAoB;wBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;wBACnC,MAAM,IAAIrB,MACR,CAAC,wCAAwC,EAAEa,OAAO,IAAI,CAAC,EAAE,EAAES,iBAAiB,EAC5E;4BAAE,OAAOD;wBAAc;oBAE3B;oBAGA,IAAIR,OAAO,WAAW,EACpB,IAAI;wBACFzE,QAAQoF,iBAAiBpF,OAAOyE,OAAO,WAAW;oBACpD,EAAE,OAAOd,OAAY;wBACnB,MAAM,IAAIC,MACR,CAAC,8BAA8B,EAAEa,OAAO,IAAI,CAAC,EAAE,EAAEd,MAAM,OAAO,CAAC,cAAc,EAAE3B,KAAK,SAAS,CAAChC,QAAQ,EACtG;4BAAE,OAAO2D;wBAAM;oBAEnB;oBAGFhE,MAAM,kBAAkB8E,OAAO,IAAI;oBACnC,MAAMY,WAAWZ,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;oBAChD,MAAMY,SAASrF,OAAOW;oBACtBhB,MAAM,iBAAiB8E,OAAO,IAAI;oBAElC,IAAI;wBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;4BACpC9E,MAAM;4BACN,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC8E,OAAO,IAAI,EAAEzE;4BACpDL,MAAM;wBACR;oBACF,EAAE,OAAOsF,eAAoB;wBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;wBACnC,MAAM,IAAIrB,MACR,CAAC,uCAAuC,EAAEa,OAAO,IAAI,CAAC,EAAE,EAAES,iBAAiB,EAC3E;4BAAE,OAAOD;wBAAc;oBAE3B;oBAEA,OAAO;wBACL,QAAQ;4BACN,SAAS;4BACT,QAAQV;4BACR,OAAOvE;wBACT;oBACF;gBACF;YACF;YACAqB,MAAM,IAAI,CAACP;QACb;QAGF,MAAMwE,eAAejE,MAAM,GAAG,CAC5B,CAACP,MAA0ByE;YACzB,IAAIzE,AAAc,aAAdA,KAAK,IAAI,EACX,OAAO,IAAI,CAAC,6BAA6B,CACvCA,MACAyE,UAAUlE,MAAM,MAAM,GAAG;YAG7B,OAAOP;QACT;QAGF,OAAO;YACL,OAAOwE;QACT;IACF;IAEA,MAAc,qBAAqBE,eAAgC,EAAE;QACnE,MAAMjD,WAAWhC,KAAK,GAAG;QACzB,MAAMiC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;QACxD,MAAMC,aAAoC;YACxC,MAAM;YACN,IAAIF;YACJ,YAAYC,UAAU,gBAAgB;YACtC,QAAQ;QACV;QAEAgD,gBAAgB,IAAI,CAAC,QAAQ,GAAG;YAAC/C;SAAW;QAC3C+C,gBAAgB,IAAI,CAA2B,SAAS,GAAGhD;QAE5D,OAAO;YACLA;QACF;IACF;IAEA,MAAM,uBAAuBiD,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,eAAe,IAAIC,SAASC,aAAa,UAAUJ,kBAAkB;YACzE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QAEA,MAAM3E,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACL2E;YACF;YACA,UAAU,OAAOzF,OAAOwF;gBACtB,MAAM,IAAI,CAAC,oBAAoB,CAACA;gBAChC,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLE;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QAEA,MAAMC,aAAa,MAAM,CAAC7E;QAC1B,MAAM6E,aAAa,KAAK;QAExB,OAAO;YACL,UAAUA;QACZ;IACF;IAEQ,mBACNF,eAAuB,EACvBK,aAAiC,EACjC3E,WAAyB,EACG;QAC5B,MAAML,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACL2E;YACF;YACA,UAAU,OAAOzF,OAAOwF;gBACtB,MAAMO,YAAYxF,KAAK,GAAG;gBAC1B,MAAM,EAAEiC,SAAS,EAAE,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAACgD;gBACtD,MAAM,EAAEQ,MAAM,EAAE,GAAG7E;gBACnB,MAAM8E,qBACJD,AAAW,kBAAXA,SAA2B7E,YAAY,kBAAkB,GAAGO;gBAE9DK,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;gBAEF,MAAMyC,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;gBACpD7E,MACE,sCACA6E,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;gBAEhD1C,OAAOmE,MAAM,OAAO,CAAC1B,cAAc;gBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpB2B,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;gBAIrG,MAAMC,aAAa,MAAOH,AAAAA,CAAAA,qBAAqBI,iBAAiB9E,cAAAA,EAC9DvB,MAAM,eAAe,EACrB;oBACE,SAASwC;oBACTsD;oBACA,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;oBAC3CtB;oBACArD;oBACA,qBAAqB,IAAI,CAAC,mBAAmB;gBAC/C;gBAEFxB,MAAM,cAAcqC,KAAK,SAAS,CAACoE,YAAY,MAAM;gBAErD,MAAM,EACJE,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClC7C,KAAK,EACLzB,KAAK,EACLuE,WAAW,EACXnC,KAAK,EACN,GAAG8B;gBAEJZ,gBAAgB,IAAI,CAAC,GAAG,GAAG;oBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBAClCiB;gBACF;gBACAjB,gBAAgB,IAAI,CAAC,KAAK,GAAGtD;gBAE7B,MAAMwE,eAAeJ,WAAW,EAAE;gBAElC,IAAIhC,OAAO;oBACT,MAAMqC,UAAUpG,KAAK,GAAG;oBACxB,MAAMqG,gBAAgBtC,QAASqC,CAAAA,UAAUZ,SAAQ;oBACjD,IAAIa,gBAAgB,GAClBF,aAAa,IAAI,CAAC;wBAChB,MAAM;wBACN,OAAO;4BACL,QAAQE;wBACV;wBACA,QAAQ;oBACV;gBAEJ;gBAEA,IAAIF,AAAwB,MAAxBA,aAAa,MAAM,EACrB3E,OACE,CAACyE,sCAAsClC,OACvCX,QAAQ,CAAC,gBAAgB,EAAEA,OAAO,GAAG;gBAIzC,OAAO;oBACL,QAAQ;wBACN,SAAS+C;wBACTF;wBACAD;wBACA,UAAUH,WAAW,QAAQ;oBAC/B;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA5D;gBACF;YACF;QACF;QAEA,OAAO1B;IACT;IAEA,MAAM,SACJ+F,KAAa,EACb3F,KAAuB,EACvBC,WAAyB,EACC;QAC1B,MAAMwE,eAAe,IAAIC,SAASiB,OAAO;YACvC,aAAa,IAAI,CAAC,mBAAmB;QACvC;QACA,MAAM,EAAExF,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAACH,OAAOC;QAC5D,MAAMwE,aAAa,MAAM,CAACtE;QAC1B,MAAML,SAAS,MAAM2E,aAAa,KAAK;QACvC,MAAM,EAAEmB,MAAM,EAAE,GAAG9F;QACnB,OAAO;YACL8F;YACA,UAAUnB;QACZ;IACF;IAEQ,wBAAwBoB,WAAoB,EAAE;QACpD,OACE,IAAI,CAAC,oBAAoB,IACzBC,oBAAoB,oBAAoB,CACtCC,oCAEDF,CAAAA,cACGjH,uCACAD,2BAA0B;IAElC;IAEA,MAAM,OACJqH,UAAkB,EAClB/F,WAAyB,EACzB2E,aAAsB,EACtB1E,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMuE,eAAe,IAAIC,SAASC,aAAa,UAAUqB,aAAa;YACpE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QAEA,IAAIC,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBAAuB,IAAI,CAAC,uBAAuB,CACvDlG,AAAuB,kBAAvBA,YAAY,MAAM;QAIpB,MAAO,KAAM;YACX,IAAIgG,cAAcE,sBAAsB;gBACtC,MAAMC,WAAW,CAAC,WAAW,EAAED,qBAAqB,+EAA+E,CAAC;gBAEpI,OAAO,IAAI,CAAC,eAAe,CAAC1B,cAAc2B,UAAUnG;YACtD;YAGA,MAAMoG,eAAe,IAAI,CAAC,kBAAkB,CAC1CL,YACApB,eACA3E;YAGF,MAAMwE,aAAa,MAAM,CAAC4B;YAC1B,MAAMvG,SAAS,MAAM2E,aAAa,KAAK;YACvC,MAAMS,aAAiCpF,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM;YACrD,IAAI2E,aAAa,cAAc,IAC7B,OAAO;gBACL,QAAQS;gBACR,UAAUT;YACZ;YAIF,MAAMzE,QAAQkF,WAAW,OAAO,IAAI,EAAE;YACtCgB,SAAS,IAAI,IAAKhB,WAAW,QAAQ,IAAI,EAAE;YAE3C,IAAIoB;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CtG,OACAC,aACAC;gBAEFuE,aAAa,MAAM,CAAC6B,YAAY,KAAK;YACvC,EAAE,OAAO7D,OAAO;gBACd,OAAO,IAAI,CAAC,eAAe,CACzBgC,cACA,CAAC,4CAA4C,EAAEhC,MAAM,SAAS,EAAE3B,KAAK,SAAS,CAC5Ed,QACC,EACHC;YAEJ;YAEA,MAAMwE,aAAa,KAAK;YACxB,IAAIA,aAAa,cAAc,IAC7B,OAAO;gBACL,QAAQjE;gBACR,UAAUiE;YACZ;YAIF,IAAI,CAACS,WAAW,kCAAkC,EAChD;YAIFe;QACF;QAEA,OAAO;YACL,QAAQ;gBACNC;YACF;YACA,UAAUzB;QACZ;IACF;IAEQ,oBACN8B,IAAsE,EACtEC,MAA2B,EAC3BvG,WAAyB,EACzBwG,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASJ;YACT,QAAQ;YACR,OAAO;gBACL,YAAYG,mBACP;oBACCF;oBACAE;gBACF,IACAF;YACN;YACA,UAAU,OAAO1H,OAAO4B;gBACtB,MAAM,EAAEd,IAAI,EAAE,GAAGc;gBACjB,IAAIK;gBACJ,MAAME,gBAAgC,CAACC;oBACrCH,cAAcG;gBAChB;gBACA,IAAI,CAAC,OAAO,CAAC,iBAAiB,GAAGD;gBAGjC,MAAMI,WAAWhC,KAAK,GAAG;gBACzB,MAAMiC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;gBACxD1B,KAAK,SAAS,GAAG0B;gBAEjB,MAAMC,aAAoC;oBACxC,MAAM;oBACN,IAAIF;oBACJ,YAAYC,UAAU,gBAAgB;oBACtC,QAAQ;gBACV;gBACA1B,KAAK,QAAQ,GAAG;oBAAC2B;iBAAW;gBAE5B,MAAMqF,mBAAmBL,AAAS,YAATA;gBACzB,IAAIM,cAAcL;gBAClB,IAAIM,cAAc;gBAClB,IAAIF,oBAAqBL,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEO,cAAc;oBACd,MAAMC,gBACJR,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIK,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGP,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,MAAM,EAAEQ,IAAI,EAAEhG,KAAK,EAAEiG,OAAO,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACzDJ,aACA5G,aACAwG,KACAC;gBAGF,IAAIQ,eAAeF;gBACnB,IAAIJ,kBAEF,IAAI,AAAgB,YAAhB,OAAOI,MACTE,eAAeF;qBACV;oBACLnG,OACE0F,AAAS,cAATA,OAAqBS,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,IAAM,CAACF,YAAY,AAAD,MAAMtG,SAAY,MACzD;oBAEF0G,eAAgBF,IAAY,CAACF,YAAY;gBAC3C;gBAGF,OAAO;oBACL,QAAQI;oBACR,KAAKnG;oBACLC;oBACAiG;gBACF;YACF;QACF;QAEA,OAAON;IACT;IACA,MAAM,yBACJJ,IAA0D,EAC1DC,MAA2B,EAC3BvG,WAAyB,EACzBwG,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMjC,eAAe,IAAIC,SACvBC,aACE4B,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAAS1F,KAAK,SAAS,CAAC0F,UAEvD;YACE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QAGF,MAAMG,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CJ,MACAC,QACAvG,aACAwG,KACAC;QAGF,MAAMjC,aAAa,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAACkC;QAC7D,MAAM7G,SAAS,MAAM2E,aAAa,KAAK;QAEvC,IAAI,CAAC3E,QACH,MAAM,IAAI4C,MACR;QAIJ,MAAM,EAAEkD,MAAM,EAAEqB,OAAO,EAAE,GAAGnH;QAE5B,OAAO;YACL8F;YACAqB;YACA,UAAUxC;QACZ;IACF;IAEA,MAAc,gBACZA,YAAsB,EACtB2B,QAAgB,EAChBnG,WAAyB,EACzB;QACA,MAAMkH,YAAsD;YAC1D,MAAM;YACN,OAAO;gBACL,SAASf;YACX;YACA,QAAQ;QACV;QACA,MAAM,EAAEjG,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClD;YAACgH;SAAU,EACXlH;QAEF,MAAMwE,aAAa,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAACtE,KAAK,CAAC,EAAE;QACrE,MAAMsE,aAAa,KAAK;QAExB,OAAO;YACL,QAAQjE;YACR,UAAUiE;QACZ;IACF;IAEA,MAAM,aAAa2C,MAAc,EAAEnH,WAAyB,EAAE;QAC5D,MAAMoH,YAAsD;YAC1D,MAAM;YACN,OAAO;gBACLD;YACF;YACA,QAAQ;QACV;QAEA,MAAM,EAAE,OAAOE,UAAU,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAC9D;YAACD;SAAU,EACXpH;QAGF,OAAO,IAAI,CAAC,6BAA6B,CAACqH,UAAU,CAAC,EAAE;IACzD;IAEA,MAAM,QACJC,SAAsB,EACtBd,GAA+B,EAC/BxG,WAAyB,EACO;QAChC,MAAM,EAAEuH,UAAU,EAAEd,gBAAgB,EAAE,GAAGe,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM/C,eAAe,IAAIC,SAASC,aAAa,WAAW+C,cAAc;YACtE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QACA,MAAM,EAAEC,SAAS,EAAEC,eAAe,EAAE,GAAGnB;QAEvC5F,OAAO0G,WAAW;QAClB1G,OAAO8G,WAAW;QAClB9G,OAAO+G,iBAAiB;QAExB/G,OACE+G,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBxI,KAAK,GAAG;QACjC,IAAIwF,YAAYxF,KAAK,GAAG;QACxB,IAAIyI,eAAe;QACnB,MAAOzI,KAAK,GAAG,KAAKwI,mBAAmBF,UAAW;YAChD9C,YAAYxF,KAAK,GAAG;YACpB,MAAMsH,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAa,YACAvH,aACA;gBACE,iBAAiB;YACnB,GACAyG;YAGF,MAAMjC,aAAa,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAACkC;YAC7D,MAAM7G,SAAU,MAAM2E,aAAa,KAAK;YAOxC,IAAI3E,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,EAChB,OAAO;gBACL,QAAQU;gBACR,UAAUiE;YACZ;YAGFqD,eACEhI,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,OAAO,AAAD,KACb,CAACA,UAAU,CAAC,0BAA0B,EAAE0H,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMO,MAAM1I,KAAK,GAAG;YACpB,IAAI0I,MAAMlD,YAAY+C,iBAAiB;gBACrC,MAAMlC,gBAAgBkC,kBAAmBG,CAAAA,MAAMlD,SAAQ;gBACvD,MAAMmD,YAAY,MAAM,IAAI,CAAC,YAAY,CAACtC,eAAezF;gBACzD,MAAMwE,aAAa,MAAM,CAACuD;YAC5B;QACF;QAEA,OAAO,IAAI,CAAC,eAAe,CACzBvD,cACA,CAAC,iBAAiB,EAAEqD,cAAc,EAClC7H;IAEJ;IAv/BA,YACEgI,iBAAoC,EACpCC,OAAgB,EAChBC,IAIC,CACD;QAzBF;QAEA;QAEA;QAEA,uBAAQ,uBAAR;QAEA;QAEA;QAgBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,WAAW;QAC5C,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,mBAAmB,GAAG,IAAIC;IACjC;AAy+BF"}
1
+ {"version":3,"file":"agent/tasks.mjs","sources":["webpack://@midscene/core/./src/agent/tasks.ts"],"sourcesContent":["import {\n ConversationHistory,\n findAllMidsceneLocatorField,\n parseActionParam,\n plan,\n uiTarsPlanning,\n} from '@/ai-model';\nimport { Executor } from '@/ai-model/action-executor';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';\nimport type { AbstractInterface } from '@/device';\nimport type Insight from '@/insight';\nimport type {\n AIUsageInfo,\n DetailedLocateParam,\n DumpSubscriber,\n ElementCacheFeature,\n ExecutionRecorderItem,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskInsightLocateApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanning,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n ExecutorContext,\n InsightDump,\n InsightExtractOption,\n InsightExtractParam,\n InterfaceType,\n LocateResultElement,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamError,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n PlanningLocateParam,\n} from '@/types';\nimport { sleep } from '@/utils';\nimport {\n type IModelConfig,\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport { taskTitleStr } from './ui-utils';\nimport {\n matchElementFromCache,\n matchElementFromPlan,\n parsePrompt,\n} from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n executor: Executor;\n}\n\nconst debug = getDebug('device-task-executor');\nconst defaultReplanningCycleLimit = 10;\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n locate,\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n insight: Insight;\n\n taskCache?: TaskCache;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n insight: Insight,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n },\n ) {\n this.interface = interfaceInstance;\n this.insight = insight;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.conversationHistory = new ConversationHistory();\n }\n\n private async recordScreenshot(timing: ExecutionRecorderItem['timing']) {\n const base64 = await this.interface.screenshotBase64();\n const item: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: Date.now(),\n screenshot: base64,\n timing,\n };\n return item;\n }\n\n private prependExecutorWithScreenshot(\n taskApply: ExecutionTaskApply,\n appendAfterExecution = false,\n ): ExecutionTaskApply {\n const taskWithScreenshot: ExecutionTaskApply = {\n ...taskApply,\n executor: async (param, context, ...args) => {\n const recorder: ExecutionRecorderItem[] = [];\n const { task } = context;\n // set the recorder before executor in case of error\n task.recorder = recorder;\n const shot = await this.recordScreenshot(`before ${task.type}`);\n recorder.push(shot);\n\n const result = await taskApply.executor(param, context, ...args);\n\n if (appendAfterExecution) {\n const shot2 = await this.recordScreenshot('after Action');\n recorder.push(shot2);\n }\n return result;\n },\n };\n return taskWithScreenshot;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n cacheable?: boolean,\n ) {\n const tasks: ExecutionTaskApply[] = [];\n\n const taskForLocatePlan = (\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskInsightLocateApply => {\n if (typeof detailedLocateParam === 'string') {\n detailedLocateParam = {\n prompt: detailedLocateParam,\n };\n }\n // Apply cacheable option from convertPlanToExecutable if it was explicitly set\n if (cacheable !== undefined) {\n detailedLocateParam = {\n ...detailedLocateParam,\n cacheable,\n };\n }\n const taskFind: ExecutionTaskInsightLocateApply = {\n type: 'Insight',\n subType: 'Locate',\n param: detailedLocateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n assert(\n param?.prompt || param?.id || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n let insightDump: InsightDump | undefined;\n let usage: AIUsageInfo | undefined;\n const dumpCollector: DumpSubscriber = (dump) => {\n insightDump = dump;\n usage = dump?.taskInfo?.usage;\n\n task.log = {\n dump: insightDump,\n };\n\n task.usage = usage;\n\n // Store searchAreaUsage in task metadata\n if (dump?.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n };\n this.insight.onceDumpUpdatedFn = dumpCollector;\n const shotTime = Date.now();\n\n // Get context through contextRetrieverFn which handles frozen context\n const uiContext = await this.insight.contextRetrieverFn('locate');\n task.uiContext = uiContext;\n\n const recordItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: shotTime,\n screenshot: uiContext.screenshotBase64,\n timing: 'before Insight',\n };\n task.recorder = [recordItem];\n\n // try matching xpath\n const elementFromXpath =\n param.xpath && (this.interface as any).getElementInfoByXpath\n ? await (this.interface as any).getElementInfoByXpath(param.xpath)\n : undefined;\n const userExpectedPathHitFlag = !!elementFromXpath;\n\n // try matching cache\n const cachePrompt = param.prompt;\n const locateCacheRecord =\n this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n const elementFromCache = userExpectedPathHitFlag\n ? null\n : await matchElementFromCache(\n this,\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n const cacheHitFlag = !!elementFromCache;\n\n // try matching plan\n const elementFromPlan =\n !userExpectedPathHitFlag && !cacheHitFlag\n ? matchElementFromPlan(param, uiContext.tree)\n : undefined;\n const planHitFlag = !!elementFromPlan;\n\n // try ai locate\n const elementFromAiLocate =\n !userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag\n ? (\n await this.insight.locate(\n param,\n {\n // fallback to ai locate\n context: uiContext,\n },\n modelConfig,\n )\n ).element\n : undefined;\n const aiLocateHitFlag = !!elementFromAiLocate;\n\n const element =\n elementFromXpath || // highest priority\n elementFromCache || // second priority\n elementFromPlan || // third priority\n elementFromAiLocate;\n\n // update cache\n let currentCacheEntry: ElementCacheFeature | undefined;\n if (\n element &&\n this.taskCache &&\n !cacheHitFlag &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForRect) {\n try {\n const feature = await this.interface.cacheFeatureForRect(\n element.rect,\n element.isOrderSensitive !== undefined\n ? { _orderSensitive: element.isOrderSensitive }\n : undefined,\n );\n if (feature && Object.keys(feature).length > 0) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForRect failed: %s', error);\n }\n } else {\n debug('cacheFeatureForRect is not supported, skip cache update');\n }\n }\n if (!element) {\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (userExpectedPathHitFlag) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (cacheHitFlag) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n } else if (planHitFlag) {\n hitBy = {\n from: 'Planning',\n context: {\n id: elementFromPlan?.id,\n bbox: elementFromPlan?.bbox,\n },\n };\n } else if (aiLocateHitFlag) {\n hitBy = {\n from: 'AI model',\n context: {\n prompt: param.prompt,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element,\n },\n uiContext,\n hitBy,\n };\n },\n };\n return taskFind;\n };\n\n for (const plan of plans) {\n if (plan.type === 'Locate') {\n if (\n !plan.locate ||\n plan.locate === null ||\n plan.locate?.id === null ||\n plan.locate?.id === 'null'\n ) {\n debug('Locate action with id is null, will be ignored', plan);\n continue;\n }\n const taskLocate = taskForLocatePlan(plan, plan.locate);\n\n tasks.push(taskLocate);\n } else if (plan.type === 'Error') {\n const taskActionError: ExecutionTaskActionApply<PlanningActionParamError> =\n {\n type: 'Action',\n subType: 'Error',\n param: plan.param,\n thought: plan.thought || plan.param?.thought,\n locate: plan.locate,\n executor: async () => {\n throw new Error(\n plan?.thought || plan.param?.thought || 'error without thought',\n );\n },\n };\n tasks.push(taskActionError);\n } else if (plan.type === 'Finished') {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n locate: plan.locate,\n executor: async (param) => {},\n };\n tasks.push(taskActionFinished);\n } else if (plan.type === 'Sleep') {\n const taskActionSleep: ExecutionTaskActionApply<PlanningActionParamSleep> =\n {\n type: 'Action',\n subType: 'Sleep',\n param: plan.param,\n thought: plan.thought,\n locate: plan.locate,\n executor: async (taskParam) => {\n await sleep(taskParam?.timeMs || 3000);\n },\n };\n tasks.push(taskActionSleep);\n } else {\n // action in action space\n const planType = plan.type;\n const actionSpace = await this.interface.actionSpace();\n const action = actionSpace.find((action) => action.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n // find all params that needs location\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n );\n const locateTask = taskForLocatePlan(\n locatePlan,\n param[field],\n (result) => {\n param[field] = result;\n },\n );\n tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n executor: async (param, context) => {\n debug(\n 'executing action',\n planType,\n param,\n `context.element.center: ${context.element?.center}`,\n );\n\n // Get context for actionSpace operations to ensure size info is available\n const uiContext = await this.insight.contextRetrieverFn('locate');\n context.task.uiContext = uiContext;\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`,\n );\n });\n\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug('will call \"beforeInvokeAction\" for interface');\n await this.interface.beforeInvokeAction(action.name, param);\n debug('called \"beforeInvokeAction\" for interface');\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n // Validate and parse parameters with defaults\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema);\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n await actionFn(param, context);\n debug('called action', action.name);\n\n try {\n if (this.interface.afterInvokeAction) {\n debug('will call \"afterInvokeAction\" for interface');\n await this.interface.afterInvokeAction(action.name, param);\n debug('called \"afterInvokeAction\" for interface');\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n // Return a proper result for report generation\n return {\n output: {\n success: true,\n action: planType,\n param: param,\n },\n };\n },\n };\n tasks.push(task);\n }\n }\n\n const wrappedTasks = tasks.map(\n (task: ExecutionTaskApply, index: number) => {\n if (task.type === 'Action') {\n return this.prependExecutorWithScreenshot(\n task,\n index === tasks.length - 1,\n );\n }\n return task;\n },\n );\n\n return {\n tasks: wrappedTasks,\n };\n }\n\n private async setupPlanningContext(executorContext: ExecutorContext) {\n const shotTime = Date.now();\n const uiContext = await this.insight.contextRetrieverFn('locate');\n const recordItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: shotTime,\n screenshot: uiContext.screenshotBase64,\n timing: 'before Planning',\n };\n\n executorContext.task.recorder = [recordItem];\n (executorContext.task as ExecutionTaskPlanning).uiContext = uiContext;\n\n return {\n uiContext,\n };\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const taskExecutor = new Executor(taskTitleStr('Action', userInstruction), {\n onTaskStart: this.onTaskStartCallback,\n });\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n await this.setupPlanningContext(executorContext);\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n\n await taskExecutor.append(task);\n await taskExecutor.flush();\n\n return {\n executor: taskExecutor,\n };\n }\n\n private createPlanningTask(\n userInstruction: string,\n actionContext: string | undefined,\n modelConfig: IModelConfig,\n ): ExecutionTaskPlanningApply {\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'Plan',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = await this.setupPlanningContext(executorContext);\n const { vlMode } = modelConfig;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars' ? modelConfig.uiTarsModelVersion : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(\n param.userInstruction,\n {\n context: uiContext,\n actionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig,\n conversationHistory: this.conversationHistory,\n },\n );\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n\n const finalActions = actions || [];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push({\n type: 'Sleep',\n param: {\n timeMs: timeRemaining,\n },\n locate: null,\n } as PlanningAction<PlanningActionParamSleep>);\n }\n }\n\n if (finalActions.length === 0) {\n assert(\n !more_actions_needed_by_instruction || sleep,\n error ? `Failed to plan: ${error}` : 'No plan found',\n );\n }\n\n return {\n output: {\n actions: finalActions,\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n },\n cache: {\n hit: false,\n },\n uiContext,\n };\n },\n };\n\n return task;\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult> {\n const taskExecutor = new Executor(title, {\n onTaskStart: this.onTaskStartCallback,\n });\n const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);\n await taskExecutor.append(tasks);\n const result = await taskExecutor.flush();\n const { output } = result!;\n return {\n output,\n executor: taskExecutor,\n };\n }\n\n private getReplanningCycleLimit(isVlmUiTars: boolean) {\n return (\n this.replanningCycleLimit ||\n globalConfigManager.getEnvConfigInNumber(\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n ) ||\n (isVlmUiTars\n ? defaultVlmUiTarsReplanningCycleLimit\n : defaultReplanningCycleLimit)\n );\n }\n\n async action(\n userPrompt: string,\n modelConfig: IModelConfig,\n actionContext?: string,\n cacheable?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const taskExecutor = new Executor(taskTitleStr('Action', userPrompt), {\n onTaskStart: this.onTaskStartCallback,\n });\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit = this.getReplanningCycleLimit(\n modelConfig.vlMode === 'vlm-ui-tars',\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;\n\n return this.appendErrorPlan(taskExecutor, errorMsg, modelConfig);\n }\n\n // Create planning task (automatically includes execution history if available)\n const planningTask = this.createPlanningTask(\n userPrompt,\n actionContext,\n modelConfig,\n );\n\n await taskExecutor.append(planningTask);\n const result = await taskExecutor.flush();\n const planResult: PlanningAIResponse = result?.output;\n if (taskExecutor.isInErrorState()) {\n return {\n output: planResult,\n executor: taskExecutor,\n };\n }\n\n // Execute planned actions\n const plans = planResult.actions || [];\n yamlFlow.push(...(planResult.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfig,\n cacheable,\n );\n taskExecutor.append(executables.tasks);\n } catch (error) {\n return this.appendErrorPlan(\n taskExecutor,\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n modelConfig,\n );\n }\n\n await taskExecutor.flush();\n if (taskExecutor.isInErrorState()) {\n return {\n output: undefined,\n executor: taskExecutor,\n };\n }\n\n // Check if task is complete\n if (!planResult.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n replanCount++;\n }\n\n return {\n output: {\n yamlFlow,\n },\n executor: taskExecutor,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: InsightExtractParam,\n modelConfig: IModelConfig,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n locate: null,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let insightDump: InsightDump | undefined;\n const dumpCollector: DumpSubscriber = (dump) => {\n insightDump = dump;\n };\n this.insight.onceDumpUpdatedFn = dumpCollector;\n\n // Get context for query operations\n const shotTime = Date.now();\n const uiContext = await this.insight.contextRetrieverFn('extract');\n task.uiContext = uiContext;\n\n const recordItem: ExecutionRecorderItem = {\n type: 'screenshot',\n ts: shotTime,\n screenshot: uiContext.screenshotBase64,\n timing: 'before Extract',\n };\n task.recorder = [recordItem];\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n const { data, usage, thought } = await this.insight.extract<any>(\n demandInput,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n return {\n output: outputResult,\n log: insightDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: InsightExtractParam,\n modelConfig: IModelConfig,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const taskExecutor = new Executor(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n {\n onTaskStart: this.onTaskStartCallback,\n },\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));\n const result = await taskExecutor.flush();\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n executor: taskExecutor,\n };\n }\n\n private async appendErrorPlan(\n taskExecutor: Executor,\n errorMsg: string,\n modelConfig: IModelConfig,\n ) {\n const errorPlan: PlanningAction<PlanningActionParamError> = {\n type: 'Error',\n param: {\n thought: errorMsg,\n },\n locate: null,\n };\n const { tasks } = await this.convertPlanToExecutable(\n [errorPlan],\n modelConfig,\n );\n await taskExecutor.append(this.prependExecutorWithScreenshot(tasks[0]));\n await taskExecutor.flush();\n\n return {\n output: undefined,\n executor: taskExecutor,\n };\n }\n\n async taskForSleep(timeMs: number, modelConfig: IModelConfig) {\n const sleepPlan: PlanningAction<PlanningActionParamSleep> = {\n type: 'Sleep',\n param: {\n timeMs,\n },\n locate: null,\n };\n // The convertPlanToExecutable requires modelConfig as a parameter but will not consume it when type is Sleep\n const { tasks: sleepTasks } = await this.convertPlanToExecutable(\n [sleepPlan],\n modelConfig,\n );\n\n return this.prependExecutorWithScreenshot(sleepTasks[0]);\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const taskExecutor = new Executor(taskTitleStr('WaitFor', description), {\n onTaskStart: this.onTaskStartCallback,\n });\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let startTime = Date.now();\n let errorThought = '';\n while (Date.now() - overallStartTime < timeoutMs) {\n startTime = Date.now();\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n {\n doNotThrowError: true,\n },\n multimodalPrompt,\n );\n\n await taskExecutor.append(this.prependExecutorWithScreenshot(queryTask));\n const result = (await taskExecutor.flush()) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n executor: taskExecutor,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - startTime < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - startTime);\n const sleepTask = await this.taskForSleep(timeRemaining, modelConfig);\n await taskExecutor.append(sleepTask);\n }\n }\n\n return this.appendErrorPlan(\n taskExecutor,\n `waitFor timeout: ${errorThought}`,\n modelConfig,\n );\n }\n}\n"],"names":["debug","getDebug","defaultReplanningCycleLimit","defaultVlmUiTarsReplanningCycleLimit","locatePlanForLocate","param","locate","locatePlan","TaskExecutor","timing","base64","item","Date","taskApply","appendAfterExecution","taskWithScreenshot","context","args","recorder","task","shot","result","shot2","plans","modelConfig","cacheable","tasks","taskForLocatePlan","plan","detailedLocateParam","onResult","undefined","taskFind","taskContext","_this_taskCache","_locateCacheRecord_cacheContent","assert","JSON","insightDump","usage","dumpCollector","dump","_dump_taskInfo","_dump_taskInfo1","shotTime","uiContext","recordItem","elementFromXpath","userExpectedPathHitFlag","cachePrompt","locateCacheRecord","cacheEntry","elementFromCache","matchElementFromCache","cacheHitFlag","elementFromPlan","matchElementFromPlan","planHitFlag","elementFromAiLocate","aiLocateHitFlag","element","currentCacheEntry","feature","Object","error","Error","hitBy","_plan_locate","_plan_locate1","taskLocate","_plan_param","taskActionError","taskActionFinished","taskActionSleep","taskParam","sleep","planType","actionSpace","action","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","locateTask","_context_element","Promise","originalError","originalMessage","String","parseActionParam","actionFn","wrappedTasks","index","executorContext","userInstruction","yamlString","taskExecutor","Executor","taskTitleStr","actionContext","startTime","vlMode","uiTarsModelVersion","Array","console","planResult","uiTarsPlanning","actions","log","more_actions_needed_by_instruction","rawResponse","finalActions","timeNow","timeRemaining","title","output","isVlmUiTars","globalConfigManager","MIDSCENE_REPLANNING_CYCLE_LIMIT","userPrompt","replanCount","yamlFlow","replanningCycleLimit","errorMsg","planningTask","executables","type","demand","opt","multimodalPrompt","queryTask","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","data","thought","outputResult","errorPlan","timeMs","sleepPlan","sleepTasks","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","errorThought","now","sleepTask","interfaceInstance","insight","opts","ConversationHistory"],"mappings":";;;;;;;;;;;;;;;;;;AA6DA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,8BAA8B;AACpC,MAAMC,uCAAuC;AAEtC,SAASC,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACND;QACA,OAAOA;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAEO,MAAMC;IAcX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAmBA,MAAc,iBAAiBC,MAAuC,EAAE;QACtE,MAAMC,SAAS,MAAM,IAAI,CAAC,SAAS,CAAC,gBAAgB;QACpD,MAAMC,OAA8B;YAClC,MAAM;YACN,IAAIC,KAAK,GAAG;YACZ,YAAYF;YACZD;QACF;QACA,OAAOE;IACT;IAEQ,8BACNE,SAA6B,EAC7BC,uBAAuB,KAAK,EACR;QACpB,MAAMC,qBAAyC;YAC7C,GAAGF,SAAS;YACZ,UAAU,OAAOR,OAAOW,SAAS,GAAGC;gBAClC,MAAMC,WAAoC,EAAE;gBAC5C,MAAM,EAAEC,IAAI,EAAE,GAAGH;gBAEjBG,KAAK,QAAQ,GAAGD;gBAChB,MAAME,OAAO,MAAM,IAAI,CAAC,gBAAgB,CAAC,CAAC,OAAO,EAAED,KAAK,IAAI,EAAE;gBAC9DD,SAAS,IAAI,CAACE;gBAEd,MAAMC,SAAS,MAAMR,UAAU,QAAQ,CAACR,OAAOW,YAAYC;gBAE3D,IAAIH,sBAAsB;oBACxB,MAAMQ,QAAQ,MAAM,IAAI,CAAC,gBAAgB,CAAC;oBAC1CJ,SAAS,IAAI,CAACI;gBAChB;gBACA,OAAOD;YACT;QACF;QACA,OAAON;IACT;IAEA,MAAa,wBACXQ,KAAuB,EACvBC,WAAyB,EACzBC,SAAmB,EACnB;QACA,MAAMC,QAA8B,EAAE;QAEtC,MAAMC,oBAAoB,CACxBC,MACAC,qBACAC;YAEA,IAAI,AAA+B,YAA/B,OAAOD,qBACTA,sBAAsB;gBACpB,QAAQA;YACV;YAGF,IAAIJ,AAAcM,WAAdN,WACFI,sBAAsB;gBACpB,GAAGA,mBAAmB;gBACtBJ;YACF;YAEF,MAAMO,WAA4C;gBAChD,MAAM;gBACN,SAAS;gBACT,OAAOH;gBACP,SAASD,KAAK,OAAO;gBACrB,UAAU,OAAOvB,OAAO4B;wBAkDpBC,iBACiBC;oBAlDnB,MAAM,EAAEhB,IAAI,EAAE,GAAGc;oBACjBG,OACE/B,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,MAAM,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,EAAE,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,IAAI,AAAD,GACxC,CAAC,qDAAqD,EAAEgC,KAAK,SAAS,CACpEhC,QACC;oBAEL,IAAIiC;oBACJ,IAAIC;oBACJ,MAAMC,gBAAgC,CAACC;4BAE7BC,gBASJC;wBAVJL,cAAcG;wBACdF,QAAQG,QAAAA,OAAAA,KAAAA,IAAAA,QAAAA,CAAAA,iBAAAA,KAAM,QAAQ,AAAD,IAAbA,KAAAA,IAAAA,eAAgB,KAAK;wBAE7BvB,KAAK,GAAG,GAAG;4BACT,MAAMmB;wBACR;wBAEAnB,KAAK,KAAK,GAAGoB;wBAGb,IAAII,QAAAA,OAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kBAAAA,KAAM,QAAQ,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,eAAe,EACjCxB,KAAK,eAAe,GAAGsB,KAAK,QAAQ,CAAC,eAAe;oBAExD;oBACA,IAAI,CAAC,OAAO,CAAC,iBAAiB,GAAGD;oBACjC,MAAMI,WAAWhC,KAAK,GAAG;oBAGzB,MAAMiC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;oBACxD1B,KAAK,SAAS,GAAG0B;oBAEjB,MAAMC,aAAoC;wBACxC,MAAM;wBACN,IAAIF;wBACJ,YAAYC,UAAU,gBAAgB;wBACtC,QAAQ;oBACV;oBACA1B,KAAK,QAAQ,GAAG;wBAAC2B;qBAAW;oBAG5B,MAAMC,mBACJ1C,MAAM,KAAK,IAAK,IAAI,CAAC,SAAS,CAAS,qBAAqB,GACxD,MAAO,IAAI,CAAC,SAAS,CAAS,qBAAqB,CAACA,MAAM,KAAK,IAC/D0B;oBACN,MAAMiB,0BAA0B,CAAC,CAACD;oBAGlC,MAAME,cAAc5C,MAAM,MAAM;oBAChC,MAAM6C,oBAAAA,QACJhB,CAAAA,kBAAAA,IAAI,CAAC,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,gBAAgB,CAACe;oBACnC,MAAME,aAAahB,QAAAA,oBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kCAAAA,kBAAmB,YAAY,AAAD,IAA9BA,KAAAA,IAAAA,gCAAiC,KAAK;oBACzD,MAAMiB,mBAAmBJ,0BACrB,OACA,MAAMK,sBACJ,IAAI,EACJF,YACAF,aACA5C,MAAM,SAAS;oBAErB,MAAMiD,eAAe,CAAC,CAACF;oBAGvB,MAAMG,kBACJ,AAACP,2BAA4BM,eAEzBvB,SADAyB,qBAAqBnD,OAAOwC,UAAU,IAAI;oBAEhD,MAAMY,cAAc,CAAC,CAACF;oBAGtB,MAAMG,sBACJ,AAACV,2BAA4BM,gBAAiBG,cAW1C1B,SATE,OAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACvB1B,OACA;wBAEE,SAASwC;oBACX,GACArB,YAAW,EAEb,OAAO;oBAEf,MAAMmC,kBAAkB,CAAC,CAACD;oBAE1B,MAAME,UACJb,oBACAK,oBACAG,mBACAG;oBAGF,IAAIG;oBACJ,IACED,WACA,IAAI,CAAC,SAAS,IACd,CAACN,gBACDjD,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,SAAS,AAAD,MAAM,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,mBAAmB,EACpC,IAAI;wBACF,MAAMyD,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB,CACtDF,QAAQ,IAAI,EACZA,AAA6B7B,WAA7B6B,QAAQ,gBAAgB,GACpB;4BAAE,iBAAiBA,QAAQ,gBAAgB;wBAAC,IAC5C7B;wBAEN,IAAI+B,WAAWC,OAAO,IAAI,CAACD,SAAS,MAAM,GAAG,GAAG;4BAC9C9D,MACE,uCACAiD,aACAa;4BAEFD,oBAAoBC;4BACpB,IAAI,CAAC,SAAS,CAAC,yBAAyB,CACtC;gCACE,MAAM;gCACN,QAAQb;gCACR,OAAOa;4BACT,GACAZ;wBAEJ,OACElD,MACE,yDACAiD;oBAGN,EAAE,OAAOe,OAAO;wBACdhE,MAAM,kCAAkCgE;oBAC1C;yBAEAhE,MAAM;oBAGV,IAAI,CAAC4D,SACH,MAAM,IAAIK,MAAM,CAAC,mBAAmB,EAAE5D,MAAM,MAAM,EAAE;oBAGtD,IAAI6D;oBAEJ,IAAIlB,yBACFkB,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACP,OAAO7D,MAAM,KAAK;wBACpB;oBACF;yBACK,IAAIiD,cACTY,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACPf;4BACA,aAAaU;wBACf;oBACF;yBACK,IAAIJ,aACTS,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACP,IAAIX,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,EAAE;4BACvB,MAAMA,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,IAAI;wBAC7B;oBACF;yBACK,IAAII,iBACTO,QAAQ;wBACN,MAAM;wBACN,SAAS;4BACP,QAAQ7D,MAAM,MAAM;wBACtB;oBACF;oBAGFyB,QAAAA,YAAAA,SAAW8B;oBAEX,OAAO;wBACL,QAAQ;4BACNA;wBACF;wBACAf;wBACAqB;oBACF;gBACF;YACF;YACA,OAAOlC;QACT;QAEA,KAAK,MAAMJ,QAAQL,MACjB,IAAIK,AAAc,aAAdA,KAAK,IAAI,EAAe;gBAIxBuC,cACAC;YAJF,IACE,CAACxC,KAAK,MAAM,IACZA,AAAgB,SAAhBA,KAAK,MAAM,IACXuC,AAAAA,SAAAA,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,EAAE,AAAD,MAAM,QACpBC,AAAAA,SAAAA,CAAAA,gBAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,cAAa,EAAE,AAAD,MAAM,QACpB;gBACApE,MAAM,kDAAkD4B;gBACxD;YACF;YACA,MAAMyC,aAAa1C,kBAAkBC,MAAMA,KAAK,MAAM;YAEtDF,MAAM,IAAI,CAAC2C;QACb,OAAO,IAAIzC,AAAc,YAAdA,KAAK,IAAI,EAAc;gBAMH0C;YAL7B,MAAMC,kBACJ;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO3C,KAAK,KAAK;gBACjB,SAASA,KAAK,OAAO,aAAI0C,CAAAA,cAAAA,KAAK,KAAK,AAAD,IAATA,KAAAA,IAAAA,YAAY,OAAO,AAAD;gBAC3C,QAAQ1C,KAAK,MAAM;gBACnB,UAAU;wBAEW0C;oBADnB,MAAM,IAAIL,MACRrC,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,OAAO,AAAD,KAAC,SAAI0C,CAAAA,cAAAA,KAAK,KAAK,AAAD,IAATA,KAAAA,IAAAA,YAAY,OAAO,AAAD,KAAK;gBAE5C;YACF;YACF5C,MAAM,IAAI,CAAC6C;QACb,OAAO,IAAI3C,AAAc,eAAdA,KAAK,IAAI,EAAiB;YACnC,MAAM4C,qBAAqD;gBACzD,MAAM;gBACN,SAAS;gBACT,OAAO;gBACP,SAAS5C,KAAK,OAAO;gBACrB,QAAQA,KAAK,MAAM;gBACnB,UAAU,OAAOvB,SAAW;YAC9B;YACAqB,MAAM,IAAI,CAAC8C;QACb,OAAO,IAAI5C,AAAc,YAAdA,KAAK,IAAI,EAAc;YAChC,MAAM6C,kBACJ;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO7C,KAAK,KAAK;gBACjB,SAASA,KAAK,OAAO;gBACrB,QAAQA,KAAK,MAAM;gBACnB,UAAU,OAAO8C;oBACf,MAAMC,yBAAMD,AAAAA,CAAAA,QAAAA,YAAAA,KAAAA,IAAAA,UAAW,MAAM,AAAD,KAAK;gBACnC;YACF;YACFhD,MAAM,IAAI,CAAC+C;QACb,OAAO;YAEL,MAAMG,WAAWhD,KAAK,IAAI;YAC1B,MAAMiD,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;YACpD,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACC,SAAWA,OAAO,IAAI,KAAKF;YAC5D,MAAMvE,QAAQuB,KAAK,KAAK;YAExB,IAAI,CAACkD,QACH,MAAM,IAAIb,MAAM,CAAC,aAAa,EAAEW,SAAS,WAAW,CAAC;YAIvD,MAAMG,eAAeD,SACjBE,4BAA4BF,OAAO,WAAW,IAC9C,EAAE;YAEN,MAAMG,uBAAuBH,SACzBE,4BAA4BF,OAAO,WAAW,EAAE,QAChD,EAAE;YAENC,aAAa,OAAO,CAAC,CAACG;gBACpB,IAAI7E,KAAK,CAAC6E,MAAM,EAAE;oBAChB,MAAM3E,aAAaH,oBAAoBC,KAAK,CAAC6E,MAAM;oBACnDlF,MACE,uCACA,CAAC,YAAY,EAAE4E,UAAU,EACzB,CAAC,MAAM,EAAEvC,KAAK,SAAS,CAAChC,KAAK,CAAC6E,MAAM,GAAG,EACvC,CAAC,WAAW,EAAE7C,KAAK,SAAS,CAAC9B,aAAa;oBAE5C,MAAM4E,aAAaxD,kBACjBpB,YACAF,KAAK,CAAC6E,MAAM,EACZ,CAAC7D;wBACChB,KAAK,CAAC6E,MAAM,GAAG7D;oBACjB;oBAEFK,MAAM,IAAI,CAACyD;gBACb,OAAO;oBACL/C,OACE,CAAC6C,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAEN,UAAU;oBAE3E5E,MAAM,CAAC,OAAO,EAAEkF,MAAM,6BAA6B,EAAEN,UAAU;gBACjE;YACF;YAEA,MAAMzD,OAKF;gBACF,MAAM;gBACN,SAASyD;gBACT,SAAShD,KAAK,OAAO;gBACrB,OAAOA,KAAK,KAAK;gBACjB,UAAU,OAAOvB,OAAOW;wBAKOoE;oBAJ7BpF,MACE,oBACA4E,UACAvE,OACA,CAAC,wBAAwB,EAAE,QAAA+E,CAAAA,mBAAAA,QAAQ,OAAO,AAAD,IAAdA,KAAAA,IAAAA,iBAAiB,MAAM,EAAE;oBAItD,MAAMvC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;oBACxD7B,QAAQ,IAAI,CAAC,SAAS,GAAG6B;oBAEzBoC,qBAAqB,OAAO,CAAC,CAACC;wBAC5B9C,OACE/B,KAAK,CAAC6E,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAEN,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;oBAE9G;oBAEA,IAAI;wBACF,MAAMS,QAAQ,GAAG,CAAC;4BACf;gCACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;oCACrCrF,MAAM;oCACN,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC8E,OAAO,IAAI,EAAEzE;oCACrDL,MAAM;gCACR;4BACF;4BACA2E,yBAAM;yBACP;oBACH,EAAE,OAAOW,eAAoB;wBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;wBACnC,MAAM,IAAIrB,MACR,CAAC,wCAAwC,EAAEa,OAAO,IAAI,CAAC,EAAE,EAAES,iBAAiB,EAC5E;4BAAE,OAAOD;wBAAc;oBAE3B;oBAGA,IAAIR,OAAO,WAAW,EACpB,IAAI;wBACFzE,QAAQoF,iBAAiBpF,OAAOyE,OAAO,WAAW;oBACpD,EAAE,OAAOd,OAAY;wBACnB,MAAM,IAAIC,MACR,CAAC,8BAA8B,EAAEa,OAAO,IAAI,CAAC,EAAE,EAAEd,MAAM,OAAO,CAAC,cAAc,EAAE3B,KAAK,SAAS,CAAChC,QAAQ,EACtG;4BAAE,OAAO2D;wBAAM;oBAEnB;oBAGFhE,MAAM,kBAAkB8E,OAAO,IAAI;oBACnC,MAAMY,WAAWZ,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;oBAChD,MAAMY,SAASrF,OAAOW;oBACtBhB,MAAM,iBAAiB8E,OAAO,IAAI;oBAElC,IAAI;wBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;4BACpC9E,MAAM;4BACN,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC8E,OAAO,IAAI,EAAEzE;4BACpDL,MAAM;wBACR;oBACF,EAAE,OAAOsF,eAAoB;wBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;wBACnC,MAAM,IAAIrB,MACR,CAAC,uCAAuC,EAAEa,OAAO,IAAI,CAAC,EAAE,EAAES,iBAAiB,EAC3E;4BAAE,OAAOD;wBAAc;oBAE3B;oBAEA,OAAO;wBACL,QAAQ;4BACN,SAAS;4BACT,QAAQV;4BACR,OAAOvE;wBACT;oBACF;gBACF;YACF;YACAqB,MAAM,IAAI,CAACP;QACb;QAGF,MAAMwE,eAAejE,MAAM,GAAG,CAC5B,CAACP,MAA0ByE;YACzB,IAAIzE,AAAc,aAAdA,KAAK,IAAI,EACX,OAAO,IAAI,CAAC,6BAA6B,CACvCA,MACAyE,UAAUlE,MAAM,MAAM,GAAG;YAG7B,OAAOP;QACT;QAGF,OAAO;YACL,OAAOwE;QACT;IACF;IAEA,MAAc,qBAAqBE,eAAgC,EAAE;QACnE,MAAMjD,WAAWhC,KAAK,GAAG;QACzB,MAAMiC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;QACxD,MAAMC,aAAoC;YACxC,MAAM;YACN,IAAIF;YACJ,YAAYC,UAAU,gBAAgB;YACtC,QAAQ;QACV;QAEAgD,gBAAgB,IAAI,CAAC,QAAQ,GAAG;YAAC/C;SAAW;QAC3C+C,gBAAgB,IAAI,CAA2B,SAAS,GAAGhD;QAE5D,OAAO;YACLA;QACF;IACF;IAEA,MAAM,uBAAuBiD,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,eAAe,IAAIC,SAASC,aAAa,UAAUJ,kBAAkB;YACzE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QAEA,MAAM3E,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACL2E;YACF;YACA,UAAU,OAAOzF,OAAOwF;gBACtB,MAAM,IAAI,CAAC,oBAAoB,CAACA;gBAChC,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLE;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QAEA,MAAMC,aAAa,MAAM,CAAC7E;QAC1B,MAAM6E,aAAa,KAAK;QAExB,OAAO;YACL,UAAUA;QACZ;IACF;IAEQ,mBACNF,eAAuB,EACvBK,aAAiC,EACjC3E,WAAyB,EACG;QAC5B,MAAML,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACL2E;YACF;YACA,UAAU,OAAOzF,OAAOwF;gBACtB,MAAMO,YAAYxF,KAAK,GAAG;gBAC1B,MAAM,EAAEiC,SAAS,EAAE,GAAG,MAAM,IAAI,CAAC,oBAAoB,CAACgD;gBACtD,MAAM,EAAEQ,MAAM,EAAE,GAAG7E;gBACnB,MAAM8E,qBACJD,AAAW,kBAAXA,SAA2B7E,YAAY,kBAAkB,GAAGO;gBAE9DK,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;gBAEF,MAAMyC,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;gBACpD7E,MACE,sCACA6E,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;gBAEhD1C,OAAOmE,MAAM,OAAO,CAAC1B,cAAc;gBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpB2B,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;gBAIrG,MAAMC,aAAa,MAAOH,AAAAA,CAAAA,qBAAqBI,iBAAiB9E,cAAAA,EAC9DvB,MAAM,eAAe,EACrB;oBACE,SAASwC;oBACTsD;oBACA,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;oBAC3CtB;oBACArD;oBACA,qBAAqB,IAAI,CAAC,mBAAmB;gBAC/C;gBAEFxB,MAAM,cAAcqC,KAAK,SAAS,CAACoE,YAAY,MAAM;gBAErD,MAAM,EACJE,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClC7C,KAAK,EACLzB,KAAK,EACLuE,WAAW,EACXnC,KAAK,EACN,GAAG8B;gBAEJZ,gBAAgB,IAAI,CAAC,GAAG,GAAG;oBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBAClCiB;gBACF;gBACAjB,gBAAgB,IAAI,CAAC,KAAK,GAAGtD;gBAE7B,MAAMwE,eAAeJ,WAAW,EAAE;gBAElC,IAAIhC,OAAO;oBACT,MAAMqC,UAAUpG,KAAK,GAAG;oBACxB,MAAMqG,gBAAgBtC,QAASqC,CAAAA,UAAUZ,SAAQ;oBACjD,IAAIa,gBAAgB,GAClBF,aAAa,IAAI,CAAC;wBAChB,MAAM;wBACN,OAAO;4BACL,QAAQE;wBACV;wBACA,QAAQ;oBACV;gBAEJ;gBAEA,IAAIF,AAAwB,MAAxBA,aAAa,MAAM,EACrB3E,OACE,CAACyE,sCAAsClC,OACvCX,QAAQ,CAAC,gBAAgB,EAAEA,OAAO,GAAG;gBAIzC,OAAO;oBACL,QAAQ;wBACN,SAAS+C;wBACTF;wBACAD;wBACA,UAAUH,WAAW,QAAQ;oBAC/B;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA5D;gBACF;YACF;QACF;QAEA,OAAO1B;IACT;IAEA,MAAM,SACJ+F,KAAa,EACb3F,KAAuB,EACvBC,WAAyB,EACC;QAC1B,MAAMwE,eAAe,IAAIC,SAASiB,OAAO;YACvC,aAAa,IAAI,CAAC,mBAAmB;QACvC;QACA,MAAM,EAAExF,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAACH,OAAOC;QAC5D,MAAMwE,aAAa,MAAM,CAACtE;QAC1B,MAAML,SAAS,MAAM2E,aAAa,KAAK;QACvC,MAAM,EAAEmB,MAAM,EAAE,GAAG9F;QACnB,OAAO;YACL8F;YACA,UAAUnB;QACZ;IACF;IAEQ,wBAAwBoB,WAAoB,EAAE;QACpD,OACE,IAAI,CAAC,oBAAoB,IACzBC,oBAAoB,oBAAoB,CACtCC,oCAEDF,CAAAA,cACGjH,uCACAD,2BAA0B;IAElC;IAEA,MAAM,OACJqH,UAAkB,EAClB/F,WAAyB,EACzB2E,aAAsB,EACtB1E,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMuE,eAAe,IAAIC,SAASC,aAAa,UAAUqB,aAAa;YACpE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QAEA,IAAIC,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBAAuB,IAAI,CAAC,uBAAuB,CACvDlG,AAAuB,kBAAvBA,YAAY,MAAM;QAIpB,MAAO,KAAM;YACX,IAAIgG,cAAcE,sBAAsB;gBACtC,MAAMC,WAAW,CAAC,WAAW,EAAED,qBAAqB,+EAA+E,CAAC;gBAEpI,OAAO,IAAI,CAAC,eAAe,CAAC1B,cAAc2B,UAAUnG;YACtD;YAGA,MAAMoG,eAAe,IAAI,CAAC,kBAAkB,CAC1CL,YACApB,eACA3E;YAGF,MAAMwE,aAAa,MAAM,CAAC4B;YAC1B,MAAMvG,SAAS,MAAM2E,aAAa,KAAK;YACvC,MAAMS,aAAiCpF,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM;YACrD,IAAI2E,aAAa,cAAc,IAC7B,OAAO;gBACL,QAAQS;gBACR,UAAUT;YACZ;YAIF,MAAMzE,QAAQkF,WAAW,OAAO,IAAI,EAAE;YACtCgB,SAAS,IAAI,IAAKhB,WAAW,QAAQ,IAAI,EAAE;YAE3C,IAAIoB;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CtG,OACAC,aACAC;gBAEFuE,aAAa,MAAM,CAAC6B,YAAY,KAAK;YACvC,EAAE,OAAO7D,OAAO;gBACd,OAAO,IAAI,CAAC,eAAe,CACzBgC,cACA,CAAC,4CAA4C,EAAEhC,MAAM,SAAS,EAAE3B,KAAK,SAAS,CAC5Ed,QACC,EACHC;YAEJ;YAEA,MAAMwE,aAAa,KAAK;YACxB,IAAIA,aAAa,cAAc,IAC7B,OAAO;gBACL,QAAQjE;gBACR,UAAUiE;YACZ;YAIF,IAAI,CAACS,WAAW,kCAAkC,EAChD;YAIFe;QACF;QAEA,OAAO;YACL,QAAQ;gBACNC;YACF;YACA,UAAUzB;QACZ;IACF;IAEQ,oBACN8B,IAAsE,EACtEC,MAA2B,EAC3BvG,WAAyB,EACzBwG,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASJ;YACT,QAAQ;YACR,OAAO;gBACL,YAAYG,mBACP;oBACCF;oBACAE;gBACF,IACAF;YACN;YACA,UAAU,OAAO1H,OAAO4B;gBACtB,MAAM,EAAEd,IAAI,EAAE,GAAGc;gBACjB,IAAIK;gBACJ,MAAME,gBAAgC,CAACC;oBACrCH,cAAcG;gBAChB;gBACA,IAAI,CAAC,OAAO,CAAC,iBAAiB,GAAGD;gBAGjC,MAAMI,WAAWhC,KAAK,GAAG;gBACzB,MAAMiC,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC;gBACxD1B,KAAK,SAAS,GAAG0B;gBAEjB,MAAMC,aAAoC;oBACxC,MAAM;oBACN,IAAIF;oBACJ,YAAYC,UAAU,gBAAgB;oBACtC,QAAQ;gBACV;gBACA1B,KAAK,QAAQ,GAAG;oBAAC2B;iBAAW;gBAE5B,MAAMqF,mBAAmBL,AAAS,YAATA;gBACzB,IAAIM,cAAcL;gBAClB,IAAIM,cAAc;gBAClB,IAAIF,oBAAqBL,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEO,cAAc;oBACd,MAAMC,gBACJR,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIK,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGP,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,MAAM,EAAEQ,IAAI,EAAEhG,KAAK,EAAEiG,OAAO,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACzDJ,aACA5G,aACAwG,KACAC;gBAGF,IAAIQ,eAAeF;gBACnB,IAAIJ,kBAEF,IAAI,AAAgB,YAAhB,OAAOI,MACTE,eAAeF;qBACV,IAAIT,AAAS,cAATA,MAEPW,eADEF,QAAAA,OACa,QAECA,IAAY,CAACF,YAAY;qBAEtC,IAAIE,QAAAA,MACTE,eAAe;qBACV;oBACLrG,OACEmG,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,IAAM,CAACF,YAAY,AAAD,MAAMtG,QACxB;oBAEF0G,eAAgBF,IAAY,CAACF,YAAY;gBAC3C;gBAGF,OAAO;oBACL,QAAQI;oBACR,KAAKnG;oBACLC;oBACAiG;gBACF;YACF;QACF;QAEA,OAAON;IACT;IACA,MAAM,yBACJJ,IAA0D,EAC1DC,MAA2B,EAC3BvG,WAAyB,EACzBwG,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMjC,eAAe,IAAIC,SACvBC,aACE4B,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAAS1F,KAAK,SAAS,CAAC0F,UAEvD;YACE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QAGF,MAAMG,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CJ,MACAC,QACAvG,aACAwG,KACAC;QAGF,MAAMjC,aAAa,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAACkC;QAC7D,MAAM7G,SAAS,MAAM2E,aAAa,KAAK;QAEvC,IAAI,CAAC3E,QACH,MAAM,IAAI4C,MACR;QAIJ,MAAM,EAAEkD,MAAM,EAAEqB,OAAO,EAAE,GAAGnH;QAE5B,OAAO;YACL8F;YACAqB;YACA,UAAUxC;QACZ;IACF;IAEA,MAAc,gBACZA,YAAsB,EACtB2B,QAAgB,EAChBnG,WAAyB,EACzB;QACA,MAAMkH,YAAsD;YAC1D,MAAM;YACN,OAAO;gBACL,SAASf;YACX;YACA,QAAQ;QACV;QACA,MAAM,EAAEjG,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClD;YAACgH;SAAU,EACXlH;QAEF,MAAMwE,aAAa,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAACtE,KAAK,CAAC,EAAE;QACrE,MAAMsE,aAAa,KAAK;QAExB,OAAO;YACL,QAAQjE;YACR,UAAUiE;QACZ;IACF;IAEA,MAAM,aAAa2C,MAAc,EAAEnH,WAAyB,EAAE;QAC5D,MAAMoH,YAAsD;YAC1D,MAAM;YACN,OAAO;gBACLD;YACF;YACA,QAAQ;QACV;QAEA,MAAM,EAAE,OAAOE,UAAU,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAC9D;YAACD;SAAU,EACXpH;QAGF,OAAO,IAAI,CAAC,6BAA6B,CAACqH,UAAU,CAAC,EAAE;IACzD;IAEA,MAAM,QACJC,SAAsB,EACtBd,GAA+B,EAC/BxG,WAAyB,EACO;QAChC,MAAM,EAAEuH,UAAU,EAAEd,gBAAgB,EAAE,GAAGe,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM/C,eAAe,IAAIC,SAASC,aAAa,WAAW+C,cAAc;YACtE,aAAa,IAAI,CAAC,mBAAmB;QACvC;QACA,MAAM,EAAEC,SAAS,EAAEC,eAAe,EAAE,GAAGnB;QAEvC5F,OAAO0G,WAAW;QAClB1G,OAAO8G,WAAW;QAClB9G,OAAO+G,iBAAiB;QAExB/G,OACE+G,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBxI,KAAK,GAAG;QACjC,IAAIwF,YAAYxF,KAAK,GAAG;QACxB,IAAIyI,eAAe;QACnB,MAAOzI,KAAK,GAAG,KAAKwI,mBAAmBF,UAAW;YAChD9C,YAAYxF,KAAK,GAAG;YACpB,MAAMsH,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAa,YACAvH,aACA;gBACE,iBAAiB;YACnB,GACAyG;YAGF,MAAMjC,aAAa,MAAM,CAAC,IAAI,CAAC,6BAA6B,CAACkC;YAC7D,MAAM7G,SAAU,MAAM2E,aAAa,KAAK;YAOxC,IAAI3E,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,EAChB,OAAO;gBACL,QAAQU;gBACR,UAAUiE;YACZ;YAGFqD,eACEhI,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,OAAO,AAAD,KACb,CAACA,UAAU,CAAC,0BAA0B,EAAE0H,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMO,MAAM1I,KAAK,GAAG;YACpB,IAAI0I,MAAMlD,YAAY+C,iBAAiB;gBACrC,MAAMlC,gBAAgBkC,kBAAmBG,CAAAA,MAAMlD,SAAQ;gBACvD,MAAMmD,YAAY,MAAM,IAAI,CAAC,YAAY,CAACtC,eAAezF;gBACzD,MAAMwE,aAAa,MAAM,CAACuD;YAC5B;QACF;QAEA,OAAO,IAAI,CAAC,eAAe,CACzBvD,cACA,CAAC,iBAAiB,EAAEqD,cAAc,EAClC7H;IAEJ;IA//BA,YACEgI,iBAAoC,EACpCC,OAAgB,EAChBC,IAIC,CACD;QAzBF;QAEA;QAEA;QAEA,uBAAQ,uBAAR;QAEA;QAEA;QAgBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,WAAW;QAC5C,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,mBAAmB,GAAG,IAAIC;IACjC;AAi/BF"}
@@ -143,7 +143,7 @@ function trimContextByViewport(execution) {
143
143
  }) : execution.tasks
144
144
  };
145
145
  }
146
- const getMidsceneVersion = ()=>"0.30.6-beta-20251022112352.0";
146
+ const getMidsceneVersion = ()=>"0.30.6-beta-20251023092723.0";
147
147
  const parsePrompt = (prompt)=>{
148
148
  if ('string' == typeof prompt) return {
149
149
  textPrompt: prompt,
@@ -20,6 +20,9 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
20
20
  if (!f._def) return f;
21
21
  const typeName = f._def.typeName;
22
22
  if ('ZodOptional' === typeName || 'ZodNullable' === typeName || 'ZodDefault' === typeName) return unwrapField(f._def.innerType);
23
+ if ('ZodEffects' === typeName) {
24
+ if (f._def.schema) return unwrapField(f._def.schema);
25
+ }
23
26
  return f;
24
27
  };
25
28
  const actualField = unwrapField(field);
@@ -37,6 +40,15 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
37
40
  const values = (null == (_actualField__def1 = actualField._def) ? void 0 : null == (_actualField__def_values = _actualField__def1.values) ? void 0 : _actualField__def_values.map((option)=>String(`'${option}'`)).join(', ')) ?? 'enum';
38
41
  return `enum(${values})`;
39
42
  }
43
+ if ('ZodUnion' === fieldTypeName) {
44
+ var _actualField__def2;
45
+ const options = null == (_actualField__def2 = actualField._def) ? void 0 : _actualField__def2.options;
46
+ if (options && options.length > 0) {
47
+ const types = options.map((opt)=>getTypeName(opt));
48
+ return types.join(' | ');
49
+ }
50
+ return 'union';
51
+ }
40
52
  console.warn('failed to parse Zod type. This may lead to wrong params from the LLM.\n', actualField._def);
41
53
  return actualField.toString();
42
54
  };
@@ -46,6 +58,9 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
46
58
  if (!f._def) return f;
47
59
  const typeName = f._def.typeName;
48
60
  if ('ZodOptional' === typeName || 'ZodNullable' === typeName || 'ZodDefault' === typeName) return unwrapField(f._def.innerType);
61
+ if ('ZodEffects' === typeName) {
62
+ if (f._def.schema) return unwrapField(f._def.schema);
63
+ }
49
64
  return f;
50
65
  };
51
66
  if ("description" in field) return field.description || null;
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { ZodObject, z } from 'zod';\nimport { ifMidsceneLocatorField } from '../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst vlCurrentLog = `\"log\": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The log should contain the following information: \"The user wants to do ... . According to the instruction and the previous logs, next step is to .... Now i am going to compose an action '{ action-type }' to do ....\". If no action should be done, log the reason. Use the same language as the user's instruction.`;\nconst llmCurrentLog = `\"log\": string, // Log what the next actions you can do according to the screenshot and the instruction. The typical log looks like \"Now i want to use action '{ action-type }' to do ..\". If no action should be done, log the reason. \". Use the same language as the user's instruction.`;\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\nconst vlLocateParam = () =>\n '{bbox: [number, number, number, number], prompt: string }';\nconst llmLocateParam = () => '{\"id\": string, \"prompt\": string}';\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n // Try to extract parameter information from the zod schema\n // For zod object schemas, extract type information and descriptions\n const shape = (action.paramSchema as ZodObject<any>).shape;\n\n if (!shape) {\n console.warn(\n `action.paramSchema is not a ZodObject, may lead to unexpected behavior, action name: ${action.name}`,\n );\n }\n\n const paramLines: string[] = [];\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n if (paramLines.length > 0) {\n fields.push('- param:');\n for (const paramLine of paramLines) {\n fields.push(` - ${paramLine}`);\n }\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nconst systemTemplateOfVLPlanning = ({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n}) => {\n const actionNameList = actionSpace.map((action) => action.name).join(', ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, vlLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\nTarget: User will give you a latest screenshot, an instruction and some previous logs indicating what have been done. Please tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\nRestriction:\n- Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.\n- Always give ONLY ONE action in \\`log\\` field (or null if no action should be done), instead of multiple actions. Supported actions are ${actionNameList}.\n- Don't repeat actions in the previous logs.\n- Bbox is the bounding box of the element to be located. It's an array of 4 numbers, representing ${bboxDescription(vlMode)}.\n\nSupporting actions:\n${actionList}\n\nField description:\n* The \\`prompt\\` field inside the \\`locate\\` field is a short description that could be used to locate the element.\n\nReturn in JSON format:\n{\n ${vlCurrentLog}\n ${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, when the instruction is \"click 'Confirm' button, and click 'Yes' in popup\" and the previous log shows \"The 'Confirm' button has been clicked\", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.\n\nthis and output the JSON:\n\n{\n \"log\": \"The user wants to do click 'Confirm' button, and click 'Yes' in popup. According to the instruction and the previous logs, next step is to tap the 'Yes' button in the popup. Now i am going to compose an action 'Tap' to click 'Yes' in popup.\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": {\n \"bbox\": [100, 100, 200, 200],\n \"prompt\": \"The 'Yes' button in popup\"\n }\n }\n },\n \"more_actions_needed_by_instruction\": false,\n}\n`;\n};\n\nconst systemTemplateOfLLM = ({\n actionSpace,\n}: { actionSpace: DeviceAction<any>[] }) => {\n const actionNameList = actionSpace.map((action) => action.name).join(' / ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, llmLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\n## Role\n\nYou are a versatile professional in software UI automation. Your outstanding contributions will impact the user experience of billions of users.\n\n## Objective\n\n- Decompose the instruction user asked into a series of actions\n- Locate the target element if possible\n- If the instruction cannot be accomplished, give a further plan.\n\n## Workflow\n\n1. Receive the screenshot, element description of screenshot(if any), user's instruction and previous logs.\n2. Decompose the user's task into a sequence of feasible actions, and place it in the \\`actions\\` field. There are different types of actions (${actionNameList}). The \"About the action\" section below will give you more details.\n3. Consider whether the user's instruction will be accomplished after the actions you composed.\n- If the instruction is accomplished, set \\`more_actions_needed_by_instruction\\` to false.\n- If more actions are needed, set \\`more_actions_needed_by_instruction\\` to true. Get ready to hand over to the next talent people like you. Carefully log what have been done in the \\`log\\` field, he or she will continue the task according to your logs.\n4. If the task is not feasible on this page, set \\`error\\` field to the reason.\n\n## Constraints\n\n- All the actions you composed MUST be feasible, which means all the action fields can be filled with the page context information you get. If not, don't plan this action.\n- Trust the \"What have been done\" field about the task (if any), don't repeat actions in it.\n- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \\`\\`\\`json\\`\\`\\`.\n- If the screenshot and the instruction are totally irrelevant, set reason in the \\`error\\` field.\n\n## About the \\`actions\\` field\n\nThe \\`locate\\` param is commonly used in the \\`param\\` field of the action, means to locate the target element to perform the action, it conforms to the following scheme:\n\ntype LocateParam = {\n \"id\": string, // the id of the element found. It should either be the id marked with a rectangle in the screenshot or the id described in the description.\n \"prompt\"?: string // the description of the element to find. It can only be omitted when locate is null.\n} | null // If it's not on the page, the LocateParam should be null\n\n## Supported actions\n\nEach action has a \\`type\\` and corresponding \\`param\\`. To be detailed:\n${actionList}\n\n`.trim();\n};\n\nconst outputTemplate = `\n## Output JSON Format:\n\nThe JSON format is as follows:\n\n{\n \"actions\": [\n // ... some actions\n ],\n ${llmCurrentLog}\n ${commonOutputFields}\n}\n\n## Examples\n\n### Example: Decompose a task\n\nWhen you received the following information:\n\n* Instruction: 'Click the language switch button, wait 1s, click \"English\"'\n* Logs: null\n* Page Context (screenshot and description) shows: There is a language switch button, and the \"English\" option is not shown in the screenshot now.\n\nBy viewing the page screenshot and description, you should consider this and output the JSON:\n\n* The user intent is: tap the switch button, sleep, and tap the 'English' option\n* The language switch button is shown in the screenshot, and can be located by the page description or the id marked with a rectangle. So we can plan a Tap action to do this.\n* Plan a Sleep action to wait for 1 second to ensure the language options are displayed.\n* The \"English\" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So don't plan any action to do this.\n* Compose the log: The user wants to do click the language switch button, wait 1s, click \"English\". According to the instruction and the previous logs, next step is to tap the language switch button to open the language options. Now i am going to compose an action 'Tap' to click the language switch button.\n* The task cannot be accomplished (because the last tapping action is not finished yet), so the \\`more_actions_needed_by_instruction\\` field is true. The \\`error\\` field is null.\n\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": { id: \"c81c4e9a33\", prompt: \"The language switch button\" }\n }\n },\n {\n \"thought\": \"Wait for 1 second to ensure the language options are displayed.\",\n \"type\": \"Sleep\",\n \"param\": { \"timeMs\": 1000 },\n }\n ],\n \"error\": null,\n \"more_actions_needed_by_instruction\": true,\n \"log\": \"The user wants to do click the language switch button, wait 1s, click \\\"English\\\". According to the instruction and the previous logs, next step is to tap the language switch button to open the language options. Now i am going to compose an action 'Tap' to click the language switch button.\",\n}\n\n### Example: What NOT to do\nWrong output:\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \"id\": \"c81c4e9a33\" } // WRONG: prompt is missing, this is not a valid LocateParam\n }\n },\n {\n \"thought\": \"Click the English option\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": null // WRONG: if the element is not on the page, you should not plan this action\n }\n }\n ],\n \"more_actions_needed_by_instruction\": false, // WRONG: should be true\n \"log\": \"The user wants to do click the language switch button, wait 1s, click \\\"English\\\". According to the instruction and the previous logs, next step is to tap the language switch button to open the language options. Now i am going to compose an action 'Tap' to click the language switch button.\",\n}\n`;\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n}) {\n if (vlMode) {\n return systemTemplateOfVLPlanning({ actionSpace, vlMode });\n }\n\n return `${systemTemplateOfLLM({ actionSpace })}\\n\\n${outputTemplate}`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["vlCurrentLog","llmCurrentLog","commonOutputFields","vlLocateParam","llmLocateParam","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","shape","console","paramLines","getTypeName","field","_actualField__def","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","_actualField__def_values","values","option","String","getDescription","key","Object","isOptional","keyWithOptional","description","paramLine","systemTemplateOfVLPlanning","actionSpace","vlMode","actionNameList","actionDescriptionList","actionList","bboxDescription","systemTemplateOfLLM","outputTemplate","systemPromptToTaskPlanning","planSchema"],"mappings":";;AASA,MAAMA,eAAe;AACrB,MAAMC,gBAAgB;AAEtB,MAAMC,qBAAqB,CAAC;+NACmM,CAAC;AAChO,MAAMC,gBAAgB,IACpB;AACF,MAAMC,iBAAiB,IAAM;AAEtB,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QAGtB,MAAMI,QAASJ,OAAO,WAAW,CAAoB,KAAK;QAE1D,IAAI,CAACI,OACHC,QAAQ,IAAI,CACV,CAAC,qFAAqF,EAAEL,OAAO,IAAI,EAAE;QAIzG,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,cAAc,CAACC;gBAoBGC;YAlBtB,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAGrC,OAAOA;YACT;YAEA,MAAME,cAAcH,YAAYF;YAChC,MAAMM,gBAAgB,QAAAL,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ;YAEhD,IAAIK,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;YAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;YACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;gBAEjC,IAAIC,uBAAuBF,cACzB,OAAOZ;gBAET,OAAO;YACT;YACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAE5BE,0BAAAA;gBADH,MAAMC,SACJ,AAAC,SAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,mBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBACG,GAAG,CAAC,CAACE,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,IAAI,CAAC,KAAI,KAAK;gBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;YAC1B;YAEAZ,QAAQ,IAAI,CACV,2EACAQ,YAAY,IAAI;YAElB,OAAOA,YAAY,QAAQ;QAC7B;QAGA,MAAMO,iBAAiB,CAACZ;gBAgClBC;YA9BJ,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAGrC,OAAOA;YACT;YAGA,IAAI,iBAAiBH,OACnB,OAAOA,MAAM,WAAW,IAAI;YAG9B,MAAMK,cAAcH,YAAYF;YAGhC,IAAI,iBAAiBK,aACnB,OAAOA,YAAY,WAAW,IAAI;YAIpC,IAAIJ,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,aACjC;gBAAA,IAAI,kCAAkCI,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;YACT;YAGF,OAAO;QACT;QAEA,KAAK,MAAM,CAACQ,KAAKb,MAAM,IAAIc,OAAO,OAAO,CAAClB,OACxC,IAAII,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;YAEtC,MAAMe,aACJ,AAAqC,cAArC,OAAQf,MAAc,UAAU,IAC/BA,MAAc,UAAU;YAC3B,MAAMgB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;YAGjD,MAAMT,WAAWL,YAAYC;YAG7B,MAAMiB,cAAcL,eAAeZ;YAGnC,IAAIkB,YAAY,GAAGF,gBAAgB,EAAE,EAAEZ,UAAU;YACjD,IAAIa,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;YAGnCnB,WAAW,IAAI,CAACoB;QAClB;QAGF,IAAIpB,WAAW,MAAM,GAAG,GAAG;YACzBH,OAAO,IAAI,CAAC;YACZ,KAAK,MAAMuB,aAAapB,WACtBH,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEuB,WAAW;QAElC;IACF;IAEA,OAAO,CAAC,EAAE,EAAE1B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEA,MAAMyB,6BAA6B,CAAC,EAClCC,WAAW,EACXC,MAAM,EAIP;IACC,MAAMC,iBAAiBF,YAAY,GAAG,CAAC,CAAC5B,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAM+B,wBAAwBH,YAAY,GAAG,CAAC,CAAC5B,SACtCD,qBAAqBC,QAAQH;IAEtC,MAAMmC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;yIAK+H,EAAED,eAAe;;kGAExD,EAAEG,gBAAgBJ,QAAQ;;;AAG5H,EAAEG,WAAW;;;;;;;EAOX,EAAEtC,aAAa;EACf,EAAEE,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;AA0BvB,CAAC;AACD;AAEA,MAAMsC,sBAAsB,CAAC,EAC3BN,WAAW,EAC0B;IACrC,MAAME,iBAAiBF,YAAY,GAAG,CAAC,CAAC5B,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAM+B,wBAAwBH,YAAY,GAAG,CAAC,CAAC5B,SACtCD,qBAAqBC,QAAQF;IAEtC,MAAMkC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;;;;;;;;;;+IAcqI,EAAED,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;AAyBhK,EAAEE,WAAW;;AAEb,CAAC,CAAC,IAAI;AACN;AAEA,MAAMG,iBAAiB,CAAC;;;;;;;;;EAStB,EAAExC,cAAc;EAChB,EAAEC,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAgEvB,CAAC;AAEM,eAAewC,2BAA2B,EAC/CR,WAAW,EACXC,MAAM,EAIP;IACC,IAAIA,QACF,OAAOF,2BAA2B;QAAEC;QAAaC;IAAO;IAG1D,OAAO,GAAGK,oBAAoB;QAAEN;IAAY,GAAG,IAAI,EAAEO,gBAAgB;AACvE;AAEO,MAAME,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}
1
+ {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { ZodObject, z } from 'zod';\nimport { ifMidsceneLocatorField } from '../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst vlCurrentLog = `\"log\": string, // Log your thoughts and what the next one action (ONLY ONE!) you can do according to the screenshot and the instruction. The log should contain the following information: \"The user wants to do ... . According to the instruction and the previous logs, next step is to .... Now i am going to compose an action '{ action-type }' to do ....\". If no action should be done, log the reason. Use the same language as the user's instruction.`;\nconst llmCurrentLog = `\"log\": string, // Log what the next actions you can do according to the screenshot and the instruction. The typical log looks like \"Now i want to use action '{ action-type }' to do ..\". If no action should be done, log the reason. \". Use the same language as the user's instruction.`;\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\nconst vlLocateParam = () =>\n '{bbox: [number, number, number, number], prompt: string }';\nconst llmLocateParam = () => '{\"id\": string, \"prompt\": string}';\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n // Try to extract parameter information from the zod schema\n // For zod object schemas, extract type information and descriptions\n const shape = (action.paramSchema as ZodObject<any>).shape;\n\n if (!shape) {\n console.warn(\n `action.paramSchema is not a ZodObject, may lead to unexpected behavior, action name: ${action.name}`,\n );\n }\n\n const paramLines: string[] = [];\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] | undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' | ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n if (paramLines.length > 0) {\n fields.push('- param:');\n for (const paramLine of paramLines) {\n fields.push(` - ${paramLine}`);\n }\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nconst systemTemplateOfVLPlanning = ({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n}) => {\n const actionNameList = actionSpace.map((action) => action.name).join(', ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, vlLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\nTarget: User will give you a latest screenshot, an instruction and some previous logs indicating what have been done. Please tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\nRestriction:\n- Don't give extra actions or plans beyond the instruction. ONLY plan for what the instruction requires. For example, don't try to submit the form if the instruction is only to fill something.\n- Always give ONLY ONE action in \\`log\\` field (or null if no action should be done), instead of multiple actions. Supported actions are ${actionNameList}.\n- Don't repeat actions in the previous logs.\n- Bbox is the bounding box of the element to be located. It's an array of 4 numbers, representing ${bboxDescription(vlMode)}.\n\nSupporting actions:\n${actionList}\n\nField description:\n* The \\`prompt\\` field inside the \\`locate\\` field is a short description that could be used to locate the element.\n\nReturn in JSON format:\n{\n ${vlCurrentLog}\n ${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, when the instruction is \"click 'Confirm' button, and click 'Yes' in popup\" and the previous log shows \"The 'Confirm' button has been clicked\", by viewing the screenshot and previous logs, you should consider: We have already clicked the 'Confirm' button, so next we should find and click 'Yes' in popup.\n\nthis and output the JSON:\n\n{\n \"log\": \"The user wants to do click 'Confirm' button, and click 'Yes' in popup. According to the instruction and the previous logs, next step is to tap the 'Yes' button in the popup. Now i am going to compose an action 'Tap' to click 'Yes' in popup.\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": {\n \"bbox\": [100, 100, 200, 200],\n \"prompt\": \"The 'Yes' button in popup\"\n }\n }\n },\n \"more_actions_needed_by_instruction\": false,\n}\n`;\n};\n\nconst systemTemplateOfLLM = ({\n actionSpace,\n}: { actionSpace: DeviceAction<any>[] }) => {\n const actionNameList = actionSpace.map((action) => action.name).join(' / ');\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(action, llmLocateParam());\n });\n const actionList = actionDescriptionList.join('\\n');\n\n return `\n## Role\n\nYou are a versatile professional in software UI automation. Your outstanding contributions will impact the user experience of billions of users.\n\n## Objective\n\n- Decompose the instruction user asked into a series of actions\n- Locate the target element if possible\n- If the instruction cannot be accomplished, give a further plan.\n\n## Workflow\n\n1. Receive the screenshot, element description of screenshot(if any), user's instruction and previous logs.\n2. Decompose the user's task into a sequence of feasible actions, and place it in the \\`actions\\` field. There are different types of actions (${actionNameList}). The \"About the action\" section below will give you more details.\n3. Consider whether the user's instruction will be accomplished after the actions you composed.\n- If the instruction is accomplished, set \\`more_actions_needed_by_instruction\\` to false.\n- If more actions are needed, set \\`more_actions_needed_by_instruction\\` to true. Get ready to hand over to the next talent people like you. Carefully log what have been done in the \\`log\\` field, he or she will continue the task according to your logs.\n4. If the task is not feasible on this page, set \\`error\\` field to the reason.\n\n## Constraints\n\n- All the actions you composed MUST be feasible, which means all the action fields can be filled with the page context information you get. If not, don't plan this action.\n- Trust the \"What have been done\" field about the task (if any), don't repeat actions in it.\n- Respond only with valid JSON. Do not write an introduction or summary or markdown prefix like \\`\\`\\`json\\`\\`\\`.\n- If the screenshot and the instruction are totally irrelevant, set reason in the \\`error\\` field.\n\n## About the \\`actions\\` field\n\nThe \\`locate\\` param is commonly used in the \\`param\\` field of the action, means to locate the target element to perform the action, it conforms to the following scheme:\n\ntype LocateParam = {\n \"id\": string, // the id of the element found. It should either be the id marked with a rectangle in the screenshot or the id described in the description.\n \"prompt\"?: string // the description of the element to find. It can only be omitted when locate is null.\n} | null // If it's not on the page, the LocateParam should be null\n\n## Supported actions\n\nEach action has a \\`type\\` and corresponding \\`param\\`. To be detailed:\n${actionList}\n\n`.trim();\n};\n\nconst outputTemplate = `\n## Output JSON Format:\n\nThe JSON format is as follows:\n\n{\n \"actions\": [\n // ... some actions\n ],\n ${llmCurrentLog}\n ${commonOutputFields}\n}\n\n## Examples\n\n### Example: Decompose a task\n\nWhen you received the following information:\n\n* Instruction: 'Click the language switch button, wait 1s, click \"English\"'\n* Logs: null\n* Page Context (screenshot and description) shows: There is a language switch button, and the \"English\" option is not shown in the screenshot now.\n\nBy viewing the page screenshot and description, you should consider this and output the JSON:\n\n* The user intent is: tap the switch button, sleep, and tap the 'English' option\n* The language switch button is shown in the screenshot, and can be located by the page description or the id marked with a rectangle. So we can plan a Tap action to do this.\n* Plan a Sleep action to wait for 1 second to ensure the language options are displayed.\n* The \"English\" option button is not shown in the screenshot now, it means it may only show after the previous actions are finished. So don't plan any action to do this.\n* Compose the log: The user wants to do click the language switch button, wait 1s, click \"English\". According to the instruction and the previous logs, next step is to tap the language switch button to open the language options. Now i am going to compose an action 'Tap' to click the language switch button.\n* The task cannot be accomplished (because the last tapping action is not finished yet), so the \\`more_actions_needed_by_instruction\\` field is true. The \\`error\\` field is null.\n\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": { id: \"c81c4e9a33\", prompt: \"The language switch button\" }\n }\n },\n {\n \"thought\": \"Wait for 1 second to ensure the language options are displayed.\",\n \"type\": \"Sleep\",\n \"param\": { \"timeMs\": 1000 },\n }\n ],\n \"error\": null,\n \"more_actions_needed_by_instruction\": true,\n \"log\": \"The user wants to do click the language switch button, wait 1s, click \\\"English\\\". According to the instruction and the previous logs, next step is to tap the language switch button to open the language options. Now i am going to compose an action 'Tap' to click the language switch button.\",\n}\n\n### Example: What NOT to do\nWrong output:\n{\n \"actions\":[\n {\n \"thought\": \"Click the language switch button to open the language options.\",\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \"id\": \"c81c4e9a33\" } // WRONG: prompt is missing, this is not a valid LocateParam\n }\n },\n {\n \"thought\": \"Click the English option\",\n \"type\": \"Tap\", \n \"param\": {\n \"locate\": null // WRONG: if the element is not on the page, you should not plan this action\n }\n }\n ],\n \"more_actions_needed_by_instruction\": false, // WRONG: should be true\n \"log\": \"The user wants to do click the language switch button, wait 1s, click \\\"English\\\". According to the instruction and the previous logs, next step is to tap the language switch button to open the language options. Now i am going to compose an action 'Tap' to click the language switch button.\",\n}\n`;\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n}) {\n if (vlMode) {\n return systemTemplateOfVLPlanning({ actionSpace, vlMode });\n }\n\n return `${systemTemplateOfLLM({ actionSpace })}\\n\\n${outputTemplate}`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["vlCurrentLog","llmCurrentLog","commonOutputFields","vlLocateParam","llmLocateParam","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","shape","console","paramLines","getTypeName","field","_actualField__def","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","_actualField__def_values","values","option","String","_actualField__def2","options","types","opt","getDescription","key","Object","isOptional","keyWithOptional","description","paramLine","systemTemplateOfVLPlanning","actionSpace","vlMode","actionNameList","actionDescriptionList","actionList","bboxDescription","systemTemplateOfLLM","outputTemplate","systemPromptToTaskPlanning","planSchema"],"mappings":";;AASA,MAAMA,eAAe;AACrB,MAAMC,gBAAgB;AAEtB,MAAMC,qBAAqB,CAAC;+NACmM,CAAC;AAChO,MAAMC,gBAAgB,IACpB;AACF,MAAMC,iBAAiB,IAAM;AAEtB,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QAGtB,MAAMI,QAASJ,OAAO,WAAW,CAAoB,KAAK;QAE1D,IAAI,CAACI,OACHC,QAAQ,IAAI,CACV,CAAC,qFAAqF,EAAEL,OAAO,IAAI,EAAE;QAIzG,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,cAAc,CAACC;gBA4BGC;YA1BtB,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAIrC,IAAIC,AAAa,iBAAbA,UAEF;oBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;gBAClC;gBAGF,OAAOA;YACT;YAEA,MAAME,cAAcH,YAAYF;YAChC,MAAMM,gBAAgB,QAAAL,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ;YAEhD,IAAIK,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;YAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;YAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;YACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;gBAEjC,IAAIC,uBAAuBF,cACzB,OAAOZ;gBAET,OAAO;YACT;YACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAE5BE,0BAAAA;gBADH,MAAMC,SACJ,AAAC,SAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,mBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBACG,GAAG,CAAC,CAACE,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,IAAI,CAAC,KAAI,KAAK;gBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;YAC1B;YAEA,IAAIH,AAAkB,eAAlBA,eAA8B;oBAChBM;gBAAhB,MAAMC,UAAU,QAAAD,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,OAAO;gBACzC,IAAIC,WAAWA,QAAQ,MAAM,GAAG,GAAG;oBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAahB,YAAYgB;oBACpD,OAAOD,MAAM,IAAI,CAAC;gBACpB;gBACA,OAAO;YACT;YAEAjB,QAAQ,IAAI,CACV,2EACAQ,YAAY,IAAI;YAElB,OAAOA,YAAY,QAAQ;QAC7B;QAGA,MAAMW,iBAAiB,CAAChB;gBAwClBC;YAtCJ,MAAMC,cAAc,CAACC;gBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;gBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;gBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;gBAIrC,IAAIC,AAAa,iBAAbA,UAEF;oBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;gBAClC;gBAGF,OAAOA;YACT;YAGA,IAAI,iBAAiBH,OACnB,OAAOA,MAAM,WAAW,IAAI;YAG9B,MAAMK,cAAcH,YAAYF;YAGhC,IAAI,iBAAiBK,aACnB,OAAOA,YAAY,WAAW,IAAI;YAIpC,IAAIJ,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,aACjC;gBAAA,IAAI,kCAAkCI,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;YACT;YAGF,OAAO;QACT;QAEA,KAAK,MAAM,CAACY,KAAKjB,MAAM,IAAIkB,OAAO,OAAO,CAACtB,OACxC,IAAII,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;YAEtC,MAAMmB,aACJ,AAAqC,cAArC,OAAQnB,MAAc,UAAU,IAC/BA,MAAc,UAAU;YAC3B,MAAMoB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;YAGjD,MAAMb,WAAWL,YAAYC;YAG7B,MAAMqB,cAAcL,eAAehB;YAGnC,IAAIsB,YAAY,GAAGF,gBAAgB,EAAE,EAAEhB,UAAU;YACjD,IAAIiB,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;YAGnCvB,WAAW,IAAI,CAACwB;QAClB;QAGF,IAAIxB,WAAW,MAAM,GAAG,GAAG;YACzBH,OAAO,IAAI,CAAC;YACZ,KAAK,MAAM2B,aAAaxB,WACtBH,OAAO,IAAI,CAAC,CAAC,IAAI,EAAE2B,WAAW;QAElC;IACF;IAEA,OAAO,CAAC,EAAE,EAAE9B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEA,MAAM6B,6BAA6B,CAAC,EAClCC,WAAW,EACXC,MAAM,EAIP;IACC,MAAMC,iBAAiBF,YAAY,GAAG,CAAC,CAAChC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAMmC,wBAAwBH,YAAY,GAAG,CAAC,CAAChC,SACtCD,qBAAqBC,QAAQH;IAEtC,MAAMuC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;yIAK+H,EAAED,eAAe;;kGAExD,EAAEG,gBAAgBJ,QAAQ;;;AAG5H,EAAEG,WAAW;;;;;;;EAOX,EAAE1C,aAAa;EACf,EAAEE,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;AA0BvB,CAAC;AACD;AAEA,MAAM0C,sBAAsB,CAAC,EAC3BN,WAAW,EAC0B;IACrC,MAAME,iBAAiBF,YAAY,GAAG,CAAC,CAAChC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;IACrE,MAAMmC,wBAAwBH,YAAY,GAAG,CAAC,CAAChC,SACtCD,qBAAqBC,QAAQF;IAEtC,MAAMsC,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,OAAO,CAAC;;;;;;;;;;;;;;+IAcqI,EAAED,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;AAyBhK,EAAEE,WAAW;;AAEb,CAAC,CAAC,IAAI;AACN;AAEA,MAAMG,iBAAiB,CAAC;;;;;;;;;EAStB,EAAE5C,cAAc;EAChB,EAAEC,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAgEvB,CAAC;AAEM,eAAe4C,2BAA2B,EAC/CR,WAAW,EACXC,MAAM,EAIP;IACC,IAAIA,QACF,OAAOF,2BAA2B;QAAEC;QAAaC;IAAO;IAG1D,OAAO,GAAGK,oBAAoB;QAAEN;IAAY,GAAG,IAAI,EAAEO,gBAAgB;AACvE;AAEO,MAAME,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}
@@ -64,11 +64,11 @@ const defineActionInput = (call)=>defineAction({
64
64
  });
65
65
  const actionKeyboardPressParamSchema = z.object({
66
66
  locate: getMidsceneLocationSchema().describe('The element to be clicked before pressing the key').optional(),
67
- keyName: z.string().describe('The key to be pressed')
67
+ keyName: z.string().describe("The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'")
68
68
  });
69
69
  const defineActionKeyboardPress = (call)=>defineAction({
70
70
  name: 'KeyboardPress',
71
- description: 'Press a function key, like "Enter", "Tab", "Escape". Do not use this to type text.',
71
+ description: 'Press a key or key combination, like "Enter", "Tab", "Escape", or "Control+A", "Shift+Enter". Do not use this to type text.',
72
72
  interfaceAlias: 'aiKeyboardPress',
73
73
  paramSchema: actionKeyboardPressParamSchema,
74
74
  call
@@ -1 +1 @@
1
- {"version":3,"file":"device/index.mjs","sources":["webpack://@midscene/core/./src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/ai-model';\nimport type { DeviceAction, LocateResultElement } from '@/types';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[] | Promise<DeviceAction[]>;\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n opt?: { _orderSensitive: boolean },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\nexport const defineAction = <\n TSchema extends z.ZodType,\n TRuntime = z.infer<TSchema>,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n call: (param: TRuntime) => Promise<void>;\n } & Partial<\n Omit<\n DeviceAction<TRuntime>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\n// Override the inferred type to use LocateResultElement for the runtime locate field\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe('The element to be input')\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'append'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"append\" - append the value to existing content; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z.string().describe('The key to be pressed'),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a function key, like \"Enter\", \"Tab\", \"Escape\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe('The direction to scroll'),\n scrollType: z\n .enum(['once', 'untilBottom', 'untilTop', 'untilRight', 'untilLeft'])\n .default('once')\n .describe('The scroll type'),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe('The element to be scrolled'),\n});\nexport type ActionScrollParam = {\n direction?: 'down' | 'up' | 'right' | 'left';\n scrollType?: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';\n distance?: number | null;\n locate?: LocateResultElement;\n};\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description: 'Drag and drop the element',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The input field to be cleared'),\n});\nexport type ActionClearInputParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: 'Clear the text content of an input field',\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\nexport type { DeviceAction } from '../types';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput"],"mappings":";;AAOO,MAAeA;AAgCtB;AAIO,MAAMC,eAAe,CAI1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAMO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIK,MAAMO,yBAAyBV,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACW,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQV,4BACL,QAAQ,CAAC,2BACT,QAAQ;IACX,MAAMD,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAS,EACnC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAEN;AAOO,MAAMa,oBAAoB,CAC/BV,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaa;QACbP;IACF;AAIK,MAAMW,iCAAiCd,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAE,MAAM,GAAG,QAAQ,CAAC;AAC/B;AAMO,MAAMe,4BAA4B,CACvCZ,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaiB;QACbX;IACF;AAIK,MAAMa,0BAA0BhB,EAAE,MAAM,CAAC;IAC9C,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CAAC;IACZ,YAAYA,CAAC,CAADA,OACL,CAAC;QAAC;QAAQ;QAAe;QAAY;QAAc;KAAY,EACnE,OAAO,CAAC,QACR,QAAQ,CAAC;IACZ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CAAC;AACd;AAQO,MAAMgB,qBAAqB,CAChCd,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamB;QACbb;IACF;AAIK,MAAMe,+BAA+BlB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMkB,0BAA0B,CACrChB,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaqB;QACbf;IACF;AAGK,MAAMiB,6BAA6BpB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMqB,wBAAwB,CACnClB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAauB;QACbjB;IACF;AAGK,MAAMmB,yBAAyBtB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMuB,oBAAoB,CAC/BpB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAayB;QACbnB;IACF;AAIK,MAAMqB,8BAA8BxB,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMwB,yBAAyB,CACpCtB,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAa2B;QACbrB;IACF"}
1
+ {"version":3,"file":"device/index.mjs","sources":["webpack://@midscene/core/./src/device/index.ts"],"sourcesContent":["import { getMidsceneLocationSchema } from '@/ai-model';\nimport type { DeviceAction, LocateResultElement } from '@/types';\nimport type { ElementNode } from '@midscene/shared/extractor';\nimport { _keyDefinitions } from '@midscene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[] | Promise<DeviceAction[]>;\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n opt?: { _orderSensitive: boolean },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\nexport const defineAction = <\n TSchema extends z.ZodType,\n TRuntime = z.infer<TSchema>,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n call: (param: TRuntime) => Promise<void>;\n } & Partial<\n Omit<\n DeviceAction<TRuntime>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be tapped'),\n});\n// Override the inferred type to use LocateResultElement for the runtime locate field\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getMidsceneLocationSchema()\n .describe('The element to be input')\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'append'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"append\" - append the value to existing content; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: getMidsceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe('The direction to scroll'),\n scrollType: z\n .enum(['once', 'untilBottom', 'untilTop', 'untilRight', 'untilLeft'])\n .default('once')\n .describe('The scroll type'),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getMidsceneLocationSchema()\n .optional()\n .describe('The element to be scrolled'),\n});\nexport type ActionScrollParam = {\n direction?: 'down' | 'up' | 'right' | 'left';\n scrollType?: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';\n distance?: number | null;\n locate?: LocateResultElement;\n};\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getMidsceneLocationSchema().describe('The position to be dragged'),\n to: getMidsceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description: 'Drag and drop the element',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getMidsceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getMidsceneLocationSchema().describe('The input field to be cleared'),\n});\nexport type ActionClearInputParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: 'Clear the text content of an input field',\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\nexport type { DeviceAction } from '../types';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getMidsceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput"],"mappings":";;AAOO,MAAeA;AAgCtB;AAIO,MAAMC,eAAe,CAI1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAMO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CACrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIK,MAAMO,yBAAyBV,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACW,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQV,4BACL,QAAQ,CAAC,2BACT,QAAQ;IACX,MAAMD,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAS,EACnC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAEN;AAOO,MAAMa,oBAAoB,CAC/BV,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaa;QACbP;IACF;AAIK,MAAMW,iCAAiCd,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMe,4BAA4B,CACvCZ,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaiB;QACbX;IACF;AAIK,MAAMa,0BAA0BhB,EAAE,MAAM,CAAC;IAC9C,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CAAC;IACZ,YAAYA,CAAC,CAADA,OACL,CAAC;QAAC;QAAQ;QAAe;QAAY;QAAc;KAAY,EACnE,OAAO,CAAC,QACR,QAAQ,CAAC;IACZ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CAAC;AACd;AAQO,MAAMgB,qBAAqB,CAChCd,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamB;QACbb;IACF;AAIK,MAAMe,+BAA+BlB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMkB,0BAA0B,CACrChB,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaqB;QACbf;IACF;AAGK,MAAMiB,6BAA6BpB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMqB,wBAAwB,CACnClB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAauB;QACbjB;IACF;AAGK,MAAMmB,yBAAyBtB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMuB,oBAAoB,CAC/BpB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAayB;QACbnB;IACF;AAIK,MAAMqB,8BAA8BxB,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMwB,yBAAyB,CACpCtB,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAa2B;QACbrB;IACF"}