@midscene/core 1.9.4 → 1.9.5-beta-20260611033424.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,6 +28,12 @@ const warnLog = getDebug('device-task-executor', {
28
28
  console: true
29
29
  });
30
30
  const maxErrorCountAllowedInOnePlanningLoop = 5;
31
+ const maxPlanningFeedbackLength = 500;
32
+ function truncatePlanningFeedback(feedback) {
33
+ if (feedback.length <= maxPlanningFeedbackLength) return feedback;
34
+ return `${feedback.slice(0, maxPlanningFeedbackLength)}
35
+ ...[truncated, ${feedback.length - maxPlanningFeedbackLength} more characters]`;
36
+ }
31
37
  class TaskExecutor {
32
38
  get page() {
33
39
  return this.interface;
@@ -42,6 +48,15 @@ class TaskExecutor {
42
48
  getActionSpace() {
43
49
  return this.providedActionSpace;
44
50
  }
51
+ setPendingFeedbackMessage(conversationHistory, timeString, body) {
52
+ conversationHistory.pendingFeedbackMessage = body ? `Time: ${timeString}, ${body}` : `Current time: ${timeString}`;
53
+ }
54
+ collectPlanningFeedback(tasks) {
55
+ const feedbackMessages = tasks.flatMap(({ planningFeedback })=>planningFeedback ? [
56
+ truncatePlanningFeedback(planningFeedback)
57
+ ] : []);
58
+ return feedbackMessages.length > 0 ? feedbackMessages.join('\n\n') : void 0;
59
+ }
45
60
  async getTimeString(format) {
46
61
  if (this.useDeviceTime) if (this.interface.getDeviceLocalTimeString) try {
47
62
  return await this.interface.getDeviceLocalTimeString(format);
@@ -240,14 +255,14 @@ class TaskExecutor {
240
255
  }
241
256
  if (conversationHistory.pendingFeedbackMessage) console.warn('unconsumed pending feedback message detected, this may lead to unexpected planning result:', conversationHistory.pendingFeedbackMessage);
242
257
  const initialTimeString = await this.getTimeString();
243
- conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;
244
258
  const taskCountBeforeRun = runner.tasks.length;
245
259
  try {
246
260
  await session.appendAndRun(executables.tasks);
261
+ this.setPendingFeedbackMessage(conversationHistory, initialTimeString, this.collectPlanningFeedback(runner.tasks.slice(taskCountBeforeRun)));
247
262
  } catch (error) {
248
263
  errorCountInOnePlanningLoop++;
249
264
  const timeString = await this.getTimeString();
250
- conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;
265
+ this.setPendingFeedbackMessage(conversationHistory, timeString, `Error executing running tasks: ${error?.message || String(error)}`);
251
266
  debug('error when executing running tasks, but continue to run if it is not too many errors:', error instanceof Error ? error.message : String(error), 'current error count in one planning loop:', errorCountInOnePlanningLoop);
252
267
  }
253
268
  if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) return session.appendErrorPlan('Too many errors in one planning loop');
@@ -1 +1 @@
1
- {"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { AIResponseParseError, ConversationHistory } from '@/ai-model';\nimport type { ModelRuntime } from '@/ai-model/models';\nimport { buildTypeQueryDemandValue } from '@/ai-model/prompt/extraction';\nimport { genericXmlPlan } from '@/ai-model/workflows/planning';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n multimodalPromptToChatMessages,\n userPromptToMultimodalPrompt,\n userPromptToString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n PlanningLocateParam,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { type TaskTitleType, taskTitleStr } from './ui-utils';\nimport { withUsageIntent } from './usage-intent';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nexport type ActionReportOptions = {\n type?: TaskTitleType;\n prompt?: string;\n};\n\nconst debug = getDebug('device-task-executor');\nconst warnLog = getDebug('device-task-executor', { console: true });\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n waitAfterAction?: number;\n\n useDeviceTime?: boolean;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n waitAfterAction?: number;\n useDeviceTime?: boolean;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.waitAfterAction = opts.waitAfterAction;\n this.useDeviceTime = opts.useDeviceTime;\n this.hooks = opts.hooks;\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n waitAfterAction: opts.waitAfterAction,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n /**\n * Get a readable time string. When device time is enabled, use the\n * device-formatted wall-clock time directly so host timezone formatting does\n * not reinterpret a device timestamp.\n * @param format - Optional format string\n * @returns A formatted time string\n */\n private async getTimeString(format?: string): Promise<string> {\n if (this.useDeviceTime) {\n if (this.interface.getDeviceLocalTimeString) {\n try {\n return await this.interface.getDeviceLocalTimeString(format);\n } catch (error) {\n warnLog(\n `Failed to get device time string, falling back to runtime time: ${error}`,\n );\n }\n } else {\n warnLog(\n 'useDeviceTime is enabled but getDeviceLocalTimeString is not implemented, falling back to runtime time.',\n );\n }\n }\n\n return getReadableTimeString(format);\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n options?: {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n },\n ) {\n return this.taskBuilder.build(plans, planningModel, defaultModel, options);\n }\n\n async loadYamlFlowAsPlanning(\n userInstruction: TUserPrompt,\n yamlString: string,\n reportOptions?: ActionReportOptions,\n ) {\n const session = this.createExecutionSession(\n taskTitleStr(\n reportOptions?.type || 'Act',\n reportOptions?.prompt || userPromptToString(userInstruction),\n ),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n ...(reportOptions?.prompt\n ? { userInstructionDisplay: reportOptions.prompt }\n : {}),\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n planningModel,\n defaultModel,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: TUserPrompt,\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n includeLocateInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: boolean,\n fileChooserAccept?: string[],\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n reportOptions?: ActionReportOptions,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n planningModel,\n defaultModel,\n includeLocateInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n deepLocate,\n abortSignal,\n reportOptions,\n );\n });\n }\n\n /**\n * Called when the task is about to replan. Marks every cache-hit locate task\n * in the just-run batch (tasks at index >= fromIndex) as stale: that batch\n * did not finish the task, so the element each cache hit produced is suspect.\n * The upcoming re-locate of the same prompt then replaces the bad entry in\n * place instead of appending a duplicate that would re-poison the cache on the\n * next run (#2529).\n *\n * Marking a locate that was actually fine is harmless: the step is only ever\n * replaced if the same prompt is located again (i.e. the step is redone),\n * which does not happen for a locate that already succeeded.\n */\n private invalidateFailedCacheHitLocates(\n runner: TaskRunner,\n fromIndex: number,\n ) {\n if (!this.taskCache) {\n return;\n }\n for (let i = fromIndex; i < runner.tasks.length; i++) {\n const task = runner.tasks[i];\n if (\n task.type === 'Planning' &&\n task.subType === 'Locate' &&\n task.hitBy?.from === 'Cache'\n ) {\n const prompt = (task.param as PlanningLocateParam | undefined)?.prompt;\n if (prompt) {\n this.taskCache.markLocateCacheStale(prompt);\n }\n }\n }\n }\n\n private async runAction(\n userPrompt: TUserPrompt,\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n includeLocateInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: boolean,\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n reportOptions?: ActionReportOptions,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n if (\n deepLocate &&\n !planningModel.adapter.planning.supportsActionDeepLocate\n ) {\n warnLog(\n `The \"deepLocate\" option is not supported for aiAct with the current planning adapter (modelFamily: ${planningModel.config.modelFamily ?? 'unknown'}). It will be ignored.`,\n );\n deepLocate = false;\n }\n\n const conversationHistory = new ConversationHistory();\n\n const session = this.createExecutionSession(\n taskTitleStr(\n reportOptions?.type || 'Act',\n reportOptions?.prompt || userPromptToString(userPrompt),\n ),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n const referenceImageMessages = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(userPrompt),\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n // Check abort signal before each planning cycle\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // Get sub-goal status text if available\n const subGoalStatus = conversationHistory.subGoalsToText() || undefined;\n\n // Get memories text if available\n const memoriesStatus = conversationHistory.memoriesToText() || undefined;\n\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n ...(reportOptions?.prompt\n ? { userInstructionDisplay: reportOptions.prompt }\n : {}),\n aiActContext,\n imagesIncludeCount,\n deepThink,\n ...(subGoalStatus ? { subGoalStatus } : {}),\n ...(memoriesStatus ? { memoriesStatus } : {}),\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const timing = executorContext.task.timing;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planImpl =\n planningModel.adapter.planning.kind === 'custom'\n ? planningModel.adapter.planning.planFn\n : genericXmlPlan;\n\n let planResult: Awaited<ReturnType<typeof planImpl>>;\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n planResult = await planImpl(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n actionSpace,\n modelRuntime: planningModel,\n conversationHistory,\n includeLocateInPlanning,\n imagesIncludeCount,\n deepThink,\n referenceImageMessages,\n abortSignal,\n });\n } catch (planError) {\n if (planError instanceof AIResponseParseError) {\n // Record usage and rawResponse even when parsing fails\n executorContext.task.usage = withUsageIntent(\n planError.usage,\n 'planning',\n );\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse: planError.rawResponse,\n rawChoiceMessage: planError.rawChoiceMessage,\n };\n }\n throw planError;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n thought,\n log,\n memory,\n error,\n usage,\n rawResponse,\n rawChoiceMessage,\n reasoning_content,\n finalizeSuccess,\n finalizeMessage,\n updateSubGoals,\n markFinishedIndexes,\n } = planResult;\n outputString = finalizeMessage;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n rawChoiceMessage,\n };\n executorContext.task.usage = withUsageIntent(usage, 'planning');\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n thought,\n memory,\n yamlFlow: planResult.yamlFlow,\n output: finalizeMessage,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n updateSubGoals,\n markFinishedIndexes,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n // Check if task was finalized with failure\n if (finalizeSuccess === false) {\n assert(\n false,\n `Task failed: ${finalizeMessage || 'No error message provided'}\\n${log || ''}`,\n );\n }\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n planningModel,\n defaultModel,\n {\n cacheable,\n deepLocate,\n abortSignal,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n conversationHistory.pendingFeedbackMessage,\n );\n }\n\n // Set initial time context for the first planning call\n const initialTimeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;\n\n const taskCountBeforeRun = runner.tasks.length;\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check abort signal after executing actions\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // We are about to replan, which means the batch we just ran did not finish\n // the task. Any locate task in that batch that was served from cache\n // produced an element that failed to complete the step (the action threw,\n // or it clicked the wrong element and the goal was not reached). Mark those\n // cache entries stale so the re-locate of the same prompt replaces them in\n // place instead of appending a poisoning duplicate that would be matched\n // first on the next run (#2529).\n this.invalidateFailedCacheHitLocates(runner, taskCountBeforeRun);\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!conversationHistory.pendingFeedbackMessage) {\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelRuntime: ModelRuntime,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n domIncluded: opt?.domIncluded,\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n rawChoiceMessage: dump.taskInfo?.rawChoiceMessage,\n searchAreaRawChoiceMessage:\n dump.taskInfo?.searchAreaRawChoiceMessage,\n };\n task.usage = withUsageIntent(dump.taskInfo?.usage, 'insight');\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n demandInput = {\n [keyOfResult]: buildTypeQueryDemandValue(type, demand),\n };\n } else if (ifTypeRestricted) {\n keyOfResult = type;\n demandInput = {\n [keyOfResult]: buildTypeQueryDemandValue(type, demand),\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelRuntime,\n opt,\n extraPageDescription,\n multimodalPrompt,\n uiContext,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n // AI model may return {result: ...} instead of {[keyOfResult]: ...}\n if (data?.[keyOfResult] !== undefined) {\n outputResult = (data as any)[keyOfResult];\n } else if (data?.result !== undefined) {\n outputResult = (data as any).result;\n } else {\n assert(false, 'No result in query data');\n }\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelRuntime: ModelRuntime,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelRuntime,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelRuntime: ModelRuntime,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelRuntime,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const elapsed = now - currentCheckStart;\n const timeRemaining = checkIntervalMs - elapsed;\n const thought = `Check interval is ${checkIntervalMs}ms, ${elapsed}ms elapsed since last check, sleeping for ${timeRemaining}ms`;\n const { tasks: sleepTasks } = await this.convertPlanToExecutable(\n [{ type: 'Sleep', param: { timeMs: timeRemaining }, thought }],\n modelRuntime,\n modelRuntime,\n );\n if (sleepTasks[0]) {\n await session.appendAndRun(sleepTasks[0]);\n }\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = await getError();\n if (error) {\n throw error;\n }\n return result;\n } finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","warnLog","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","format","error","getReadableTimeString","plans","planningModel","defaultModel","userInstruction","yamlString","reportOptions","session","taskTitleStr","userPromptToString","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeLocateInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","deepLocate","abortSignal","withFileChooser","fromIndex","i","prompt","conversationHistory","ConversationHistory","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","outputString","referenceImageMessages","multimodalPromptToChatMessages","userPromptToMultimodalPrompt","subGoalStatus","memoriesStatus","timing","actionSpace","action","Array","console","planImpl","genericXmlPlan","planResult","setTimingFieldOnce","planError","AIResponseParseError","withUsageIntent","JSON","actions","thought","log","memory","usage","rawResponse","rawChoiceMessage","reasoning_content","finalizeSuccess","finalizeMessage","updateSubGoals","markFinishedIndexes","executables","initialTimeString","taskCountBeforeRun","timeString","String","Error","errorMsg","type","demand","modelRuntime","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","buildTypeQueryDemandValue","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","outputResult","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","elapsed","timeRemaining","sleepTasks","interfaceInstance","service","opts","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AA8DA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,wBAAwB;IAAE,SAAS;AAAK;AACjE,MAAME,wCAAwC;AAIvC,MAAMC;IAsBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAiCQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IASA,MAAc,cAAcG,MAAe,EAAmB;QAC5D,IAAI,IAAI,CAAC,aAAa,EACpB,IAAI,IAAI,CAAC,SAAS,CAAC,wBAAwB,EACzC,IAAI;YACF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,wBAAwB,CAACA;QACvD,EAAE,OAAOC,OAAO;YACdR,QACE,CAAC,gEAAgE,EAAEQ,OAAO;QAE9E;aAEAR,QACE;QAKN,OAAOS,sBAAsBF;IAC/B;IAEA,MAAa,wBACXG,KAAuB,EACvBC,aAA2B,EAC3BC,YAA0B,EAC1BR,OAIC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAACM,OAAOC,eAAeC,cAAcR;IACpE;IAEA,MAAM,uBACJS,eAA4B,EAC5BC,UAAkB,EAClBC,aAAmC,EACnC;QACA,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEF,eAAe,QAAQ,OACvBA,eAAe,UAAUG,mBAAmBL;QAIhD,MAAMM,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLN;gBACA,GAAIE,eAAe,SACf;oBAAE,wBAAwBA,cAAc,MAAM;gBAAC,IAC/C,CAAC,CAAC;YACR;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLR;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMU,SAASR,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACG;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJrB,KAAa,EACbO,KAAuB,EACvBC,aAA2B,EAC3BC,YAA0B,EACA;QAC1B,MAAMI,UAAU,IAAI,CAAC,sBAAsB,CAACb;QAC5C,MAAM,EAAEsB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDf,OACAC,eACAC;QAEF,MAAMY,SAASR,QAAQ,SAAS;QAChC,MAAMU,SAAS,MAAMV,QAAQ,YAAY,CAACS;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAuB,EACvBjB,aAA2B,EAC3BC,YAA0B,EAC1BiB,uBAAgC,EAChCC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAAmB,EACnBC,iBAA4B,EAC5BC,UAAoB,EACpBC,WAAyB,EACzBtB,aAAmC,EASnC;QACA,OAAOuB,gBAAgB,IAAI,CAAC,SAAS,EAAEH,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAjB,eACAC,cACAiB,yBACAC,cACAC,WACAC,8BACAC,oBACAC,WACAE,YACAC,aACAtB;IAGN;IAcQ,gCACNS,MAAkB,EAClBe,SAAiB,EACjB;QACA,IAAI,CAAC,IAAI,CAAC,SAAS,EACjB;QAEF,IAAK,IAAIC,IAAID,WAAWC,IAAIhB,OAAO,KAAK,CAAC,MAAM,EAAEgB,IAAK;YACpD,MAAMrB,OAAOK,OAAO,KAAK,CAACgB,EAAE;YAC5B,IACErB,AAAc,eAAdA,KAAK,IAAI,IACTA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,KAAK,KAAK,EAAE,SAAS,SACrB;gBACA,MAAMsB,SAAUtB,KAAK,KAAK,EAAsC;gBAChE,IAAIsB,QACF,IAAI,CAAC,SAAS,CAAC,oBAAoB,CAACA;YAExC;QACF;IACF;IAEA,MAAc,UACZb,UAAuB,EACvBjB,aAA2B,EAC3BC,YAA0B,EAC1BiB,uBAAgC,EAChCC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAAmB,EACnBE,UAAoB,EACpBC,WAAyB,EACzBtB,aAAmC,EASnC;QACA,IACEqB,cACA,CAACzB,cAAc,OAAO,CAAC,QAAQ,CAAC,wBAAwB,EACxD;YACAX,QACE,CAAC,mGAAmG,EAAEW,cAAc,MAAM,CAAC,WAAW,IAAI,UAAU,sBAAsB,CAAC;YAE7KyB,aAAa;QACf;QAEA,MAAMM,sBAAsB,IAAIC;QAEhC,MAAM3B,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEF,eAAe,QAAQ,OACvBA,eAAe,UAAUG,mBAAmBU;QAGhD,MAAMJ,SAASR,QAAQ,SAAS;QAEhC,IAAI4B,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJd,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEuB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAClC,IAAIC;QAEJ,IAAIZ,aAAa,SACf,OAAOrB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEqB,YAAY,MAAM,IAAI,yBAAyB;QAGpE,MAAMa,yBAAyB,MAAMC,+BACnCC,6BAA6BxB;QAI/B,MAAO,KAAM;YAEX,IAAIS,aAAa,SACf,OAAOrB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEqB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,MAAMgB,gBAAgBX,oBAAoB,cAAc,MAAMK;YAG9D,MAAMO,iBAAiBZ,oBAAoB,cAAc,MAAMK;YAE/D,MAAMrB,SAAS,MAAMV,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBY;oBACjB,GAAIb,eAAe,SACf;wBAAE,wBAAwBA,cAAc,MAAM;oBAAC,IAC/C,CAAC,CAAC;oBACNe;oBACAG;oBACAC;oBACA,GAAImB,gBAAgB;wBAAEA;oBAAc,IAAI,CAAC,CAAC;oBAC1C,GAAIC,iBAAiB;wBAAEA;oBAAe,IAAI,CAAC,CAAC;gBAC9C;gBACA,UAAU,OAAOlC,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAMiC,SAASlC,gBAAgB,IAAI,CAAC,MAAM;oBAE1C,MAAMmC,cAAc,IAAI,CAAC,cAAc;oBACvC1D,MACE,sCACA0D,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDlC,OAAOmC,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,WACJjD,AAAwC,aAAxCA,cAAc,OAAO,CAAC,QAAQ,CAAC,IAAI,GAC/BA,cAAc,OAAO,CAAC,QAAQ,CAAC,MAAM,GACrCkD;oBAEN,IAAIC;oBACJ,IAAI;wBACFC,mBAAmBR,QAAQ;wBAC3BO,aAAa,MAAMF,SAASxC,MAAM,eAAe,EAAE;4BACjD,SAASE;4BACT,eAAeF,MAAM,YAAY;4BACjCoC;4BACA,cAAc7C;4BACd+B;4BACAb;4BACAI;4BACAC;4BACAgB;4BACAb;wBACF;oBACF,EAAE,OAAO2B,WAAW;wBAClB,IAAIA,qBAAqBC,sBAAsB;4BAE7C5C,gBAAgB,IAAI,CAAC,KAAK,GAAG6C,gBAC3BF,UAAU,KAAK,EACf;4BAEF3C,gBAAgB,IAAI,CAAC,GAAG,GAAG;gCACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;gCAClC,aAAa2C,UAAU,WAAW;gCAClC,kBAAkBA,UAAU,gBAAgB;4BAC9C;wBACF;wBACA,MAAMA;oBACR,SAAU;wBACRD,mBAAmBR,QAAQ;oBAC7B;oBACAzD,MAAM,cAAcqE,KAAK,SAAS,CAACL,YAAY,MAAM;oBAErD,MAAM,EACJM,OAAO,EACPC,OAAO,EACPC,GAAG,EACHC,MAAM,EACN/D,KAAK,EACLgE,KAAK,EACLC,WAAW,EACXC,gBAAgB,EAChBC,iBAAiB,EACjBC,eAAe,EACfC,eAAe,EACfC,cAAc,EACdC,mBAAmB,EACpB,GAAGjB;oBACJb,eAAe4B;oBAEfxD,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCoD;wBACAC;oBACF;oBACArD,gBAAgB,IAAI,CAAC,KAAK,GAAG6C,gBAAgBM,OAAO;oBACpDnD,gBAAgB,IAAI,CAAC,iBAAiB,GAAGsD;oBACzCtD,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS+C,WAAW,EAAE;wBACtBE;wBACAD;wBACAE;wBACA,UAAUT,WAAW,QAAQ;wBAC7B,QAAQe;wBACR,wBAAwBf,WAAW,sBAAsB;wBACzDgB;wBACAC;oBACF;oBACA1D,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACf,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAE8D,OAAO,IAAI;oBAG3D,IAAIM,AAAoB,UAApBA,iBACFrD,OACE,OACA,CAAC,aAAa,EAAEsD,mBAAmB,4BAA4B,EAAE,EAAEP,OAAO,IAAI;oBAIlF,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAMR,aAAapC,QAAQ;YAG3B,MAAMhB,QAAQoD,YAAY,WAAW,EAAE;YACvCjB,SAAS,IAAI,IAAKiB,YAAY,YAAY,EAAE;YAE5C,IAAIkB;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CtE,OACAC,eACAC,cACA;oBACEmB;oBACAK;oBACAC;gBACF;YAEJ,EAAE,OAAO7B,OAAO;gBACd,OAAOQ,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAER,MAAM,SAAS,EAAE2D,KAAK,SAAS,CAC5EzD,QACC;YAEP;YACA,IAAIgC,oBAAoB,sBAAsB,EAC5CiB,QAAQ,IAAI,CACV,8FACAjB,oBAAoB,sBAAsB;YAK9C,MAAMuC,oBAAoB,MAAM,IAAI,CAAC,aAAa;YAClDvC,oBAAoB,sBAAsB,IAAI,CAAC,cAAc,EAAEuC,mBAAmB;YAElF,MAAMC,qBAAqB1D,OAAO,KAAK,CAAC,MAAM;YAC9C,IAAI;gBACF,MAAMR,QAAQ,YAAY,CAACgE,YAAY,KAAK;YAC9C,EAAE,OAAOxE,OAAY;gBAEnBwC;gBACA,MAAMmC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CzC,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEyC,WAAW,iCAAiC,EAAE3E,OAAO,WAAW4E,OAAO5E,QAAQ;gBACrIV,MACE,yFACAU,iBAAiB6E,QAAQ7E,MAAM,OAAO,GAAG4E,OAAO5E,QAChD,6CACAwC;YAEJ;YAEA,IAAIA,8BAA8B/C,uCAChC,OAAOe,QAAQ,eAAe,CAAC;YAIjC,IAAIqB,aAAa,SACf,OAAOrB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEqB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,IAAI,CAACyB,YAAY,wBACf;YAUF,IAAI,CAAC,+BAA+B,CAACtC,QAAQ0D;YAG7C,EAAEtC;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAMwC,WAAW,CAAC,UAAU,EAAExC,qBAAqB,4JAA4J,CAAC;gBAChN,OAAO9B,QAAQ,eAAe,CAACsE;YACjC;YAEA,IAAI,CAAC5C,oBAAoB,sBAAsB,EAAE;gBAC/C,MAAMyC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CzC,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEyC,WAAW,gDAAgD,CAAC;YACpH;QACF;QAEA,OAAO;YACL,QAAQ;gBACNtC;gBACA,QAAQI;YACV;YACAzB;QACF;IACF;IAEQ,oBACN+D,IAAsE,EACtEC,MAA2B,EAC3BC,YAA0B,EAC1BC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,aAAaG,KAAK;gBAClB,YAAYC,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOpE,OAAOyE;gBACtB,MAAM,EAAE1E,IAAI,EAAE,GAAG0E;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZ7E,KAAK,GAAG,GAAG;wBACT6E;wBACA,aAAaA,KAAK,QAAQ,EAAE;wBAC5B,kBAAkBA,KAAK,QAAQ,EAAE;wBACjC,4BACEA,KAAK,QAAQ,EAAE;oBACnB;oBACA7E,KAAK,KAAK,GAAG+C,gBAAgB8B,KAAK,QAAQ,EAAE,OAAO;oBACnD,IAAIA,KAAK,QAAQ,EAAE,mBACjB7E,KAAK,iBAAiB,GAAG6E,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAM1E,YAAYuE,YAAY,SAAS;gBACvCtE,OAAOD,WAAW;gBAElB,MAAM2E,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACdD,cAAc;wBACZ,CAACC,YAAY,EAAEC,0BAA0Bb,MAAMC;oBACjD;gBACF,OAAO,IAAIS,kBAAkB;oBAC3BE,cAAcZ;oBACdW,cAAc;wBACZ,CAACC,YAAY,EAAEC,0BAA0Bb,MAAMC;oBACjD;gBACF;gBAEA,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1D5F,MAAM;oBACN,MAAMyG,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,cACAC,KACAY,sBACAX,kBACArE;gBAEJ,EAAE,OAAOd,OAAO;oBACd,IAAIA,iBAAiBiG,cACnBV,UAAUvF,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEkG,IAAI,EAAErC,OAAO,EAAE2B,IAAI,EAAE,GAAGK;gBAChCN,UAAUC;gBAEV,IAAIW,eAAeD;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTC,eAAeD;qBACV,IAAInB,AAAS,cAATA,MAEPoB,eADED,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTC,eAAe;qBAGf,IAAID,MAAM,CAACP,YAAY,KAAKpD,QAC1B4D,eAAgBD,IAAY,CAACP,YAAY;qBACpC,IAAIO,MAAM,WAAW3D,QAC1B4D,eAAgBD,KAAa,MAAM;qBAEnCnF,OAAO,OAAO;gBAKpB,IAAIgE,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtCxF,KAAK,OAAO,GAAGkD;oBACf,MAAM,IAAIgB,MAAM,CAAC,kBAAkB,EAAEhB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQsC;oBACR,KAAKb;oBACLzB;gBACF;YACF;QACF;QAEA,OAAOuB;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,YAA0B,EAC1BC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAM3E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEsE,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASrB,KAAK,SAAS,CAACqB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,cACAC,KACAC;QAGF,MAAMnE,SAASR,QAAQ,SAAS;QAChC,MAAMU,SAAS,MAAMV,QAAQ,YAAY,CAAC4E;QAE1C,IAAI,CAAClE,QACH,MAAM,IAAI2D,MACR;QAIJ,MAAM,EAAE1D,MAAM,EAAE0C,OAAO,EAAE,GAAG3C;QAE5B,OAAO;YACLC;YACA0C;YACA7C;QACF;IACF;IAEA,MAAM,QACJoF,SAAsB,EACtBlB,GAA+B,EAC/BD,YAA0B,EACM;QAChC,MAAM,EAAEoB,UAAU,EAAElB,gBAAgB,EAAE,GAAGmB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM7F,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW8F;QAE1B,MAAMvF,SAASR,QAAQ,SAAS;QAChC,MAAM,EACJgG,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG1B;QACJ,MAAM2B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEA7F,OAAOqF,WAAW;QAClBrF,OAAOyF,WAAW;QAClBzF,OAAO0F,iBAAiB;QAExB1F,OACE0F,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAM9B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAiB,YACApB,cACA4B,mBACA1B;YAGF,MAAMjE,SAAU,MAAMV,QAAQ,YAAY,CAAC4E;YAO3C,IAAIlE,QAAQ,QACV,OAAO;gBACL,QAAQqB;gBACRvB;YACF;YAGFiG,eACE/F,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEmF,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,UAAUD,MAAMD;gBACtB,MAAMG,gBAAgBZ,kBAAkBW;gBACxC,MAAMvD,UAAU,CAAC,kBAAkB,EAAE4C,gBAAgB,IAAI,EAAEW,QAAQ,0CAA0C,EAAEC,cAAc,EAAE,CAAC;gBAChI,MAAM,EAAE,OAAOC,UAAU,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAC9D;oBAAC;wBAAE,MAAM;wBAAS,OAAO;4BAAE,QAAQD;wBAAc;wBAAGxD;oBAAQ;iBAAE,EAC9DoB,cACAA;gBAEF,IAAIqC,UAAU,CAAC,EAAE,EACf,MAAM9G,QAAQ,YAAY,CAAC8G,UAAU,CAAC,EAAE;YAE5C;QACF;QAEA,OAAO9G,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEyG,cAAc;IACnE;IA9xBA,YACEM,iBAAoC,EACpCC,OAAgB,EAChBC,IAQC,CACD;QArCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAEA;QAEA;QAoBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,eAAe,GAAGA,KAAK,eAAe;QAC3C,IAAI,CAAC,aAAa,GAAGA,KAAK,aAAa;QACvC,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAGA,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCH;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;YAChC,iBAAiBA,KAAK,eAAe;QACvC;IACF;AAkwBF;AAEO,eAAe3F,gBACpByF,iBAAoC,EACpC5F,iBAAuC,EACvCsB,MAAwB;IAExB,IAAI,CAACtB,mBAAmB,QACtB,OAAOsB;IAGT,IAAI,CAACsE,kBAAkB,2BAA2B,EAChD,MAAM,IAAI1C,MACR,CAAC,gCAAgC,EAAE0C,kBAAkB,aAAa,EAAE;IAIxE,MAAMI,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAACjG;IACvB;IAEA,MAAM,EAAEkG,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMP,kBAAkB,2BAA2B,CAACI;IACtD,IAAI;QACF,MAAMzG,SAAS,MAAM+B;QAErB,MAAMjD,QAAQ,MAAM8H;QACpB,IAAI9H,OACF,MAAMA;QAER,OAAOkB;IACT,SAAU;QACR2G;IACF;AACF"}
1
+ {"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { AIResponseParseError, ConversationHistory } from '@/ai-model';\nimport type { ModelRuntime } from '@/ai-model/models';\nimport { buildTypeQueryDemandValue } from '@/ai-model/prompt/extraction';\nimport { genericXmlPlan } from '@/ai-model/workflows/planning';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n multimodalPromptToChatMessages,\n userPromptToMultimodalPrompt,\n userPromptToString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeviceAction,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n PlanningLocateParam,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { type TaskTitleType, taskTitleStr } from './ui-utils';\nimport { withUsageIntent } from './usage-intent';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nexport type ActionReportOptions = {\n type?: TaskTitleType;\n prompt?: string;\n};\n\nconst debug = getDebug('device-task-executor');\nconst warnLog = getDebug('device-task-executor', { console: true });\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\n// Cap each task's planning feedback so a large action output (e.g. a long adb\n// shell stdout) cannot blow up the next planning request's context. This is the\n// single place that truncates feedback before it is sent to the model; action\n// implementations should hand over the untruncated value.\nconst maxPlanningFeedbackLength = 500;\n\nfunction truncatePlanningFeedback(feedback: string): string {\n if (feedback.length <= maxPlanningFeedbackLength) {\n return feedback;\n }\n\n return `${feedback.slice(0, maxPlanningFeedbackLength)}\n...[truncated, ${feedback.length - maxPlanningFeedbackLength} more characters]`;\n}\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n waitAfterAction?: number;\n\n useDeviceTime?: boolean;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n waitAfterAction?: number;\n useDeviceTime?: boolean;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.waitAfterAction = opts.waitAfterAction;\n this.useDeviceTime = opts.useDeviceTime;\n this.hooks = opts.hooks;\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n waitAfterAction: opts.waitAfterAction,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n /**\n * Set the pending feedback message consumed by the next planning round.\n * The message is always prefixed with the current time. When a body is\n * provided it is appended after the timestamp; otherwise only the time\n * context is recorded. This is the single entry point for writing\n * `pendingFeedbackMessage` so the time prefix stays consistent.\n */\n private setPendingFeedbackMessage(\n conversationHistory: ConversationHistory,\n timeString: string,\n body?: string,\n ) {\n conversationHistory.pendingFeedbackMessage = body\n ? `Time: ${timeString}, ${body}`\n : `Current time: ${timeString}`;\n }\n\n /**\n * Collect feedback produced by executed tasks for the next planning round.\n * Returns undefined when no task reported feedback.\n */\n private collectPlanningFeedback(tasks: ExecutionTask[]): string | undefined {\n const feedbackMessages = tasks.flatMap(({ planningFeedback }) =>\n planningFeedback ? [truncatePlanningFeedback(planningFeedback)] : [],\n );\n return feedbackMessages.length > 0\n ? feedbackMessages.join('\\n\\n')\n : undefined;\n }\n\n /**\n * Get a readable time string. When device time is enabled, use the\n * device-formatted wall-clock time directly so host timezone formatting does\n * not reinterpret a device timestamp.\n * @param format - Optional format string\n * @returns A formatted time string\n */\n private async getTimeString(format?: string): Promise<string> {\n if (this.useDeviceTime) {\n if (this.interface.getDeviceLocalTimeString) {\n try {\n return await this.interface.getDeviceLocalTimeString(format);\n } catch (error) {\n warnLog(\n `Failed to get device time string, falling back to runtime time: ${error}`,\n );\n }\n } else {\n warnLog(\n 'useDeviceTime is enabled but getDeviceLocalTimeString is not implemented, falling back to runtime time.',\n );\n }\n }\n\n return getReadableTimeString(format);\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n options?: {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n },\n ) {\n return this.taskBuilder.build(plans, planningModel, defaultModel, options);\n }\n\n async loadYamlFlowAsPlanning(\n userInstruction: TUserPrompt,\n yamlString: string,\n reportOptions?: ActionReportOptions,\n ) {\n const session = this.createExecutionSession(\n taskTitleStr(\n reportOptions?.type || 'Act',\n reportOptions?.prompt || userPromptToString(userInstruction),\n ),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n ...(reportOptions?.prompt\n ? { userInstructionDisplay: reportOptions.prompt }\n : {}),\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n planningModel,\n defaultModel,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: TUserPrompt,\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n includeLocateInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: boolean,\n fileChooserAccept?: string[],\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n reportOptions?: ActionReportOptions,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n planningModel,\n defaultModel,\n includeLocateInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n deepLocate,\n abortSignal,\n reportOptions,\n );\n });\n }\n\n /**\n * Called when the task is about to replan. Marks every cache-hit locate task\n * in the just-run batch (tasks at index >= fromIndex) as stale: that batch\n * did not finish the task, so the element each cache hit produced is suspect.\n * The upcoming re-locate of the same prompt then replaces the bad entry in\n * place instead of appending a duplicate that would re-poison the cache on the\n * next run (#2529).\n *\n * Marking a locate that was actually fine is harmless: the step is only ever\n * replaced if the same prompt is located again (i.e. the step is redone),\n * which does not happen for a locate that already succeeded.\n */\n private invalidateFailedCacheHitLocates(\n runner: TaskRunner,\n fromIndex: number,\n ) {\n if (!this.taskCache) {\n return;\n }\n for (let i = fromIndex; i < runner.tasks.length; i++) {\n const task = runner.tasks[i];\n if (\n task.type === 'Planning' &&\n task.subType === 'Locate' &&\n task.hitBy?.from === 'Cache'\n ) {\n const prompt = (task.param as PlanningLocateParam | undefined)?.prompt;\n if (prompt) {\n this.taskCache.markLocateCacheStale(prompt);\n }\n }\n }\n }\n\n private async runAction(\n userPrompt: TUserPrompt,\n planningModel: ModelRuntime,\n defaultModel: ModelRuntime,\n includeLocateInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: boolean,\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n reportOptions?: ActionReportOptions,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n if (\n deepLocate &&\n !planningModel.adapter.planning.supportsActionDeepLocate\n ) {\n warnLog(\n `The \"deepLocate\" option is not supported for aiAct with the current planning adapter (modelFamily: ${planningModel.config.modelFamily ?? 'unknown'}). It will be ignored.`,\n );\n deepLocate = false;\n }\n\n const conversationHistory = new ConversationHistory();\n\n const session = this.createExecutionSession(\n taskTitleStr(\n reportOptions?.type || 'Act',\n reportOptions?.prompt || userPromptToString(userPrompt),\n ),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n const referenceImageMessages = await multimodalPromptToChatMessages(\n userPromptToMultimodalPrompt(userPrompt),\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n // Check abort signal before each planning cycle\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // Get sub-goal status text if available\n const subGoalStatus = conversationHistory.subGoalsToText() || undefined;\n\n // Get memories text if available\n const memoriesStatus = conversationHistory.memoriesToText() || undefined;\n\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n ...(reportOptions?.prompt\n ? { userInstructionDisplay: reportOptions.prompt }\n : {}),\n aiActContext,\n imagesIncludeCount,\n deepThink,\n ...(subGoalStatus ? { subGoalStatus } : {}),\n ...(memoriesStatus ? { memoriesStatus } : {}),\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const timing = executorContext.task.timing;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planImpl =\n planningModel.adapter.planning.kind === 'custom'\n ? planningModel.adapter.planning.planFn\n : genericXmlPlan;\n\n let planResult: Awaited<ReturnType<typeof planImpl>>;\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n planResult = await planImpl(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n actionSpace,\n modelRuntime: planningModel,\n conversationHistory,\n includeLocateInPlanning,\n imagesIncludeCount,\n deepThink,\n referenceImageMessages,\n abortSignal,\n });\n } catch (planError) {\n if (planError instanceof AIResponseParseError) {\n // Record usage and rawResponse even when parsing fails\n executorContext.task.usage = withUsageIntent(\n planError.usage,\n 'planning',\n );\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse: planError.rawResponse,\n rawChoiceMessage: planError.rawChoiceMessage,\n };\n }\n throw planError;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n thought,\n log,\n memory,\n error,\n usage,\n rawResponse,\n rawChoiceMessage,\n reasoning_content,\n finalizeSuccess,\n finalizeMessage,\n updateSubGoals,\n markFinishedIndexes,\n } = planResult;\n outputString = finalizeMessage;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n rawChoiceMessage,\n };\n executorContext.task.usage = withUsageIntent(usage, 'planning');\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n thought,\n memory,\n yamlFlow: planResult.yamlFlow,\n output: finalizeMessage,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n updateSubGoals,\n markFinishedIndexes,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n // Check if task was finalized with failure\n if (finalizeSuccess === false) {\n assert(\n false,\n `Task failed: ${finalizeMessage || 'No error message provided'}\\n${log || ''}`,\n );\n }\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n planningModel,\n defaultModel,\n {\n cacheable,\n deepLocate,\n abortSignal,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n conversationHistory.pendingFeedbackMessage,\n );\n }\n\n // Capture the time context for the next planning call before running.\n const initialTimeString = await this.getTimeString();\n\n const taskCountBeforeRun = runner.tasks.length;\n try {\n await session.appendAndRun(executables.tasks);\n this.setPendingFeedbackMessage(\n conversationHistory,\n initialTimeString,\n this.collectPlanningFeedback(runner.tasks.slice(taskCountBeforeRun)),\n );\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n const timeString = await this.getTimeString();\n this.setPendingFeedbackMessage(\n conversationHistory,\n timeString,\n `Error executing running tasks: ${error?.message || String(error)}`,\n );\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check abort signal after executing actions\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // We are about to replan, which means the batch we just ran did not finish\n // the task. Any locate task in that batch that was served from cache\n // produced an element that failed to complete the step (the action threw,\n // or it clicked the wrong element and the goal was not reached). Mark those\n // cache entries stale so the re-locate of the same prompt replaces them in\n // place instead of appending a poisoning duplicate that would be matched\n // first on the next run (#2529).\n this.invalidateFailedCacheHitLocates(runner, taskCountBeforeRun);\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!conversationHistory.pendingFeedbackMessage) {\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelRuntime: ModelRuntime,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n domIncluded: opt?.domIncluded,\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n rawChoiceMessage: dump.taskInfo?.rawChoiceMessage,\n searchAreaRawChoiceMessage:\n dump.taskInfo?.searchAreaRawChoiceMessage,\n };\n task.usage = withUsageIntent(dump.taskInfo?.usage, 'insight');\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n demandInput = {\n [keyOfResult]: buildTypeQueryDemandValue(type, demand),\n };\n } else if (ifTypeRestricted) {\n keyOfResult = type;\n demandInput = {\n [keyOfResult]: buildTypeQueryDemandValue(type, demand),\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelRuntime,\n opt,\n extraPageDescription,\n multimodalPrompt,\n uiContext,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n // AI model may return {result: ...} instead of {[keyOfResult]: ...}\n if (data?.[keyOfResult] !== undefined) {\n outputResult = (data as any)[keyOfResult];\n } else if (data?.result !== undefined) {\n outputResult = (data as any).result;\n } else {\n assert(false, 'No result in query data');\n }\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelRuntime: ModelRuntime,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelRuntime,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelRuntime: ModelRuntime,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelRuntime,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const elapsed = now - currentCheckStart;\n const timeRemaining = checkIntervalMs - elapsed;\n const thought = `Check interval is ${checkIntervalMs}ms, ${elapsed}ms elapsed since last check, sleeping for ${timeRemaining}ms`;\n const { tasks: sleepTasks } = await this.convertPlanToExecutable(\n [{ type: 'Sleep', param: { timeMs: timeRemaining }, thought }],\n modelRuntime,\n modelRuntime,\n );\n if (sleepTasks[0]) {\n await session.appendAndRun(sleepTasks[0]);\n }\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = await getError();\n if (error) {\n throw error;\n }\n return result;\n } finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","warnLog","maxErrorCountAllowedInOnePlanningLoop","maxPlanningFeedbackLength","truncatePlanningFeedback","feedback","TaskExecutor","title","options","ExecutionSession","Promise","conversationHistory","timeString","body","tasks","feedbackMessages","planningFeedback","undefined","format","error","getReadableTimeString","plans","planningModel","defaultModel","userInstruction","yamlString","reportOptions","session","taskTitleStr","userPromptToString","task","param","executorContext","uiContext","assert","runner","result","output","userPrompt","includeLocateInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","deepLocate","abortSignal","withFileChooser","fromIndex","i","prompt","ConversationHistory","replanCount","yamlFlow","replanningCycleLimit","errorCountInOnePlanningLoop","outputString","referenceImageMessages","multimodalPromptToChatMessages","userPromptToMultimodalPrompt","subGoalStatus","memoriesStatus","timing","actionSpace","action","Array","console","planImpl","genericXmlPlan","planResult","setTimingFieldOnce","planError","AIResponseParseError","withUsageIntent","JSON","actions","thought","log","memory","usage","rawResponse","rawChoiceMessage","reasoning_content","finalizeSuccess","finalizeMessage","updateSubGoals","markFinishedIndexes","executables","initialTimeString","taskCountBeforeRun","String","Error","errorMsg","type","demand","modelRuntime","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","buildTypeQueryDemandValue","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","outputResult","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","elapsed","timeRemaining","sleepTasks","interfaceInstance","service","opts","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;AA+DA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,wBAAwB;IAAE,SAAS;AAAK;AACjE,MAAME,wCAAwC;AAM9C,MAAMC,4BAA4B;AAElC,SAASC,yBAAyBC,QAAgB;IAChD,IAAIA,SAAS,MAAM,IAAIF,2BACrB,OAAOE;IAGT,OAAO,GAAGA,SAAS,KAAK,CAAC,GAAGF,2BAA2B;eAC1C,EAAEE,SAAS,MAAM,GAAGF,0BAA0B,iBAAiB,CAAC;AAC/E;AAIO,MAAMG;IAsBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAiCQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IASQ,0BACNG,mBAAwC,EACxCC,UAAkB,EAClBC,IAAa,EACb;QACAF,oBAAoB,sBAAsB,GAAGE,OACzC,CAAC,MAAM,EAAED,WAAW,EAAE,EAAEC,MAAM,GAC9B,CAAC,cAAc,EAAED,YAAY;IACnC;IAMQ,wBAAwBE,KAAsB,EAAsB;QAC1E,MAAMC,mBAAmBD,MAAM,OAAO,CAAC,CAAC,EAAEE,gBAAgB,EAAE,GAC1DA,mBAAmB;gBAACZ,yBAAyBY;aAAkB,GAAG,EAAE;QAEtE,OAAOD,iBAAiB,MAAM,GAAG,IAC7BA,iBAAiB,IAAI,CAAC,UACtBE;IACN;IASA,MAAc,cAAcC,MAAe,EAAmB;QAC5D,IAAI,IAAI,CAAC,aAAa,EACpB,IAAI,IAAI,CAAC,SAAS,CAAC,wBAAwB,EACzC,IAAI;YACF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,wBAAwB,CAACA;QACvD,EAAE,OAAOC,OAAO;YACdlB,QACE,CAAC,gEAAgE,EAAEkB,OAAO;QAE9E;aAEAlB,QACE;QAKN,OAAOmB,sBAAsBF;IAC/B;IAEA,MAAa,wBACXG,KAAuB,EACvBC,aAA2B,EAC3BC,YAA0B,EAC1Bf,OAIC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAACa,OAAOC,eAAeC,cAAcf;IACpE;IAEA,MAAM,uBACJgB,eAA4B,EAC5BC,UAAkB,EAClBC,aAAmC,EACnC;QACA,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEF,eAAe,QAAQ,OACvBA,eAAe,UAAUG,mBAAmBL;QAIhD,MAAMM,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLN;gBACA,GAAIE,eAAe,SACf;oBAAE,wBAAwBA,cAAc,MAAM;gBAAC,IAC/C,CAAC,CAAC;YACR;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLR;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMU,SAASR,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACG;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJ5B,KAAa,EACbc,KAAuB,EACvBC,aAA2B,EAC3BC,YAA0B,EACA;QAC1B,MAAMI,UAAU,IAAI,CAAC,sBAAsB,CAACpB;QAC5C,MAAM,EAAEO,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDO,OACAC,eACAC;QAEF,MAAMY,SAASR,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACb;QAC1C,MAAM,EAAEuB,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAF;QACF;IACF;IAEA,MAAM,OACJG,UAAuB,EACvBhB,aAA2B,EAC3BC,YAA0B,EAC1BgB,uBAAgC,EAChCC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAAmB,EACnBC,iBAA4B,EAC5BC,UAAoB,EACpBC,WAAyB,EACzBrB,aAAmC,EASnC;QACA,OAAOsB,gBAAgB,IAAI,CAAC,SAAS,EAAEH,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAhB,eACAC,cACAgB,yBACAC,cACAC,WACAC,8BACAC,oBACAC,WACAE,YACAC,aACArB;IAGN;IAcQ,gCACNS,MAAkB,EAClBc,SAAiB,EACjB;QACA,IAAI,CAAC,IAAI,CAAC,SAAS,EACjB;QAEF,IAAK,IAAIC,IAAID,WAAWC,IAAIf,OAAO,KAAK,CAAC,MAAM,EAAEe,IAAK;YACpD,MAAMpB,OAAOK,OAAO,KAAK,CAACe,EAAE;YAC5B,IACEpB,AAAc,eAAdA,KAAK,IAAI,IACTA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,KAAK,KAAK,EAAE,SAAS,SACrB;gBACA,MAAMqB,SAAUrB,KAAK,KAAK,EAAsC;gBAChE,IAAIqB,QACF,IAAI,CAAC,SAAS,CAAC,oBAAoB,CAACA;YAExC;QACF;IACF;IAEA,MAAc,UACZb,UAAuB,EACvBhB,aAA2B,EAC3BC,YAA0B,EAC1BgB,uBAAgC,EAChCC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAAmB,EACnBE,UAAoB,EACpBC,WAAyB,EACzBrB,aAAmC,EASnC;QACA,IACEoB,cACA,CAACxB,cAAc,OAAO,CAAC,QAAQ,CAAC,wBAAwB,EACxD;YACArB,QACE,CAAC,mGAAmG,EAAEqB,cAAc,MAAM,CAAC,WAAW,IAAI,UAAU,sBAAsB,CAAC;YAE7KwB,aAAa;QACf;QAEA,MAAMnC,sBAAsB,IAAIyC;QAEhC,MAAMzB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEF,eAAe,QAAQ,OACvBA,eAAe,UAAUG,mBAAmBS;QAGhD,MAAMH,SAASR,QAAQ,SAAS;QAEhC,IAAI0B,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJb,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DR,OACEqB,AAAyBtC,WAAzBsC,sBACA;QAGF,IAAIC,8BAA8B;QAClC,IAAIC;QAEJ,IAAIV,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;QAGpE,MAAMW,yBAAyB,MAAMC,+BACnCC,6BAA6BtB;QAI/B,MAAO,KAAM;YAEX,IAAIS,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,MAAMc,gBAAgBlD,oBAAoB,cAAc,MAAMM;YAG9D,MAAM6C,iBAAiBnD,oBAAoB,cAAc,MAAMM;YAE/D,MAAMmB,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjB,GAAIZ,eAAe,SACf;wBAAE,wBAAwBA,cAAc,MAAM;oBAAC,IAC/C,CAAC,CAAC;oBACNc;oBACAG;oBACAC;oBACA,GAAIiB,gBAAgB;wBAAEA;oBAAc,IAAI,CAAC,CAAC;oBAC1C,GAAIC,iBAAiB;wBAAEA;oBAAe,IAAI,CAAC,CAAC;gBAC9C;gBACA,UAAU,OAAO/B,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM8B,SAAS/B,gBAAgB,IAAI,CAAC,MAAM;oBAE1C,MAAMgC,cAAc,IAAI,CAAC,cAAc;oBACvCjE,MACE,sCACAiE,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhD/B,OAAOgC,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,WACJ9C,AAAwC,aAAxCA,cAAc,OAAO,CAAC,QAAQ,CAAC,IAAI,GAC/BA,cAAc,OAAO,CAAC,QAAQ,CAAC,MAAM,GACrC+C;oBAEN,IAAIC;oBACJ,IAAI;wBACFC,mBAAmBR,QAAQ;wBAC3BO,aAAa,MAAMF,SAASrC,MAAM,eAAe,EAAE;4BACjD,SAASE;4BACT,eAAeF,MAAM,YAAY;4BACjCiC;4BACA,cAAc1C;4BACdX;4BACA4B;4BACAI;4BACAC;4BACAc;4BACAX;wBACF;oBACF,EAAE,OAAOyB,WAAW;wBAClB,IAAIA,qBAAqBC,sBAAsB;4BAE7CzC,gBAAgB,IAAI,CAAC,KAAK,GAAG0C,gBAC3BF,UAAU,KAAK,EACf;4BAEFxC,gBAAgB,IAAI,CAAC,GAAG,GAAG;gCACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;gCAClC,aAAawC,UAAU,WAAW;gCAClC,kBAAkBA,UAAU,gBAAgB;4BAC9C;wBACF;wBACA,MAAMA;oBACR,SAAU;wBACRD,mBAAmBR,QAAQ;oBAC7B;oBACAhE,MAAM,cAAc4E,KAAK,SAAS,CAACL,YAAY,MAAM;oBAErD,MAAM,EACJM,OAAO,EACPC,OAAO,EACPC,GAAG,EACHC,MAAM,EACN5D,KAAK,EACL6D,KAAK,EACLC,WAAW,EACXC,gBAAgB,EAChBC,iBAAiB,EACjBC,eAAe,EACfC,eAAe,EACfC,cAAc,EACdC,mBAAmB,EACpB,GAAGjB;oBACJb,eAAe4B;oBAEfrD,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCiD;wBACAC;oBACF;oBACAlD,gBAAgB,IAAI,CAAC,KAAK,GAAG0C,gBAAgBM,OAAO;oBACpDhD,gBAAgB,IAAI,CAAC,iBAAiB,GAAGmD;oBACzCnD,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS4C,WAAW,EAAE;wBACtBE;wBACAD;wBACAE;wBACA,UAAUT,WAAW,QAAQ;wBAC7B,QAAQe;wBACR,wBAAwBf,WAAW,sBAAsB;wBACzDgB;wBACAC;oBACF;oBACAvD,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACf,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAE2D,OAAO,IAAI;oBAG3D,IAAIM,AAAoB,UAApBA,iBACFlD,OACE,OACA,CAAC,aAAa,EAAEmD,mBAAmB,4BAA4B,EAAE,EAAEP,OAAO,IAAI;oBAIlF,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAMR,aAAalC,QAAQ;YAG3B,MAAMf,QAAQiD,YAAY,WAAW,EAAE;YACvChB,SAAS,IAAI,IAAKgB,YAAY,YAAY,EAAE;YAE5C,IAAIkB;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CnE,OACAC,eACAC,cACA;oBACEkB;oBACAK;oBACAC;gBACF;YAEJ,EAAE,OAAO5B,OAAO;gBACd,OAAOQ,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAER,MAAM,SAAS,EAAEwD,KAAK,SAAS,CAC5EtD,QACC;YAEP;YACA,IAAIV,oBAAoB,sBAAsB,EAC5CwD,QAAQ,IAAI,CACV,8FACAxD,oBAAoB,sBAAsB;YAK9C,MAAM8E,oBAAoB,MAAM,IAAI,CAAC,aAAa;YAElD,MAAMC,qBAAqBvD,OAAO,KAAK,CAAC,MAAM;YAC9C,IAAI;gBACF,MAAMR,QAAQ,YAAY,CAAC6D,YAAY,KAAK;gBAC5C,IAAI,CAAC,yBAAyB,CAC5B7E,qBACA8E,mBACA,IAAI,CAAC,uBAAuB,CAACtD,OAAO,KAAK,CAAC,KAAK,CAACuD;YAEpD,EAAE,OAAOvE,OAAY;gBAEnBqC;gBACA,MAAM5C,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3C,IAAI,CAAC,yBAAyB,CAC5BD,qBACAC,YACA,CAAC,+BAA+B,EAAEO,OAAO,WAAWwE,OAAOxE,QAAQ;gBAErEpB,MACE,yFACAoB,iBAAiByE,QAAQzE,MAAM,OAAO,GAAGwE,OAAOxE,QAChD,6CACAqC;YAEJ;YAEA,IAAIA,8BAA8BtD,uCAChC,OAAOyB,QAAQ,eAAe,CAAC;YAIjC,IAAIoB,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,IAAI,CAACuB,YAAY,wBACf;YAUF,IAAI,CAAC,+BAA+B,CAACnC,QAAQuD;YAG7C,EAAErC;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAMsC,WAAW,CAAC,UAAU,EAAEtC,qBAAqB,4JAA4J,CAAC;gBAChN,OAAO5B,QAAQ,eAAe,CAACkE;YACjC;YAEA,IAAI,CAAClF,oBAAoB,sBAAsB,EAAE;gBAC/C,MAAMC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CD,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEC,WAAW,gDAAgD,CAAC;YACpH;QACF;QAEA,OAAO;YACL,QAAQ;gBACN0C;gBACA,QAAQG;YACV;YACAtB;QACF;IACF;IAEQ,oBACN2D,IAAsE,EACtEC,MAA2B,EAC3BC,YAA0B,EAC1BC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,aAAaG,KAAK;gBAClB,YAAYC,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOhE,OAAOqE;gBACtB,MAAM,EAAEtE,IAAI,EAAE,GAAGsE;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZzE,KAAK,GAAG,GAAG;wBACTyE;wBACA,aAAaA,KAAK,QAAQ,EAAE;wBAC5B,kBAAkBA,KAAK,QAAQ,EAAE;wBACjC,4BACEA,KAAK,QAAQ,EAAE;oBACnB;oBACAzE,KAAK,KAAK,GAAG4C,gBAAgB6B,KAAK,QAAQ,EAAE,OAAO;oBACnD,IAAIA,KAAK,QAAQ,EAAE,mBACjBzE,KAAK,iBAAiB,GAAGyE,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMtE,YAAYmE,YAAY,SAAS;gBACvClE,OAAOD,WAAW;gBAElB,MAAMuE,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACdD,cAAc;wBACZ,CAACC,YAAY,EAAEC,0BAA0Bb,MAAMC;oBACjD;gBACF,OAAO,IAAIS,kBAAkB;oBAC3BE,cAAcZ;oBACdW,cAAc;wBACZ,CAACC,YAAY,EAAEC,0BAA0Bb,MAAMC;oBACjD;gBACF;gBAEA,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DlG,MAAM;oBACN,MAAM+G,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,cACAC,KACAY,sBACAX,kBACAjE;gBAEJ,EAAE,OAAOd,OAAO;oBACd,IAAIA,iBAAiB6F,cACnBV,UAAUnF,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE8F,IAAI,EAAEpC,OAAO,EAAE0B,IAAI,EAAE,GAAGK;gBAChCN,UAAUC;gBAEV,IAAIW,eAAeD;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTC,eAAeD;qBACV,IAAInB,AAAS,cAATA,MAEPoB,eADED,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTC,eAAe;qBAGf,IAAID,MAAM,CAACP,YAAY,KAAKzF,QAC1BiG,eAAgBD,IAAY,CAACP,YAAY;qBACpC,IAAIO,MAAM,WAAWhG,QAC1BiG,eAAgBD,KAAa,MAAM;qBAEnC/E,OAAO,OAAO;gBAKpB,IAAI4D,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtCpF,KAAK,OAAO,GAAG+C;oBACf,MAAM,IAAIe,MAAM,CAAC,kBAAkB,EAAEf,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQqC;oBACR,KAAKb;oBACLxB;gBACF;YACF;QACF;QAEA,OAAOsB;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,YAA0B,EAC1BC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMvE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEkE,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASpB,KAAK,SAAS,CAACoB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,cACAC,KACAC;QAGF,MAAM/D,SAASR,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACwE;QAE1C,IAAI,CAAC/D,QACH,MAAM,IAAIwD,MACR;QAIJ,MAAM,EAAEvD,MAAM,EAAEwC,OAAO,EAAE,GAAGzC;QAE5B,OAAO;YACLC;YACAwC;YACA1C;QACF;IACF;IAEA,MAAM,QACJgF,SAAsB,EACtBlB,GAA+B,EAC/BD,YAA0B,EACM;QAChC,MAAM,EAAEoB,UAAU,EAAElB,gBAAgB,EAAE,GAAGmB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMzF,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW0F;QAE1B,MAAMnF,SAASR,QAAQ,SAAS;QAChC,MAAM,EACJ4F,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG1B;QACJ,MAAM2B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEAzF,OAAOiF,WAAW;QAClBjF,OAAOqF,WAAW;QAClBrF,OAAOsF,iBAAiB;QAExBtF,OACEsF,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAM9B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAiB,YACApB,cACA4B,mBACA1B;YAGF,MAAM9D,SAAU,MAAMT,QAAQ,YAAY,CAACwE;YAO3C,IAAI/D,QAAQ,QACV,OAAO;gBACL,QAAQnB;gBACRkB;YACF;YAGF6F,eACE5F,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEgF,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,UAAUD,MAAMD;gBACtB,MAAMG,gBAAgBZ,kBAAkBW;gBACxC,MAAMtD,UAAU,CAAC,kBAAkB,EAAE2C,gBAAgB,IAAI,EAAEW,QAAQ,0CAA0C,EAAEC,cAAc,EAAE,CAAC;gBAChI,MAAM,EAAE,OAAOC,UAAU,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAC9D;oBAAC;wBAAE,MAAM;wBAAS,OAAO;4BAAE,QAAQD;wBAAc;wBAAGvD;oBAAQ;iBAAE,EAC9DmB,cACAA;gBAEF,IAAIqC,UAAU,CAAC,EAAE,EACf,MAAM1G,QAAQ,YAAY,CAAC0G,UAAU,CAAC,EAAE;YAE5C;QACF;QAEA,OAAO1G,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEqG,cAAc;IACnE;IAp0BA,YACEM,iBAAoC,EACpCC,OAAgB,EAChBC,IAQC,CACD;QArCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAEA;QAEA;QAoBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,eAAe,GAAGA,KAAK,eAAe;QAC3C,IAAI,CAAC,aAAa,GAAGA,KAAK,aAAa;QACvC,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAGA,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCH;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;YAChC,iBAAiBA,KAAK,eAAe;QACvC;IACF;AAwyBF;AAEO,eAAexF,gBACpBsF,iBAAoC,EACpCzF,iBAAuC,EACvCoB,MAAwB;IAExB,IAAI,CAACpB,mBAAmB,QACtB,OAAOoB;IAGT,IAAI,CAACqE,kBAAkB,2BAA2B,EAChD,MAAM,IAAI1C,MACR,CAAC,gCAAgC,EAAE0C,kBAAkB,aAAa,EAAE;IAIxE,MAAMI,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAAC9F;IACvB;IAEA,MAAM,EAAE+F,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMP,kBAAkB,2BAA2B,CAACI;IACtD,IAAI;QACF,MAAMtG,SAAS,MAAM6B;QAErB,MAAM9C,QAAQ,MAAM0H;QACpB,IAAI1H,OACF,MAAMA;QAER,OAAOiB;IACT,SAAU;QACRwG;IACF;AACF"}
@@ -120,7 +120,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
120
120
  return;
121
121
  }
122
122
  }
123
- const getMidsceneVersion = ()=>"1.9.4";
123
+ const getMidsceneVersion = ()=>"1.9.5-beta-20260611033424.0";
124
124
  const parsePrompt = (prompt)=>{
125
125
  if ('string' == typeof prompt) return {
126
126
  textPrompt: prompt,
@@ -10,7 +10,17 @@ const locateParamSchemaDescription = (promptSpec)=>{
10
10
  };
11
11
  const OBSERVE_STEP_NOTES = "### Observation Guidelines\n\n- Treat visible summaries, thumbnails, cropped content, and partially visible lists as potentially incomplete when the task depends on precise details.\n- If the current view does not provide enough information to decide safely, use available UI affordances such as opening details, expanding content, previewing, enlarging, zooming, or scrolling before acting.";
12
12
  const MEMORY_STEP_NOTES = "Use `<memory>` to record clear, task-relevant information from the current screenshot that may be needed in later steps. The current screenshot will not be available later, so memory should preserve enough detail for future reasoning, verification, or action.\n\n- Record information completely and exactly as shown. Do not summarize, translate, normalize, or merge values that may matter later.\n- When recording an item, include the item itself, its exact task-relevant details, and the visible cue or UI context that identifies where it came from when relevant.\n- Keep similar or repeated items as separate memory entries unless their task-relevant details are confirmed to be the same.\n- After navigation, scrolling, editing, deletion, saving, or other screen changes, treat remembered positions, order, indexes, and UI bindings as references only. Re-check the current screen before acting on them.\n\nExamples:\n- If you need to find an item and later assert its details, record the item name and the exact details needed for the assertion, such as status, price, date, owner, description, or other visible fields.\n- If you need to compare multiple similar results, record each candidate separately with its exact distinguishing details and visible context.\n- If you need to copy information from one place to another, record the exact source value and the target field or UI cue it should be mapped to.";
13
- const ACTION_STEP_NOTES = '### Action Guidelines\n\n- When editing existing text in a UI field, preserve all existing text by moving the cursor and typing/deleting the minimal necessary characters, and use Input with mode "typeOnly" when typing new characters for such edits.';
13
+ const RUN_ADB_SHELL_ACTION_GUIDANCE = "- If the user's task can be completed with the RunAdbShell action, prefer using the RunAdbShell action.";
14
+ const buildActionStepNotes = (actionList)=>[
15
+ '### Action Guidelines',
16
+ '',
17
+ ...actionList.includes('RunAdbShell') ? [
18
+ RUN_ADB_SHELL_ACTION_GUIDANCE
19
+ ] : [],
20
+ '- For touch continuous controls that set a value along a track, such as a slider, prefer Swipe from the current handle or filled position to the requested track endpoint instead of tapping the endpoint.',
21
+ '- When editing existing text in a UI field, preserve all existing text by moving the cursor and typing/deleting the minimal necessary characters.',
22
+ '- For insert/prepend/append edits, use CursorMove when the caret must be adjusted precisely, then use Input with mode "typeOnly" for inserted characters and KeyboardPress for newlines or deletion. If the caret lands in the wrong position, recover with CursorMove, KeyboardPress, or undo and retry cursor placement; do not switch to replace as a fallback for cursor placement failures.'
23
+ ].join('\n');
14
24
  const findDefaultValue = (field)=>{
15
25
  let current = field;
16
26
  const visited = new Set();
@@ -96,6 +106,7 @@ async function systemPromptToTaskPlanning({ actionSpace, locatePromptSpec, inclu
96
106
  if (includeLocateInPlanning && !locatePromptSpec) throw new Error(planningModelFamilyRequiredForLocateMessage());
97
107
  const actionDescriptionList = actionSpace.map((action)=>descriptionForAction(action, locateParamSchemaDescription(includeLocateInPlanning ? locatePromptSpec : void 0), includeLocateInPlanning, locatePromptSpec));
98
108
  const actionList = actionDescriptionList.join('\n');
109
+ const actionStepNotes = buildActionStepNotes(actionList);
99
110
  const shouldIncludeSubGoals = includeSubGoals ?? false;
100
111
  const locateExample = (prompt, exampleValueIndex)=>locateParamExample(prompt, includeLocateInPlanning ? locatePromptSpec : void 0, locatePromptSpec?.exampleValues[exampleValueIndex] ?? locatePromptSpec?.exampleValues[0]);
101
112
  const locateExample1 = locateExample('Add to cart button for Sauce Labs Backpack', 1);
@@ -212,6 +223,10 @@ The user's instruction defines the EXACT scope of what you must accomplish. You
212
223
  - "type 'hello' in the search box" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when 'hello' is typed. Do NOT press Enter or trigger search.
213
224
  - "select the first item" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when selected. Do NOT proceed to checkout.
214
225
 
226
+ **Change completion:**
227
+ - If the requested outcome is a durable change, such as create, edit, update, delete, save, send, submit, apply, or publish, do not stop at an unsaved draft, open editor, temporary input, transient selection, or staged value. Continue through the app/page's normal completion control such as Save, Done, Confirm, OK, Submit, Apply, Send, or Publish before completing, so the result remains after leaving the screen.
228
+ - If the user only asks for an intermediate UI state, such as typing text, selecting an option, filling fields, or opening a screen without saving/submitting/applying, stop once that exact state is reached.
229
+
215
230
  **Special case - Scrollable option lists and dropdowns:**
216
231
  - When choosing an item from a scrollable select, dropdown, listbox, menu, or similar option list, first open the control if it is closed. Once the list is open, interact with the list itself, not the page.
217
232
  - If the target option is visible in the open list, Tap that exact option immediately.
@@ -267,7 +282,7 @@ ONLY if the task is not complete: Think what the next action is according to the
267
282
  - Give just the next ONE action you should do (if any)
268
283
  - If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the "error" field to the error message.
269
284
 
270
- ${ACTION_STEP_NOTES}
285
+ ${actionStepNotes}
271
286
 
272
287
  ${includeLocateInPlanning ? `${locateGroundingRules()}
273
288
 
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import { findAllMidsceneLocatorField } from '@/common';\nimport type { DeviceAction } from '@/types';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { planningModelFamilyRequiredForLocateMessage } from '../errors';\nimport type { LocateResultPromptSpec } from '../shared/model-locate-result';\nimport { locateGroundingRules } from './locate-grounding-rules';\nimport { locateParamExample } from './locate-param-example';\n\nconst locateParamSchemaDescription = (promptSpec?: LocateResultPromptSpec) => {\n if (promptSpec) {\n return `{${promptSpec.resultKey}: ${promptSpec.resultValueSchema}, prompt: string } // ${promptSpec.resultValueDescription}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nconst OBSERVE_STEP_NOTES = [\n '### Observation Guidelines',\n '',\n '- Treat visible summaries, thumbnails, cropped content, and partially visible lists as potentially incomplete when the task depends on precise details.',\n '- If the current view does not provide enough information to decide safely, use available UI affordances such as opening details, expanding content, previewing, enlarging, zooming, or scrolling before acting.',\n].join('\\n');\n\nconst MEMORY_STEP_NOTES = [\n 'Use `<memory>` to record clear, task-relevant information from the current screenshot that may be needed in later steps. The current screenshot will not be available later, so memory should preserve enough detail for future reasoning, verification, or action.',\n '',\n '- Record information completely and exactly as shown. Do not summarize, translate, normalize, or merge values that may matter later.',\n '- When recording an item, include the item itself, its exact task-relevant details, and the visible cue or UI context that identifies where it came from when relevant.',\n '- Keep similar or repeated items as separate memory entries unless their task-relevant details are confirmed to be the same.',\n '- After navigation, scrolling, editing, deletion, saving, or other screen changes, treat remembered positions, order, indexes, and UI bindings as references only. Re-check the current screen before acting on them.',\n '',\n 'Examples:',\n '- If you need to find an item and later assert its details, record the item name and the exact details needed for the assertion, such as status, price, date, owner, description, or other visible fields.',\n '- If you need to compare multiple similar results, record each candidate separately with its exact distinguishing details and visible context.',\n '- If you need to copy information from one place to another, record the exact source value and the target field or UI cue it should be mapped to.',\n].join('\\n');\n\nconst ACTION_STEP_NOTES = [\n '### Action Guidelines',\n '',\n '- When editing existing text in a UI field, preserve all existing text by moving the cursor and typing/deleting the minimal necessary characters, and use Input with mode \"typeOnly\" when typing new characters for such edits.',\n].join('\\n');\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\n/**\n * Inject model locate results into locate fields of a sample object.\n * Walks the sample and for any locate field (identified by paramSchema),\n * adds a fake locate result when includeLocateInPlanning is true.\n */\nconst injectLocateResultIntoSample = (\n sample: Record<string, any>,\n locateFields: string[],\n promptSpec: LocateResultPromptSpec,\n): Record<string, any> => {\n const resultKey = promptSpec.resultKey;\n const sampleResults = promptSpec.exampleValues;\n const result = { ...sample };\n let sampleResultIndex = 0;\n for (const field of locateFields) {\n if (\n result[field] &&\n typeof result[field] === 'object' &&\n result[field].prompt\n ) {\n result[field] = {\n ...result[field],\n [resultKey]: sampleResults[sampleResultIndex % sampleResults.length],\n };\n sampleResultIndex++;\n }\n }\n return result;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locateParamTypeDescription: string,\n includeLocateInPlanning = false,\n locatePromptSpec?: LocateResultPromptSpec,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locateParamTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n // Render sample if provided, using the same XML tag format as the real output\n if (action.sample && typeof action.sample === 'object') {\n const locateFields = findAllMidsceneLocatorField(action.paramSchema);\n const sampleWithLocateResult =\n includeLocateInPlanning && locatePromptSpec\n ? injectLocateResultIntoSample(\n action.sample,\n locateFields,\n locatePromptSpec,\n )\n : action.sample;\n const sampleStr = `- sample:\\n${tab}${tab}<action-type>${action.name}</action-type>\\n${tab}${tab}<action-param-json>\\n${tab}${tab}${JSON.stringify(sampleWithLocateResult, null, 2).replace(/\\n/g, `\\n${tab}${tab}`)}\\n${tab}${tab}</action-param-json>`;\n fields.push(sampleStr);\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n locatePromptSpec,\n includeLocateInPlanning,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n locatePromptSpec?: LocateResultPromptSpec;\n includeLocateInPlanning: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n if (includeLocateInPlanning && !locatePromptSpec) {\n throw new Error(planningModelFamilyRequiredForLocateMessage());\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n locateParamSchemaDescription(\n includeLocateInPlanning ? locatePromptSpec : undefined,\n ),\n includeLocateInPlanning,\n locatePromptSpec,\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n const locateExample = (prompt: string, exampleValueIndex: number) =>\n locateParamExample(\n prompt,\n includeLocateInPlanning ? locatePromptSpec : undefined,\n locatePromptSpec?.exampleValues[exampleValueIndex] ??\n locatePromptSpec?.exampleValues[0],\n );\n const locateExample1 = locateExample(\n 'Add to cart button for Sauce Labs Backpack',\n 1,\n );\n const locateNameField = locateExample(\n 'Name input field in the registration form',\n 2,\n );\n const locateEmailField = locateExample(\n 'Email input field in the registration form',\n 3,\n );\n\n const thoughtTag = (content: string) =>\n shouldIncludeThought ? `<thought>${content}</thought>\\n` : '';\n\n // Sub-goals related content - only included when shouldIncludeSubGoals is true\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)'\n : '## Step 1: Observe (related tags: <thought>)';\n\n const step1Description = shouldIncludeSubGoals\n ? \"First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot.\"\n : 'First, observe the current screenshot and previous logs to understand the current state.';\n\n const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: \"fill out the form\" means only fill fields, do NOT submit; \"click the button\" means only click, do NOT wait for page load or verify results; \"type 'hello'\" means only type, do NOT press Enter.`;\n\n const thoughtTagDescription = shouldIncludeSubGoals\n ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`\n : `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`;\n\n const subGoalTags = shouldIncludeSubGoals\n ? `\n\n* <update-plan-content> tag\n\nUse this structure to give or update your plan:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">sub goal description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">sub goal description</sub-goal>\n ...\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\nUse this structure to mark a sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: You MUST only mark a sub-goal as \"finished\" AFTER you have confirmed the task is actually completed by observing the result in the screenshot. Do NOT mark a sub-goal as done just because you expect the next action will complete it. Wait until you see visual confirmation in the screenshot that the sub-goal has been achieved.\n\n* Note\n\nDuring execution, you can call <update-plan-content> at any time to update the plan based on the latest screenshot and completed sub-goals.\n\n### Example\n\nIf the user wants to \"log in to a system using username and password, complete all to-do items, and submit a registration form\", you can break it down into the following sub-goals:\n\n<thought>...</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Log in to the system</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Complete all to-do items</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Submit the registration form</sub-goal>\n</update-plan-content>\n\nAfter logging in and seeing the to-do items, you can mark the sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nAt this point, the status of all sub-goals is:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished\" />\n <sub-goal index=\"2\" status=\"pending\" />\n <sub-goal index=\"3\" status=\"pending\" />\n</update-plan-content>\n\nAfter some time, when the last sub-goal is also completed, you can mark it as done as well:\n\n<mark-sub-goal-done>\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>`\n : '';\n\n // Step numbering adjusts based on whether sub-goals are included\n // When includeSubGoals=false, memory step is skipped\n const memoryStepNumber = 2; // Only used when shouldIncludeSubGoals is true\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 2;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 3;\n\n return `\nTarget: You are an expert to manipulate the UI to accomplish the user's instruction. User will give you an instruction, some screenshots, background knowledge and previous logs indicating what have been done. Your task is to accomplish the instruction by thinking through the path to complete the task and give the next action to execute.\n\n${step1Title}\n\n${step1Description}\n${shouldIncludeSubGoals ? `\\n${OBSERVE_STEP_NOTES}\\n` : ''}\n* <thought> tag (REQUIRED)\n\n${thoughtTagDescription}\n${subGoalTags}\n${\n shouldIncludeSubGoals\n ? `\n## Step ${memoryStepNumber}: Memory Data from Current Screenshot (related tags: <memory>)\n\n${MEMORY_STEP_NOTES}\n\nDon't use this tag if no information needs to be preserved.\n`\n : ''\n}\n## Step ${checkGoalStepNumber}: ${shouldIncludeSubGoals ? 'Check if Goal is Accomplished' : 'Check if the Instruction is Fulfilled'} (related tags: <complete>)\n\n${shouldIncludeSubGoals ? 'Based on the current screenshot and the status of all sub-goals, determine' : 'Determine'} if the entire task is completed.\n\n### CRITICAL: The User's Instruction is the Supreme Authority\n\nThe user's instruction defines the EXACT scope of what you must accomplish. You MUST follow it precisely - nothing more, nothing less. Violating this rule may cause severe consequences such as data loss, unintended operations, or system failures.\n\n**Explicit instructions vs. High-level goals:**\n- If the user gives you **explicit operation steps** (e.g., \"click X\", \"type Y\", \"fill out the form\"), treat them as exact commands. Execute ONLY those steps, nothing more.\n- If the user gives you a **high-level goal** (e.g., \"log in to the system\", \"complete the purchase\"), you may determine the necessary steps to achieve it.\n\n**What \"${shouldIncludeSubGoals ? 'goal accomplished' : 'instruction fulfilled'}\" means:**\n- The ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} when you have done EXACTLY what the user asked - no extra steps, no assumptions.\n- Do NOT perform any action beyond the explicit instruction, even if it seems logical or helpful.\n\n**Examples - Explicit instructions (execute exactly, no extra steps):**\n- \"fill out the form\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when all fields are filled. Do NOT submit the form.\n- \"click the login button\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} once clicked. Do NOT wait for page load or verify login success.\n- \"type 'hello' in the search box\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when 'hello' is typed. Do NOT press Enter or trigger search.\n- \"select the first item\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when selected. Do NOT proceed to checkout.\n\n**Special case - Scrollable option lists and dropdowns:**\n- When choosing an item from a scrollable select, dropdown, listbox, menu, or similar option list, first open the control if it is closed. Once the list is open, interact with the list itself, not the page.\n- If the target option is visible in the open list, Tap that exact option immediately.\n- If the list is open but the target option is not visible, try to find it by scrolling the open list/dropdown before giving up or interacting with other elements.\n- For an open dropdown/list, prefer small incremental Scroll actions with an explicit distance (typically 50-120 pixels) and a locate target describing the open list/dropdown. Do NOT omit distance while searching within a list, because the default scroll distance can skip over relevant options and cause oscillation.\n- While searching within an open dropdown/list, use short scrolls so intermediate options are not skipped.\n- After selecting the target, if the trigger text or result text shows the requested option, treat the current selection step as fulfilled and continue evaluating the remaining user instruction.\n\n**Special case - Text hidden by a narrow input field:**\n- CRITICAL PRIORITY OVERRIDE - Input verification after an input action:\n- This rule overrides the general requirement to verify the exact target text from the screenshot.\n- If the previous step already executed an input action, and the current input field is not empty, you MUST directly treat that input as successful.\n- In this situation, do NOT use the visible text in the screenshot to decide that the input is incomplete, incorrect, missing characters, out of order, or needs correction.\n- Do NOT perform ClearInput, another Input, KeyboardPress, or any other action intended to \"fix the input content\" just because the visible text looks different from the target text, has abnormal character order, missing characters, abnormal spacing, suspicious visual recognition results, or appears to have selected/highlighted text.\n- The general rule \"do EXACTLY what the user asked\" still applies to the intended input value you execute, but it MUST NOT be enforced by re-validating the visible text in the screenshot after the input action.\n- Differences in visible text must be assumed to be caused by clipping, horizontal scrolling, narrow input fields, text selection, caret position, or visual recognition errors rather than input failure.\n- Retry input only when the input field is clearly still empty, or when the page shows an explicit error message.\n\n**Special case - Assertion instructions:**\n- If the user's instruction includes an assertion (e.g., \"verify that...\", \"check that...\", \"assert...\"), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, mark ${shouldIncludeSubGoals ? 'the goal' : 'it'} as failed (success=\"false\").\n- If the page is still loading (e.g., you see a loading spinner, skeleton screen, or progress bar), do NOT assert yet. Wait for the page to finish loading before evaluating the assertion.\n\n### Completion Criteria for Process-required Instructions\n\nIf the user's instruction includes explicit operation steps, ordering requirements, or action requirements, it is a process-required instruction.\n\nFor process-required instructions, do NOT treat the task as complete only because the current screenshot already shows the final expected state. Do NOT infer that earlier steps were executed from the final UI state.\n\nYou may output <complete success=\"true\"> only when the current execution history, previous logs, or the screenshot after the most recent action proves that every explicit step required by the user has been completed, and the final check condition is also satisfied.\n\nIf any explicit step lacks completion evidence in the current execution history, continue with the next missing step instead of outputting <complete>, even if the current screenshot appears to satisfy the final condition.\n${\n !shouldIncludeSubGoals\n ? `\n**Page navigation restriction:**\n- Unless the user's instruction explicitly asks you to click a link, jump to another page, or navigate to a URL, you MUST complete the task on the current page only.\n- Do NOT navigate away from the current page on your own initiative (e.g., do not click links that lead to other pages, do not use browser back/forward, do not open new URLs).\n- If the task cannot be accomplished on the current page and the user has not instructed you to navigate, report it as a failure (success=\"false\") instead of attempting to navigate to other pages.\n`\n : ''\n}\n### Output Rules\n\n- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.\n- Use the <complete success=\"true|false\">message</complete> tag to output the result if the goal is accomplished or failed.\n - the 'success' attribute is required. ${shouldIncludeSubGoals ? 'It means whether the expected goal is accomplished based on what you observe in the current screenshot and the current execution history. ' : ''}No matter what errors occurred during execution, set success=\"true\" only when the current execution history shows that all steps required by the user have been completed and the final state satisfies the requirement. If the user asks for explicit operation steps or an ordered workflow, do not treat those steps as completed only because the current screenshot already shows the final expected state. If the ${shouldIncludeSubGoals ? 'expected goal is not accomplished and cannot be accomplished' : 'instruction is not fulfilled and cannot be fulfilled'}, set success=\"false\".\n - the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.\n- If you output <complete>, do NOT output <action-type> or <action-param-json>. The task ends here.\n\n## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)\n\nONLY if the task is not complete: Think what the next action is according to the current screenshot${shouldIncludeSubGoals ? ' and the plan' : ''}.\n\n- Don't give extra actions or plans beyond the instruction or the plan. For example, don't try to submit the form if the instruction is only to fill something.\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully. Otherwise, retry or do something else to recover.\n- Give just the next ONE action you should do (if any)\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the \"error\" field to the error message.\n\n${ACTION_STEP_NOTES}\n\n${\n includeLocateInPlanning\n ? `${locateGroundingRules()}\n\n`\n : ''\n}### Supporting actions list\n\n${actionList}\n\n### Log to give user feedback (preamble message)\n\nThe <log> tag is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button in popup</log>\n- <log>Previous actions failed to find the 'Yes' button, i will try again</log>\n- <log>Go back to find the login button</log>\n\n### If there is some action to do ...\n\n- Use the <action-type> and <action-param-json> tags to output the action to be executed.\n- The <action-type> MUST be one of the supporting actions. 'complete' is NOT a valid action-type.\n- Parameter names are strict. Use EXACTLY the field names listed for the selected action. Do NOT invent alias fields. If an action has a \"sample\" in its description, follow that structure.\nFor example:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### If you think there is an error ...\n\n- Use the <error> tag to output the error message.\n\nFor example:\n<error>Unable to find the required element on the page</error>\n\n### If there is no action to do ...\n\n- Don't output <action-type> or <action-param-json> if there is no action to do.\n\n## Return Format\n\nReturn in XML format following this decision flow:\n\n**Always include (REQUIRED):**\n<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->\n<thought>Your thought process here. NEVER skip this tag.</thought>\n${\n shouldIncludeSubGoals\n ? `\n<!-- required when no update-plan-content is provided in the previous response -->\n<update-plan-content>...</update-plan-content>\n\n<!-- required when any sub-goal is completed -->\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}${\n shouldIncludeSubGoals\n ? `\n<!-- Step ${memoryStepNumber}: Memory data from current screenshot if needed -->\n<memory>...</memory>\n`\n : ''\n}\n**Then choose ONE of the following paths:**\n\n**Path A: If the ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} or failed (Step ${checkGoalStepNumber})**\n<complete success=\"true|false\">...</complete>\n\n**Path B: If the ${shouldIncludeSubGoals ? 'goal is NOT complete' : 'instruction is NOT fulfilled'} yet (Step ${actionStepNumber})**\n<!-- Determine next action -->\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n${\n shouldIncludeSubGoals\n ? `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I'll break this down into sub-goals and start with the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Fill in the Name field with 'John'</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Fill in the Email field with 'john@example.com'</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Return the filled email address</sub-goal>\n</update-plan-content>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. Current sub-goal is running, will be completed after input.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n- Typing 'John' into the Name field\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Sub-goal 1 is complete. Next, I need to fill the Email field for sub-goal 2.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n<memory>Name field has been filled with 'John'</memory>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, sub-goal 2 will be complete and my task will be done.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Goal accomplished)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n- Typing email address into the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. Sub-goal 2 is complete. The user asked me to return the filled email address, so I need to include 'john@example.com' in my response. All sub-goals are now finished.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"2\" status=\"finished\" />\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>\n<complete success=\"true\">john@example.com</complete>\n`\n : `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I should start by clicking on the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. After this, I'll need to fill the Email field as well.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Next, I need to fill the Email field with 'john@example.com'. I'll click on the Email field first.</thought>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, both required fields will be filled and my task will be complete.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Instruction fulfilled)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. The user asked me to return the filled email address, so I should include 'john@example.com' in my response. The instruction has been fulfilled.</thought>\n<complete success=\"true\">john@example.com</complete>\n`\n}`;\n}\n"],"names":["locateParamSchemaDescription","promptSpec","OBSERVE_STEP_NOTES","MEMORY_STEP_NOTES","ACTION_STEP_NOTES","findDefaultValue","field","current","visited","Set","currentWithDef","injectLocateResultIntoSample","sample","locateFields","resultKey","sampleResults","result","sampleResultIndex","descriptionForAction","action","locateParamTypeDescription","includeLocateInPlanning","locatePromptSpec","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","findAllMidsceneLocatorField","sampleWithLocateResult","sampleStr","systemPromptToTaskPlanning","actionSpace","includeThought","includeSubGoals","preferredLanguage","getPreferredLanguage","Error","planningModelFamilyRequiredForLocateMessage","actionDescriptionList","actionList","shouldIncludeSubGoals","locateExample","prompt","exampleValueIndex","locateParamExample","locateExample1","locateNameField","locateEmailField","step1Title","step1Description","explicitInstructionRule","thoughtTagDescription","subGoalTags","memoryStepNumber","checkGoalStepNumber","actionStepNumber","locateGroundingRules"],"mappings":";;;;;;AAaA,MAAMA,+BAA+B,CAACC;IACpC,IAAIA,YACF,OAAO,CAAC,CAAC,EAAEA,WAAW,SAAS,CAAC,EAAE,EAAEA,WAAW,iBAAiB,CAAC,sBAAsB,EAAEA,WAAW,sBAAsB,EAAE;IAE9H,OAAO;AACT;AAEA,MAAMC,qBAAqB;AAO3B,MAAMC,oBAAoB;AAc1B,MAAMC,oBAAoB;AAS1B,MAAMC,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAOA,MAAMC,+BAA+B,CACnCC,QACAC,cACAZ;IAEA,MAAMa,YAAYb,WAAW,SAAS;IACtC,MAAMc,gBAAgBd,WAAW,aAAa;IAC9C,MAAMe,SAAS;QAAE,GAAGJ,MAAM;IAAC;IAC3B,IAAIK,oBAAoB;IACxB,KAAK,MAAMX,SAASO,aAClB,IACEG,MAAM,CAACV,MAAM,IACb,AAAyB,YAAzB,OAAOU,MAAM,CAACV,MAAM,IACpBU,MAAM,CAACV,MAAM,CAAC,MAAM,EACpB;QACAU,MAAM,CAACV,MAAM,GAAG;YACd,GAAGU,MAAM,CAACV,MAAM;YAChB,CAACQ,UAAU,EAAEC,aAAa,CAACE,oBAAoBF,cAAc,MAAM,CAAC;QACtE;QACAE;IACF;IAEF,OAAOD;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC,4BACAC,0BAA0B,KAAK,EAC/BC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEL,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,SAASP,OAAO,WAAW;QAIjC,MAAMQ,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKvB,MAAM,IAAIwB,OAAO,OAAO,CAACF,OACxC,IAAItB,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMyB,aACJ,AACE,cADF,OAAQzB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAM0B,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAe5B,OAAOc;gBAGvC,MAAMe,cAAcC,kBAAkB9B;gBAGtC,MAAM+B,eAAehC,iBAAiBC;gBACtC,MAAMgC,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAGA,IAAI1B,OAAO,MAAM,IAAI,AAAyB,YAAzB,OAAOA,OAAO,MAAM,EAAe;QACtD,MAAMN,eAAeiC,4BAA4B3B,OAAO,WAAW;QACnE,MAAM4B,yBACJ1B,2BAA2BC,mBACvBX,6BACEQ,OAAO,MAAM,EACbN,cACAS,oBAEFH,OAAO,MAAM;QACnB,MAAM6B,YAAY,CAAC,WAAW,EAAEzB,MAAMA,IAAI,aAAa,EAAEJ,OAAO,IAAI,CAAC,gBAAgB,EAAEI,MAAMA,IAAI,qBAAqB,EAAEA,MAAMA,MAAMoB,KAAK,SAAS,CAACI,wBAAwB,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAExB,MAAMA,KAAK,EAAE,EAAE,EAAEA,MAAMA,IAAI,oBAAoB,CAAC;QACxPC,OAAO,IAAI,CAACwB;IACd;IAEA,OAAO,CAAC,EAAE,EAAE7B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEI,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe0B,2BAA2B,EAC/CC,WAAW,EACX5B,gBAAgB,EAChBD,uBAAuB,EACvB8B,cAAc,EACdC,eAAe,EAOhB;IACC,MAAMC,oBAAoBC;IAE1B,IAAIjC,2BAA2B,CAACC,kBAC9B,MAAM,IAAIiC,MAAMC;IAGlB,MAAMC,wBAAwBP,YAAY,GAAG,CAAC,CAAC/B,SACtCD,qBACLC,QACAnB,6BACEqB,0BAA0BC,mBAAmBiB,SAE/ClB,yBACAC;IAGJ,MAAMoC,aAAaD,sBAAsB,IAAI,CAAC;IAG9C,MAAME,wBAAwBP,mBAAmB;IAEjD,MAAMQ,gBAAgB,CAACC,QAAgBC,oBACrCC,mBACEF,QACAxC,0BAA0BC,mBAAmBiB,QAC7CjB,kBAAkB,aAAa,CAACwC,kBAAkB,IAChDxC,kBAAkB,aAAa,CAAC,EAAE;IAExC,MAAM0C,iBAAiBJ,cACrB,8CACA;IAEF,MAAMK,kBAAkBL,cACtB,6CACA;IAEF,MAAMM,mBAAmBN,cACvB,8CACA;IAOF,MAAMO,aAAaR,wBACf,uGACA;IAEJ,MAAMS,mBAAmBT,wBACrB,wNACA;IAEJ,MAAMU,0BAA0B;IAEhC,MAAMC,wBAAwBX,wBAC1B,CAAC;;;;AAIP,EAAEU,yBAAyB,GACrB,CAAC;;;;AAIP,EAAEA,yBAAyB;IAEzB,MAAME,cAAcZ,wBAChB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAuDc,CAAC,GAChB;IAIJ,MAAMa,mBAAmB;IACzB,MAAMC,sBAAsBd,wBAAwB,IAAI;IACxD,MAAMe,mBAAmBf,wBAAwB,IAAI;IAErD,OAAO,CAAC;;;AAGV,EAAEQ,WAAW;;AAEb,EAAEC,iBAAiB;AACnB,EAAET,wBAAwB,CAAC,EAAE,EAAEzD,mBAAmB,EAAE,CAAC,GAAG,GAAG;;;AAG3D,EAAEoE,sBAAsB;AACxB,EAAEC,YAAY;AACd,EACEZ,wBACI,CAAC;QACC,EAAEa,iBAAiB;;AAE3B,EAAErE,kBAAkB;;;AAGpB,CAAC,GACK,GACL;QACO,EAAEsE,oBAAoB,EAAE,EAAEd,wBAAwB,kCAAkC,wCAAwC;;AAEpI,EAAEA,wBAAwB,+EAA+E,YAAY;;;;;;;;;;QAU7G,EAAEA,wBAAwB,sBAAsB,wBAAwB;MAC1E,EAAEA,wBAAwB,yBAAyB,2BAA2B;;;;wBAI5D,EAAEA,wBAAwB,sBAAsB,wBAAwB;6BACnE,EAAEA,wBAAwB,sBAAsB,wBAAwB;qCAChE,EAAEA,wBAAwB,sBAAsB,wBAAwB;4BACjF,EAAEA,wBAAwB,sBAAsB,wBAAwB;;;;;;;;;;;;;;;;;;;;;0NAqBsH,EAAEA,wBAAwB,aAAa,KAAK;;;;;;;;;;;;AAYtQ,EACE,CAACA,wBACG,CAAC;;;;;AAKP,CAAC,GACK,GACL;;;sEAGqE,EAAEe,iBAAiB;;yCAEhD,EAAEf,wBAAwB,+IAA+I,GAAG,wZAAwZ,EAAEA,wBAAwB,iEAAiE,uDAAuD;;;;QAIvvB,EAAEe,iBAAiB;;mGAEwE,EAAEf,wBAAwB,kBAAkB,GAAG;;;;;;;;AAQlJ,EAAEvD,kBAAkB;;AAEpB,EACEiB,0BACI,GAAGsD,uBAAuB;;AAEhC,CAAC,GACK,GACL;;AAED,EAAEjB,WAAW;;;;;;QAML,EAAEL,kBAAkB;;;;;;;;;;;;;;;;;;;;YAoBhB,EAAEW,eAAe;;;;;;;;;;;;;;;;;;;;oBAoBT,EAAEL,wBAAwB,cAAc,GAAG;;AAE/D,EACEA,wBACI,CAAC;;;;;;;;AAQP,CAAC,GACK,KAEJA,wBACI,CAAC;UACG,EAAEa,iBAAiB;;AAE7B,CAAC,GACK,GACL;;;iBAGgB,EAAEb,wBAAwB,yBAAyB,2BAA2B,iBAAiB,EAAEc,oBAAoB;;;iBAGrH,EAAEd,wBAAwB,yBAAyB,+BAA+B,WAAW,EAAEe,iBAAiB;;;;;;;;AAQjI,EACEf,wBACI,CAAC;;;;;;;;;;;;;;;;;;;;;;;YAuBK,EAAEM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAuDlB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoD/B,CAAC,GACK,CAAC;;;;;;;;;;;;;;;;;;YAkBK,EAAED,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAkClB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+B/B,CAAC,EACC;AACF"}
1
+ {"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import { findAllMidsceneLocatorField } from '@/common';\nimport type { DeviceAction } from '@/types';\nimport { getPreferredLanguage } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { z } from 'zod';\nimport { planningModelFamilyRequiredForLocateMessage } from '../errors';\nimport type { LocateResultPromptSpec } from '../shared/model-locate-result';\nimport { locateGroundingRules } from './locate-grounding-rules';\nimport { locateParamExample } from './locate-param-example';\n\nconst locateParamSchemaDescription = (promptSpec?: LocateResultPromptSpec) => {\n if (promptSpec) {\n return `{${promptSpec.resultKey}: ${promptSpec.resultValueSchema}, prompt: string } // ${promptSpec.resultValueDescription}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nconst OBSERVE_STEP_NOTES = [\n '### Observation Guidelines',\n '',\n '- Treat visible summaries, thumbnails, cropped content, and partially visible lists as potentially incomplete when the task depends on precise details.',\n '- If the current view does not provide enough information to decide safely, use available UI affordances such as opening details, expanding content, previewing, enlarging, zooming, or scrolling before acting.',\n].join('\\n');\n\nconst MEMORY_STEP_NOTES = [\n 'Use `<memory>` to record clear, task-relevant information from the current screenshot that may be needed in later steps. The current screenshot will not be available later, so memory should preserve enough detail for future reasoning, verification, or action.',\n '',\n '- Record information completely and exactly as shown. Do not summarize, translate, normalize, or merge values that may matter later.',\n '- When recording an item, include the item itself, its exact task-relevant details, and the visible cue or UI context that identifies where it came from when relevant.',\n '- Keep similar or repeated items as separate memory entries unless their task-relevant details are confirmed to be the same.',\n '- After navigation, scrolling, editing, deletion, saving, or other screen changes, treat remembered positions, order, indexes, and UI bindings as references only. Re-check the current screen before acting on them.',\n '',\n 'Examples:',\n '- If you need to find an item and later assert its details, record the item name and the exact details needed for the assertion, such as status, price, date, owner, description, or other visible fields.',\n '- If you need to compare multiple similar results, record each candidate separately with its exact distinguishing details and visible context.',\n '- If you need to copy information from one place to another, record the exact source value and the target field or UI cue it should be mapped to.',\n].join('\\n');\n\nconst RUN_ADB_SHELL_ACTION_GUIDANCE =\n \"- If the user's task can be completed with the RunAdbShell action, prefer using the RunAdbShell action.\";\n\nconst buildActionStepNotes = (actionList: string) =>\n [\n '### Action Guidelines',\n '',\n ...(actionList.includes('RunAdbShell')\n ? [RUN_ADB_SHELL_ACTION_GUIDANCE]\n : []),\n '- For touch continuous controls that set a value along a track, such as a slider, prefer Swipe from the current handle or filled position to the requested track endpoint instead of tapping the endpoint.',\n '- When editing existing text in a UI field, preserve all existing text by moving the cursor and typing/deleting the minimal necessary characters.',\n '- For insert/prepend/append edits, use CursorMove when the caret must be adjusted precisely, then use Input with mode \"typeOnly\" for inserted characters and KeyboardPress for newlines or deletion. If the caret lands in the wrong position, recover with CursorMove, KeyboardPress, or undo and retry cursor placement; do not switch to replace as a fallback for cursor placement failures.',\n ].join('\\n');\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\n/**\n * Inject model locate results into locate fields of a sample object.\n * Walks the sample and for any locate field (identified by paramSchema),\n * adds a fake locate result when includeLocateInPlanning is true.\n */\nconst injectLocateResultIntoSample = (\n sample: Record<string, any>,\n locateFields: string[],\n promptSpec: LocateResultPromptSpec,\n): Record<string, any> => {\n const resultKey = promptSpec.resultKey;\n const sampleResults = promptSpec.exampleValues;\n const result = { ...sample };\n let sampleResultIndex = 0;\n for (const field of locateFields) {\n if (\n result[field] &&\n typeof result[field] === 'object' &&\n result[field].prompt\n ) {\n result[field] = {\n ...result[field],\n [resultKey]: sampleResults[sampleResultIndex % sampleResults.length],\n };\n sampleResultIndex++;\n }\n }\n return result;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locateParamTypeDescription: string,\n includeLocateInPlanning = false,\n locatePromptSpec?: LocateResultPromptSpec,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locateParamTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n // Render sample if provided, using the same XML tag format as the real output\n if (action.sample && typeof action.sample === 'object') {\n const locateFields = findAllMidsceneLocatorField(action.paramSchema);\n const sampleWithLocateResult =\n includeLocateInPlanning && locatePromptSpec\n ? injectLocateResultIntoSample(\n action.sample,\n locateFields,\n locatePromptSpec,\n )\n : action.sample;\n const sampleStr = `- sample:\\n${tab}${tab}<action-type>${action.name}</action-type>\\n${tab}${tab}<action-param-json>\\n${tab}${tab}${JSON.stringify(sampleWithLocateResult, null, 2).replace(/\\n/g, `\\n${tab}${tab}`)}\\n${tab}${tab}</action-param-json>`;\n fields.push(sampleStr);\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n locatePromptSpec,\n includeLocateInPlanning,\n includeThought,\n includeSubGoals,\n}: {\n actionSpace: DeviceAction<any>[];\n locatePromptSpec?: LocateResultPromptSpec;\n includeLocateInPlanning: boolean;\n includeThought?: boolean;\n includeSubGoals?: boolean;\n}) {\n const preferredLanguage = getPreferredLanguage();\n\n if (includeLocateInPlanning && !locatePromptSpec) {\n throw new Error(planningModelFamilyRequiredForLocateMessage());\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n locateParamSchemaDescription(\n includeLocateInPlanning ? locatePromptSpec : undefined,\n ),\n includeLocateInPlanning,\n locatePromptSpec,\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n const actionStepNotes = buildActionStepNotes(actionList);\n\n const shouldIncludeThought = includeThought ?? true;\n const shouldIncludeSubGoals = includeSubGoals ?? false;\n\n const locateExample = (prompt: string, exampleValueIndex: number) =>\n locateParamExample(\n prompt,\n includeLocateInPlanning ? locatePromptSpec : undefined,\n locatePromptSpec?.exampleValues[exampleValueIndex] ??\n locatePromptSpec?.exampleValues[0],\n );\n const locateExample1 = locateExample(\n 'Add to cart button for Sauce Labs Backpack',\n 1,\n );\n const locateNameField = locateExample(\n 'Name input field in the registration form',\n 2,\n );\n const locateEmailField = locateExample(\n 'Email input field in the registration form',\n 3,\n );\n\n const thoughtTag = (content: string) =>\n shouldIncludeThought ? `<thought>${content}</thought>\\n` : '';\n\n // Sub-goals related content - only included when shouldIncludeSubGoals is true\n const step1Title = shouldIncludeSubGoals\n ? '## Step 1: Observe and Plan (related tags: <thought>, <update-plan-content>, <mark-sub-goal-done>)'\n : '## Step 1: Observe (related tags: <thought>)';\n\n const step1Description = shouldIncludeSubGoals\n ? \"First, observe the current screenshot and previous logs, then break down the user's instruction into multiple high-level sub-goals. Update the status of sub-goals based on what you see in the current screenshot.\"\n : 'First, observe the current screenshot and previous logs to understand the current state.';\n\n const explicitInstructionRule = `CRITICAL - Following Explicit Instructions: When the user gives you specific operation steps (not high-level goals), you MUST execute ONLY those exact steps - nothing more, nothing less. Do NOT add extra actions even if they seem logical. For example: \"fill out the form\" means only fill fields, do NOT submit; \"click the button\" means only click, do NOT wait for page load or verify results; \"type 'hello'\" means only type, do NOT press Enter.`;\n\n const thoughtTagDescription = shouldIncludeSubGoals\n ? `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the user's requirement? What is the current state based on the screenshot? Are all sub-goals completed? If not, what should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`\n : `REQUIRED: You MUST always output the <thought> tag. Never skip it.\n\nInclude your thought process in the <thought> tag. It should answer: What is the current state based on the screenshot? What should be the next action? Write your thoughts naturally without numbering or section headers.\n\n${explicitInstructionRule}`;\n\n const subGoalTags = shouldIncludeSubGoals\n ? `\n\n* <update-plan-content> tag\n\nUse this structure to give or update your plan:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished|pending\">sub goal description</sub-goal>\n <sub-goal index=\"2\" status=\"finished|pending\">sub goal description</sub-goal>\n ...\n</update-plan-content>\n\n* <mark-sub-goal-done> tag\n\nUse this structure to mark a sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nIMPORTANT: You MUST only mark a sub-goal as \"finished\" AFTER you have confirmed the task is actually completed by observing the result in the screenshot. Do NOT mark a sub-goal as done just because you expect the next action will complete it. Wait until you see visual confirmation in the screenshot that the sub-goal has been achieved.\n\n* Note\n\nDuring execution, you can call <update-plan-content> at any time to update the plan based on the latest screenshot and completed sub-goals.\n\n### Example\n\nIf the user wants to \"log in to a system using username and password, complete all to-do items, and submit a registration form\", you can break it down into the following sub-goals:\n\n<thought>...</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Log in to the system</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Complete all to-do items</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Submit the registration form</sub-goal>\n</update-plan-content>\n\nAfter logging in and seeing the to-do items, you can mark the sub-goal as done:\n\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n\nAt this point, the status of all sub-goals is:\n\n<update-plan-content>\n <sub-goal index=\"1\" status=\"finished\" />\n <sub-goal index=\"2\" status=\"pending\" />\n <sub-goal index=\"3\" status=\"pending\" />\n</update-plan-content>\n\nAfter some time, when the last sub-goal is also completed, you can mark it as done as well:\n\n<mark-sub-goal-done>\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>`\n : '';\n\n // Step numbering adjusts based on whether sub-goals are included\n // When includeSubGoals=false, memory step is skipped\n const memoryStepNumber = 2; // Only used when shouldIncludeSubGoals is true\n const checkGoalStepNumber = shouldIncludeSubGoals ? 3 : 2;\n const actionStepNumber = shouldIncludeSubGoals ? 4 : 3;\n\n return `\nTarget: You are an expert to manipulate the UI to accomplish the user's instruction. User will give you an instruction, some screenshots, background knowledge and previous logs indicating what have been done. Your task is to accomplish the instruction by thinking through the path to complete the task and give the next action to execute.\n\n${step1Title}\n\n${step1Description}\n${shouldIncludeSubGoals ? `\\n${OBSERVE_STEP_NOTES}\\n` : ''}\n* <thought> tag (REQUIRED)\n\n${thoughtTagDescription}\n${subGoalTags}\n${\n shouldIncludeSubGoals\n ? `\n## Step ${memoryStepNumber}: Memory Data from Current Screenshot (related tags: <memory>)\n\n${MEMORY_STEP_NOTES}\n\nDon't use this tag if no information needs to be preserved.\n`\n : ''\n}\n## Step ${checkGoalStepNumber}: ${shouldIncludeSubGoals ? 'Check if Goal is Accomplished' : 'Check if the Instruction is Fulfilled'} (related tags: <complete>)\n\n${shouldIncludeSubGoals ? 'Based on the current screenshot and the status of all sub-goals, determine' : 'Determine'} if the entire task is completed.\n\n### CRITICAL: The User's Instruction is the Supreme Authority\n\nThe user's instruction defines the EXACT scope of what you must accomplish. You MUST follow it precisely - nothing more, nothing less. Violating this rule may cause severe consequences such as data loss, unintended operations, or system failures.\n\n**Explicit instructions vs. High-level goals:**\n- If the user gives you **explicit operation steps** (e.g., \"click X\", \"type Y\", \"fill out the form\"), treat them as exact commands. Execute ONLY those steps, nothing more.\n- If the user gives you a **high-level goal** (e.g., \"log in to the system\", \"complete the purchase\"), you may determine the necessary steps to achieve it.\n\n**What \"${shouldIncludeSubGoals ? 'goal accomplished' : 'instruction fulfilled'}\" means:**\n- The ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} when you have done EXACTLY what the user asked - no extra steps, no assumptions.\n- Do NOT perform any action beyond the explicit instruction, even if it seems logical or helpful.\n\n**Examples - Explicit instructions (execute exactly, no extra steps):**\n- \"fill out the form\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when all fields are filled. Do NOT submit the form.\n- \"click the login button\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} once clicked. Do NOT wait for page load or verify login success.\n- \"type 'hello' in the search box\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when 'hello' is typed. Do NOT press Enter or trigger search.\n- \"select the first item\" → ${shouldIncludeSubGoals ? 'Goal accomplished' : 'Instruction fulfilled'} when selected. Do NOT proceed to checkout.\n\n**Change completion:**\n- If the requested outcome is a durable change, such as create, edit, update, delete, save, send, submit, apply, or publish, do not stop at an unsaved draft, open editor, temporary input, transient selection, or staged value. Continue through the app/page's normal completion control such as Save, Done, Confirm, OK, Submit, Apply, Send, or Publish before completing, so the result remains after leaving the screen.\n- If the user only asks for an intermediate UI state, such as typing text, selecting an option, filling fields, or opening a screen without saving/submitting/applying, stop once that exact state is reached.\n\n**Special case - Scrollable option lists and dropdowns:**\n- When choosing an item from a scrollable select, dropdown, listbox, menu, or similar option list, first open the control if it is closed. Once the list is open, interact with the list itself, not the page.\n- If the target option is visible in the open list, Tap that exact option immediately.\n- If the list is open but the target option is not visible, try to find it by scrolling the open list/dropdown before giving up or interacting with other elements.\n- For an open dropdown/list, prefer small incremental Scroll actions with an explicit distance (typically 50-120 pixels) and a locate target describing the open list/dropdown. Do NOT omit distance while searching within a list, because the default scroll distance can skip over relevant options and cause oscillation.\n- While searching within an open dropdown/list, use short scrolls so intermediate options are not skipped.\n- After selecting the target, if the trigger text or result text shows the requested option, treat the current selection step as fulfilled and continue evaluating the remaining user instruction.\n\n**Special case - Text hidden by a narrow input field:**\n- CRITICAL PRIORITY OVERRIDE - Input verification after an input action:\n- This rule overrides the general requirement to verify the exact target text from the screenshot.\n- If the previous step already executed an input action, and the current input field is not empty, you MUST directly treat that input as successful.\n- In this situation, do NOT use the visible text in the screenshot to decide that the input is incomplete, incorrect, missing characters, out of order, or needs correction.\n- Do NOT perform ClearInput, another Input, KeyboardPress, or any other action intended to \"fix the input content\" just because the visible text looks different from the target text, has abnormal character order, missing characters, abnormal spacing, suspicious visual recognition results, or appears to have selected/highlighted text.\n- The general rule \"do EXACTLY what the user asked\" still applies to the intended input value you execute, but it MUST NOT be enforced by re-validating the visible text in the screenshot after the input action.\n- Differences in visible text must be assumed to be caused by clipping, horizontal scrolling, narrow input fields, text selection, caret position, or visual recognition errors rather than input failure.\n- Retry input only when the input field is clearly still empty, or when the page shows an explicit error message.\n\n**Special case - Assertion instructions:**\n- If the user's instruction includes an assertion (e.g., \"verify that...\", \"check that...\", \"assert...\"), and you observe from the screenshot that the assertion condition is NOT satisfied and cannot be satisfied, mark ${shouldIncludeSubGoals ? 'the goal' : 'it'} as failed (success=\"false\").\n- If the page is still loading (e.g., you see a loading spinner, skeleton screen, or progress bar), do NOT assert yet. Wait for the page to finish loading before evaluating the assertion.\n\n### Completion Criteria for Process-required Instructions\n\nIf the user's instruction includes explicit operation steps, ordering requirements, or action requirements, it is a process-required instruction.\n\nFor process-required instructions, do NOT treat the task as complete only because the current screenshot already shows the final expected state. Do NOT infer that earlier steps were executed from the final UI state.\n\nYou may output <complete success=\"true\"> only when the current execution history, previous logs, or the screenshot after the most recent action proves that every explicit step required by the user has been completed, and the final check condition is also satisfied.\n\nIf any explicit step lacks completion evidence in the current execution history, continue with the next missing step instead of outputting <complete>, even if the current screenshot appears to satisfy the final condition.\n${\n !shouldIncludeSubGoals\n ? `\n**Page navigation restriction:**\n- Unless the user's instruction explicitly asks you to click a link, jump to another page, or navigate to a URL, you MUST complete the task on the current page only.\n- Do NOT navigate away from the current page on your own initiative (e.g., do not click links that lead to other pages, do not use browser back/forward, do not open new URLs).\n- If the task cannot be accomplished on the current page and the user has not instructed you to navigate, report it as a failure (success=\"false\") instead of attempting to navigate to other pages.\n`\n : ''\n}\n### Output Rules\n\n- If the task is NOT complete, skip this section and continue to Step ${actionStepNumber}.\n- Use the <complete success=\"true|false\">message</complete> tag to output the result if the goal is accomplished or failed.\n - the 'success' attribute is required. ${shouldIncludeSubGoals ? 'It means whether the expected goal is accomplished based on what you observe in the current screenshot and the current execution history. ' : ''}No matter what errors occurred during execution, set success=\"true\" only when the current execution history shows that all steps required by the user have been completed and the final state satisfies the requirement. If the user asks for explicit operation steps or an ordered workflow, do not treat those steps as completed only because the current screenshot already shows the final expected state. If the ${shouldIncludeSubGoals ? 'expected goal is not accomplished and cannot be accomplished' : 'instruction is not fulfilled and cannot be fulfilled'}, set success=\"false\".\n - the 'message' is the information that will be provided to the user. If the user asks for a specific format, strictly follow that.\n- If you output <complete>, do NOT output <action-type> or <action-param-json>. The task ends here.\n\n## Step ${actionStepNumber}: Determine Next Action (related tags: <log>, <action-type>, <action-param-json>, <error>)\n\nONLY if the task is not complete: Think what the next action is according to the current screenshot${shouldIncludeSubGoals ? ' and the plan' : ''}.\n\n- Don't give extra actions or plans beyond the instruction or the plan. For example, don't try to submit the form if the instruction is only to fill something.\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully. Otherwise, retry or do something else to recover.\n- Give just the next ONE action you should do (if any)\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 3 times, you should think this is an error and set the \"error\" field to the error message.\n\n${actionStepNotes}\n\n${\n includeLocateInPlanning\n ? `${locateGroundingRules()}\n\n`\n : ''\n}### Supporting actions list\n\n${actionList}\n\n### Log to give user feedback (preamble message)\n\nThe <log> tag is a brief preamble message to the user explaining what you're about to do. It should follow these principles and examples:\n\n- **Use ${preferredLanguage}**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what's been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- <log>Click the login button</log>\n- <log>Scroll to find the 'Yes' button in popup</log>\n- <log>Previous actions failed to find the 'Yes' button, i will try again</log>\n- <log>Go back to find the login button</log>\n\n### If there is some action to do ...\n\n- Use the <action-type> and <action-param-json> tags to output the action to be executed.\n- The <action-type> MUST be one of the supporting actions. 'complete' is NOT a valid action-type.\n- Parameter names are strict. Use EXACTLY the field names listed for the selected action. Do NOT invent alias fields. If an action has a \"sample\" in its description, follow that structure.\nFor example:\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateExample1}\n}\n</action-param-json>\n\n### If you think there is an error ...\n\n- Use the <error> tag to output the error message.\n\nFor example:\n<error>Unable to find the required element on the page</error>\n\n### If there is no action to do ...\n\n- Don't output <action-type> or <action-param-json> if there is no action to do.\n\n## Return Format\n\nReturn in XML format following this decision flow:\n\n**Always include (REQUIRED):**\n<!-- Step 1: Observe${shouldIncludeSubGoals ? ' and Plan' : ''} -->\n<thought>Your thought process here. NEVER skip this tag.</thought>\n${\n shouldIncludeSubGoals\n ? `\n<!-- required when no update-plan-content is provided in the previous response -->\n<update-plan-content>...</update-plan-content>\n\n<!-- required when any sub-goal is completed -->\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n`\n : ''\n}${\n shouldIncludeSubGoals\n ? `\n<!-- Step ${memoryStepNumber}: Memory data from current screenshot if needed -->\n<memory>...</memory>\n`\n : ''\n}\n**Then choose ONE of the following paths:**\n\n**Path A: If the ${shouldIncludeSubGoals ? 'goal is accomplished' : 'instruction is fulfilled'} or failed (Step ${checkGoalStepNumber})**\n<complete success=\"true|false\">...</complete>\n\n**Path B: If the ${shouldIncludeSubGoals ? 'goal is NOT complete' : 'instruction is NOT fulfilled'} yet (Step ${actionStepNumber})**\n<!-- Determine next action -->\n<log>...</log>\n<action-type>...</action-type>\n<action-param-json>...</action-param-json>\n\n<!-- OR if there's an error -->\n<error>...</error>\n${\n shouldIncludeSubGoals\n ? `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I'll break this down into sub-goals and start with the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<update-plan-content>\n <sub-goal index=\"1\" status=\"pending\">Fill in the Name field with 'John'</sub-goal>\n <sub-goal index=\"2\" status=\"pending\">Fill in the Email field with 'john@example.com'</sub-goal>\n <sub-goal index=\"3\" status=\"pending\">Return the filled email address</sub-goal>\n</update-plan-content>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. Current sub-goal is running, will be completed after input.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (running)\n2. Fill in the Email field with 'john@example.com' (pending)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Name field with 'John'\nActions performed for current sub-goal:\n- Click on the Name field to start filling the form\n- Typing 'John' into the Name field\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Sub-goal 1 is complete. Next, I need to fill the Email field for sub-goal 2.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"1\" status=\"finished\" />\n</mark-sub-goal-done>\n<memory>Name field has been filled with 'John'</memory>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, sub-goal 2 will be complete and my task will be done.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Goal accomplished)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\nSub-goals:\n1. Fill in the Name field with 'John' (finished)\n2. Fill in the Email field with 'john@example.com' (running)\n3. Return the filled email address (pending)\nCurrent sub-goal is: Fill in the Email field with 'john@example.com'\nActions performed for current sub-goal:\n- Moving to the Email field\n- Typing email address into the Email field\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. Sub-goal 2 is complete. The user asked me to return the filled email address, so I need to include 'john@example.com' in my response. All sub-goals are now finished.</thought>\n<mark-sub-goal-done>\n <sub-goal index=\"2\" status=\"finished\" />\n <sub-goal index=\"3\" status=\"finished\" />\n</mark-sub-goal-done>\n<complete success=\"true\">john@example.com</complete>\n`\n : `\n## Multi-turn Conversation Example\n\nBelow is an example of a multi-turn conversation for \"fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address\":\n\n### Turn 1 - Initial instruction\n\n**User message:**\n<user_instruction>fill out the registration form with name 'John' and email 'john@example.com', then return the filled email address</user_instruction>\n\n**Screenshot:** [Shows a registration form with empty Name and Email fields]\n\n**Your response:**\n<thought>The user wants me to fill out the registration form with specific values and return the email address. I can see the form has two fields: Name and Email. Both are currently empty. I should start by clicking on the Name field. Note: The instruction is to fill the form only (not submit), and return the email at the end.</thought>\n<log>Click on the Name field to start filling the form</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateNameField}\n}\n</action-param-json>\n\n### Turn 2 - After clicking Name field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field now focused/active]\n\n**Your response:**\n<thought>The Name field is now focused. I need to type 'John' into this field. After this, I'll need to fill the Email field as well.</thought>\n<log>Typing 'John' into the Name field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"John\"\n}\n</action-param-json>\n\n### Turn 3 - After entering name\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name field containing 'John']\n\n**Your response:**\n<thought>The Name field now shows 'John'. Next, I need to fill the Email field with 'john@example.com'. I'll click on the Email field first.</thought>\n<log>Moving to the Email field</log>\n<action-type>Tap</action-type>\n<action-param-json>\n{\n \"locate\": ${locateEmailField}\n}\n</action-param-json>\n\n### Turn 4 - After clicking Email field\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email field focused]\n\n**Your response:**\n<thought>The Email field is now focused. I'll enter 'john@example.com'. After this, both required fields will be filled and my task will be complete.</thought>\n<log>Typing email address into the Email field</log>\n<action-type>Input</action-type>\n<action-param-json>\n{\n \"value\": \"john@example.com\"\n}\n</action-param-json>\n\n### Turn 5 - After entering email (Instruction fulfilled)\n\n**User message:**\nThe previous action has been executed, here is the latest screenshot. Please continue according to the instruction.\n\n**Screenshot:** [Shows the form with Name='John' and Email='john@example.com']\n\n**Your response:**\n<thought>Both fields are now filled: Name shows 'John' and Email shows 'john@example.com'. The user asked me to return the filled email address, so I should include 'john@example.com' in my response. The instruction has been fulfilled.</thought>\n<complete success=\"true\">john@example.com</complete>\n`\n}`;\n}\n"],"names":["locateParamSchemaDescription","promptSpec","OBSERVE_STEP_NOTES","MEMORY_STEP_NOTES","RUN_ADB_SHELL_ACTION_GUIDANCE","buildActionStepNotes","actionList","findDefaultValue","field","current","visited","Set","currentWithDef","injectLocateResultIntoSample","sample","locateFields","resultKey","sampleResults","result","sampleResultIndex","descriptionForAction","action","locateParamTypeDescription","includeLocateInPlanning","locatePromptSpec","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","findAllMidsceneLocatorField","sampleWithLocateResult","sampleStr","systemPromptToTaskPlanning","actionSpace","includeThought","includeSubGoals","preferredLanguage","getPreferredLanguage","Error","planningModelFamilyRequiredForLocateMessage","actionDescriptionList","actionStepNotes","shouldIncludeSubGoals","locateExample","prompt","exampleValueIndex","locateParamExample","locateExample1","locateNameField","locateEmailField","step1Title","step1Description","explicitInstructionRule","thoughtTagDescription","subGoalTags","memoryStepNumber","checkGoalStepNumber","actionStepNumber","locateGroundingRules"],"mappings":";;;;;;AAaA,MAAMA,+BAA+B,CAACC;IACpC,IAAIA,YACF,OAAO,CAAC,CAAC,EAAEA,WAAW,SAAS,CAAC,EAAE,EAAEA,WAAW,iBAAiB,CAAC,sBAAsB,EAAEA,WAAW,sBAAsB,EAAE;IAE9H,OAAO;AACT;AAEA,MAAMC,qBAAqB;AAO3B,MAAMC,oBAAoB;AAc1B,MAAMC,gCACJ;AAEF,MAAMC,uBAAuB,CAACC,aAC5B;QACE;QACA;WACIA,WAAW,QAAQ,CAAC,iBACpB;YAACF;SAA8B,GAC/B,EAAE;QACN;QACA;QACA;KACD,CAAC,IAAI,CAAC;AAKT,MAAMG,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAOA,MAAMC,+BAA+B,CACnCC,QACAC,cACAd;IAEA,MAAMe,YAAYf,WAAW,SAAS;IACtC,MAAMgB,gBAAgBhB,WAAW,aAAa;IAC9C,MAAMiB,SAAS;QAAE,GAAGJ,MAAM;IAAC;IAC3B,IAAIK,oBAAoB;IACxB,KAAK,MAAMX,SAASO,aAClB,IACEG,MAAM,CAACV,MAAM,IACb,AAAyB,YAAzB,OAAOU,MAAM,CAACV,MAAM,IACpBU,MAAM,CAACV,MAAM,CAAC,MAAM,EACpB;QACAU,MAAM,CAACV,MAAM,GAAG;YACd,GAAGU,MAAM,CAACV,MAAM;YAChB,CAACQ,UAAU,EAAEC,aAAa,CAACE,oBAAoBF,cAAc,MAAM,CAAC;QACtE;QACAE;IACF;IAEF,OAAOD;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC,4BACAC,0BAA0B,KAAK,EAC/BC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEL,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMM,aAAuB,EAAE;QAG/B,MAAMC,SAASP,OAAO,WAAW;QAIjC,MAAMQ,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKvB,MAAM,IAAIwB,OAAO,OAAO,CAACF,OACxC,IAAItB,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMyB,aACJ,AACE,cADF,OAAQzB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAM0B,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAe5B,OAAOc;gBAGvC,MAAMe,cAAcC,kBAAkB9B;gBAGtC,MAAM+B,eAAehC,iBAAiBC;gBACtC,MAAMgC,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAGA,IAAI1B,OAAO,MAAM,IAAI,AAAyB,YAAzB,OAAOA,OAAO,MAAM,EAAe;QACtD,MAAMN,eAAeiC,4BAA4B3B,OAAO,WAAW;QACnE,MAAM4B,yBACJ1B,2BAA2BC,mBACvBX,6BACEQ,OAAO,MAAM,EACbN,cACAS,oBAEFH,OAAO,MAAM;QACnB,MAAM6B,YAAY,CAAC,WAAW,EAAEzB,MAAMA,IAAI,aAAa,EAAEJ,OAAO,IAAI,CAAC,gBAAgB,EAAEI,MAAMA,IAAI,qBAAqB,EAAEA,MAAMA,MAAMoB,KAAK,SAAS,CAACI,wBAAwB,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,EAAExB,MAAMA,KAAK,EAAE,EAAE,EAAEA,MAAMA,IAAI,oBAAoB,CAAC;QACxPC,OAAO,IAAI,CAACwB;IACd;IAEA,OAAO,CAAC,EAAE,EAAE7B,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEI,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe0B,2BAA2B,EAC/CC,WAAW,EACX5B,gBAAgB,EAChBD,uBAAuB,EACvB8B,cAAc,EACdC,eAAe,EAOhB;IACC,MAAMC,oBAAoBC;IAE1B,IAAIjC,2BAA2B,CAACC,kBAC9B,MAAM,IAAIiC,MAAMC;IAGlB,MAAMC,wBAAwBP,YAAY,GAAG,CAAC,CAAC/B,SACtCD,qBACLC,QACArB,6BACEuB,0BAA0BC,mBAAmBiB,SAE/ClB,yBACAC;IAGJ,MAAMlB,aAAaqD,sBAAsB,IAAI,CAAC;IAC9C,MAAMC,kBAAkBvD,qBAAqBC;IAG7C,MAAMuD,wBAAwBP,mBAAmB;IAEjD,MAAMQ,gBAAgB,CAACC,QAAgBC,oBACrCC,mBACEF,QACAxC,0BAA0BC,mBAAmBiB,QAC7CjB,kBAAkB,aAAa,CAACwC,kBAAkB,IAChDxC,kBAAkB,aAAa,CAAC,EAAE;IAExC,MAAM0C,iBAAiBJ,cACrB,8CACA;IAEF,MAAMK,kBAAkBL,cACtB,6CACA;IAEF,MAAMM,mBAAmBN,cACvB,8CACA;IAOF,MAAMO,aAAaR,wBACf,uGACA;IAEJ,MAAMS,mBAAmBT,wBACrB,wNACA;IAEJ,MAAMU,0BAA0B;IAEhC,MAAMC,wBAAwBX,wBAC1B,CAAC;;;;AAIP,EAAEU,yBAAyB,GACrB,CAAC;;;;AAIP,EAAEA,yBAAyB;IAEzB,MAAME,cAAcZ,wBAChB,CAAC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;qBAuDc,CAAC,GAChB;IAIJ,MAAMa,mBAAmB;IACzB,MAAMC,sBAAsBd,wBAAwB,IAAI;IACxD,MAAMe,mBAAmBf,wBAAwB,IAAI;IAErD,OAAO,CAAC;;;AAGV,EAAEQ,WAAW;;AAEb,EAAEC,iBAAiB;AACnB,EAAET,wBAAwB,CAAC,EAAE,EAAE3D,mBAAmB,EAAE,CAAC,GAAG,GAAG;;;AAG3D,EAAEsE,sBAAsB;AACxB,EAAEC,YAAY;AACd,EACEZ,wBACI,CAAC;QACC,EAAEa,iBAAiB;;AAE3B,EAAEvE,kBAAkB;;;AAGpB,CAAC,GACK,GACL;QACO,EAAEwE,oBAAoB,EAAE,EAAEd,wBAAwB,kCAAkC,wCAAwC;;AAEpI,EAAEA,wBAAwB,+EAA+E,YAAY;;;;;;;;;;QAU7G,EAAEA,wBAAwB,sBAAsB,wBAAwB;MAC1E,EAAEA,wBAAwB,yBAAyB,2BAA2B;;;;wBAI5D,EAAEA,wBAAwB,sBAAsB,wBAAwB;6BACnE,EAAEA,wBAAwB,sBAAsB,wBAAwB;qCAChE,EAAEA,wBAAwB,sBAAsB,wBAAwB;4BACjF,EAAEA,wBAAwB,sBAAsB,wBAAwB;;;;;;;;;;;;;;;;;;;;;;;;;0NAyBsH,EAAEA,wBAAwB,aAAa,KAAK;;;;;;;;;;;;AAYtQ,EACE,CAACA,wBACG,CAAC;;;;;AAKP,CAAC,GACK,GACL;;;sEAGqE,EAAEe,iBAAiB;;yCAEhD,EAAEf,wBAAwB,+IAA+I,GAAG,wZAAwZ,EAAEA,wBAAwB,iEAAiE,uDAAuD;;;;QAIvvB,EAAEe,iBAAiB;;mGAEwE,EAAEf,wBAAwB,kBAAkB,GAAG;;;;;;;;AAQlJ,EAAED,gBAAgB;;AAElB,EACErC,0BACI,GAAGsD,uBAAuB;;AAEhC,CAAC,GACK,GACL;;AAED,EAAEvE,WAAW;;;;;;QAML,EAAEiD,kBAAkB;;;;;;;;;;;;;;;;;;;;YAoBhB,EAAEW,eAAe;;;;;;;;;;;;;;;;;;;;oBAoBT,EAAEL,wBAAwB,cAAc,GAAG;;AAE/D,EACEA,wBACI,CAAC;;;;;;;;AAQP,CAAC,GACK,KAEJA,wBACI,CAAC;UACG,EAAEa,iBAAiB;;AAE7B,CAAC,GACK,GACL;;;iBAGgB,EAAEb,wBAAwB,yBAAyB,2BAA2B,iBAAiB,EAAEc,oBAAoB;;;iBAGrH,EAAEd,wBAAwB,yBAAyB,+BAA+B,WAAW,EAAEe,iBAAiB;;;;;;;;AAQjI,EACEf,wBACI,CAAC;;;;;;;;;;;;;;;;;;;;;;;YAuBK,EAAEM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAuDlB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoD/B,CAAC,GACK,CAAC;;;;;;;;;;;;;;;;;;YAkBK,EAAED,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAkClB,EAAEC,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+B/B,CAAC,EACC;AACF"}
@@ -298,7 +298,7 @@ function normalizeMobileSwipeParam(param, screenSize) {
298
298
  }
299
299
  const defineActionSwipe = (config)=>defineAction({
300
300
  name: 'Swipe',
301
- description: 'Perform a touch gesture for interactions beyond regular scrolling (e.g., flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use "distance" + "direction" for relative movement, or "end" for precise endpoint.',
301
+ description: 'Perform a touch gesture for interactions beyond regular scrolling (e.g., adjust a continuous control such as a slider, flip pages in a carousel, dismiss a notification, swipe-to-delete a list item). For regular content scrolling, use Scroll instead. Use "distance" + "direction" for relative movement, or "start" + "end" for precise endpoint movement.',
302
302
  paramSchema: ActionSwipeParamSchema,
303
303
  sample: {
304
304
  start: {