@midscene/core 1.7.11-beta-20260512114859.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +5 -5
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +5 -5
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
- package/dist/es/ai-model/inspect.mjs +2 -4
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +1 -3
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/codex-app-server.mjs +10 -11
- package/dist/es/ai-model/service-caller/codex-app-server.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +52 -39
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +5 -5
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +5 -5
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
- package/dist/lib/ai-model/inspect.js +2 -4
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +1 -3
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/codex-app-server.js +10 -11
- package/dist/lib/ai-model/service-caller/codex-app-server.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +52 -39
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/tasks.d.ts +2 -2
- package/dist/types/ai-model/llm-planning.d.ts +2 -2
- package/dist/types/ai-model/service-caller/codex-app-server.d.ts +5 -5
- package/dist/types/ai-model/service-caller/index.d.ts +1 -4
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import {\n AIResponseParseError,\n ConversationHistory,\n autoGLMPlanning,\n plan,\n uiTarsPlanning,\n} from '@/ai-model';\nimport { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeepThinkOption,\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst warnLog = getDebug('device-task-executor', { console: true });\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n waitAfterAction?: number;\n\n useDeviceTime?: boolean;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n waitAfterAction?: number;\n useDeviceTime?: boolean;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.waitAfterAction = opts.waitAfterAction;\n this.useDeviceTime = opts.useDeviceTime;\n this.hooks = opts.hooks;\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n waitAfterAction: opts.waitAfterAction,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n /**\n * Get a readable time string. When device time is enabled, use the\n * device-formatted wall-clock time directly so host timezone formatting does\n * not reinterpret a device timestamp.\n * @param format - Optional format string\n * @returns A formatted time string\n */\n private async getTimeString(format?: string): Promise<string> {\n if (this.useDeviceTime) {\n if (this.interface.getDeviceLocalTimeString) {\n try {\n return await this.interface.getDeviceLocalTimeString(format);\n } catch (error) {\n warnLog(\n `Failed to get device time string, falling back to runtime time: ${error}`,\n );\n }\n } else {\n warnLog(\n 'useDeviceTime is enabled but getDeviceLocalTimeString is not implemented, falling back to runtime time.',\n );\n }\n }\n\n return getReadableTimeString(format);\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Act', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n fileChooserAccept?: string[],\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n includeBboxInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n deepLocate,\n abortSignal,\n );\n });\n }\n\n private async runAction(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n const conversationHistory = new ConversationHistory();\n\n const session = this.createExecutionSession(\n taskTitleStr('Act', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n // Check abort signal before each planning cycle\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // Get sub-goal status text if available\n const subGoalStatus = conversationHistory.subGoalsToText() || undefined;\n\n // Get memories text if available\n const memoriesStatus = conversationHistory.memoriesToText() || undefined;\n\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n deepThink,\n ...(subGoalStatus ? { subGoalStatus } : {}),\n ...(memoriesStatus ? { memoriesStatus } : {}),\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { modelFamily } = modelConfigForPlanning;\n const timing = executorContext.task.timing;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planImpl = isUITars(modelFamily)\n ? uiTarsPlanning\n : isAutoGLM(modelFamily)\n ? autoGLMPlanning\n : plan;\n\n let planResult: Awaited<ReturnType<typeof planImpl>>;\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n planResult = await planImpl(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n deepThink,\n abortSignal,\n });\n } catch (planError) {\n if (planError instanceof AIResponseParseError) {\n // Record usage and rawResponse even when parsing fails\n executorContext.task.usage = planError.usage;\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse: planError.rawResponse,\n };\n }\n throw planError;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n thought,\n log,\n memory,\n error,\n usage,\n rawResponse,\n reasoning_content,\n finalizeSuccess,\n finalizeMessage,\n updateSubGoals,\n markFinishedIndexes,\n } = planResult;\n outputString = finalizeMessage;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n thought,\n memory,\n yamlFlow: planResult.yamlFlow,\n output: finalizeMessage,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n updateSubGoals,\n markFinishedIndexes,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n // Check if task was finalized with failure\n if (finalizeSuccess === false) {\n assert(\n false,\n `Task failed: ${finalizeMessage || 'No error message provided'}\\n${log || ''}`,\n );\n }\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n deepLocate,\n abortSignal,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n conversationHistory.pendingFeedbackMessage,\n );\n }\n\n // Set initial time context for the first planning call\n const initialTimeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;\n\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check abort signal after executing actions\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!conversationHistory.pendingFeedbackMessage) {\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n domIncluded: opt?.domIncluded,\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n keyOfResult = type;\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n uiContext,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n // AI model may return {result: ...} instead of {[keyOfResult]: ...}\n if (data?.[keyOfResult] !== undefined) {\n outputResult = (data as any)[keyOfResult];\n } else if (data?.result !== undefined) {\n outputResult = (data as any).result;\n } else {\n assert(false, 'No result in query data');\n }\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const elapsed = now - currentCheckStart;\n const timeRemaining = checkIntervalMs - elapsed;\n const thought = `Check interval is ${checkIntervalMs}ms, ${elapsed}ms elapsed since last check, sleeping for ${timeRemaining}ms`;\n const { tasks: sleepTasks } = await this.convertPlanToExecutable(\n [{ type: 'Sleep', param: { timeMs: timeRemaining }, thought }],\n modelConfig,\n modelConfig,\n );\n if (sleepTasks[0]) {\n await session.appendAndRun(sleepTasks[0]);\n }\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = getError();\n if (error) {\n throw error;\n }\n return result;\n } finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","warnLog","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","format","error","getReadableTimeString","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","deepLocate","abortSignal","withFileChooser","conversationHistory","ConversationHistory","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","outputString","subGoalStatus","memoriesStatus","modelFamily","timing","actionSpace","action","Array","console","planImpl","isUITars","uiTarsPlanning","isAutoGLM","autoGLMPlanning","plan","planResult","setTimingFieldOnce","planError","AIResponseParseError","JSON","actions","thought","log","memory","usage","rawResponse","reasoning_content","finalizeSuccess","finalizeMessage","updateSubGoals","markFinishedIndexes","executables","initialTimeString","timeString","String","Error","errorMsg","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","outputResult","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","elapsed","timeRemaining","sleepTasks","interfaceInstance","service","opts","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AA2DA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,wBAAwB;IAAE,SAAS;AAAK;AACjE,MAAME,wCAAwC;AAIvC,MAAMC;IAsBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAiCQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IASA,MAAc,cAAcG,MAAe,EAAmB;QAC5D,IAAI,IAAI,CAAC,aAAa,EACpB,IAAI,IAAI,CAAC,SAAS,CAAC,wBAAwB,EACzC,IAAI;YACF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,wBAAwB,CAACA;QACvD,EAAE,OAAOC,OAAO;YACdR,QACE,CAAC,gEAAgE,EAAEQ,OAAO;QAE9E;aAEAR,QACE;QAKN,OAAOS,sBAAsBF;IAC/B;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCR,OAIC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BM,OACAC,wBACAC,6BACAR;IAEJ;IAEA,MAAM,uBAAuBS,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,OAAOH;QAGtB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJnB,KAAa,EACbO,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACZ;QAC5C,MAAM,EAAEoB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAC3BC,iBAA4B,EAC5BC,UAAoB,EACpBC,WAAyB,EASzB;QACA,OAAOC,gBAAgB,IAAI,CAAC,SAAS,EAAEH,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAf,wBACAC,6BACAe,uBACAC,cACAC,WACAC,8BACAC,oBACAC,WACAE,YACAC;IAGN;IAEA,MAAc,UACZT,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAC3BE,UAAoB,EACpBC,WAAyB,EASzB;QACA,MAAME,sBAAsB,IAAIC;QAEhC,MAAMvB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,OAAOU;QAEtB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIwB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJX,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEoB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAClC,IAAIC;QAGJ,MAAO,KAAM;YAEX,IAAIT,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,MAAMU,gBAAgBR,oBAAoB,cAAc,MAAMK;YAG9D,MAAMI,iBAAiBT,oBAAoB,cAAc,MAAMK;YAE/D,MAAMlB,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;oBACAC;oBACA,GAAIa,gBAAgB;wBAAEA;oBAAc,IAAI,CAAC,CAAC;oBAC1C,GAAIC,iBAAiB;wBAAEA;oBAAe,IAAI,CAAC,CAAC;gBAC9C;gBACA,UAAU,OAAO5B,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAE2B,WAAW,EAAE,GAAGpC;oBACxB,MAAMqC,SAAS7B,gBAAgB,IAAI,CAAC,MAAM;oBAE1C,MAAM8B,cAAc,IAAI,CAAC,cAAc;oBACvCnD,MACE,sCACAmD,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhD7B,OAAO8B,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,WAAWC,SAASP,eACtBQ,iBACAC,UAAUT,eACRU,kBACAC;oBAEN,IAAIC;oBACJ,IAAI;wBACFC,mBAAmBZ,QAAQ;wBAC3BW,aAAa,MAAMN,SAASnC,MAAM,eAAe,EAAE;4BACjD,SAASE;4BACT,eAAeF,MAAM,YAAY;4BACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;4BAC3C+B;4BACA,aAAatC;4BACb0B;4BACA,aAAaV;4BACbI;4BACAC;4BACAG;wBACF;oBACF,EAAE,OAAO0B,WAAW;wBAClB,IAAIA,qBAAqBC,sBAAsB;4BAE7C3C,gBAAgB,IAAI,CAAC,KAAK,GAAG0C,UAAU,KAAK;4BAC5C1C,gBAAgB,IAAI,CAAC,GAAG,GAAG;gCACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;gCAClC,aAAa0C,UAAU,WAAW;4BACpC;wBACF;wBACA,MAAMA;oBACR,SAAU;wBACRD,mBAAmBZ,QAAQ;oBAC7B;oBACAlD,MAAM,cAAciE,KAAK,SAAS,CAACJ,YAAY,MAAM;oBAErD,MAAM,EACJK,OAAO,EACPC,OAAO,EACPC,GAAG,EACHC,MAAM,EACN3D,KAAK,EACL4D,KAAK,EACLC,WAAW,EACXC,iBAAiB,EACjBC,eAAe,EACfC,eAAe,EACfC,cAAc,EACdC,mBAAmB,EACpB,GAAGf;oBACJf,eAAe4B;oBAEfrD,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCkD;oBACF;oBACAlD,gBAAgB,IAAI,CAAC,KAAK,GAAGiD;oBAC7BjD,gBAAgB,IAAI,CAAC,iBAAiB,GAAGmD;oBACzCnD,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS6C,WAAW,EAAE;wBACtBE;wBACAD;wBACAE;wBACA,UAAUR,WAAW,QAAQ;wBAC7B,QAAQa;wBACR,wBAAwBb,WAAW,sBAAsB;wBACzDc;wBACAC;oBACF;oBACAvD,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACb,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAE0D,OAAO,IAAI;oBAG3D,IAAIK,AAAoB,UAApBA,iBACFlD,OACE,OACA,CAAC,aAAa,EAAEmD,mBAAmB,4BAA4B,EAAE,EAAEN,OAAO,IAAI;oBAIlF,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAMP,aAAanC,QAAQ;YAG3B,MAAMd,QAAQiD,YAAY,WAAW,EAAE;YACvCnB,SAAS,IAAI,IAAKmB,YAAY,YAAY,EAAE;YAE5C,IAAIgB;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CjE,OACAC,wBACAC,6BACA;oBACEiB;oBACAK;oBACAC;gBACF;YAEJ,EAAE,OAAO3B,OAAO;gBACd,OAAOO,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEP,MAAM,SAAS,EAAEuD,KAAK,SAAS,CAC5ErD,QACC;YAEP;YACA,IAAI2B,oBAAoB,sBAAsB,EAC5Ce,QAAQ,IAAI,CACV,8FACAf,oBAAoB,sBAAsB;YAK9C,MAAMuC,oBAAoB,MAAM,IAAI,CAAC,aAAa;YAClDvC,oBAAoB,sBAAsB,IAAI,CAAC,cAAc,EAAEuC,mBAAmB;YAElF,IAAI;gBACF,MAAM7D,QAAQ,YAAY,CAAC4D,YAAY,KAAK;YAC9C,EAAE,OAAOnE,OAAY;gBAEnBmC;gBACA,MAAMkC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CxC,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEwC,WAAW,iCAAiC,EAAErE,OAAO,WAAWsE,OAAOtE,QAAQ;gBACrIV,MACE,yFACAU,iBAAiBuE,QAAQvE,MAAM,OAAO,GAAGsE,OAAOtE,QAChD,6CACAmC;YAEJ;YAEA,IAAIA,8BAA8B1C,uCAChC,OAAOc,QAAQ,eAAe,CAAC;YAIjC,IAAIoB,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,IAAI,CAACwB,YAAY,wBACf;YAIF,EAAEpB;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAMuC,WAAW,CAAC,UAAU,EAAEvC,qBAAqB,4JAA4J,CAAC;gBAChN,OAAO1B,QAAQ,eAAe,CAACiE;YACjC;YAEA,IAAI,CAAC3C,oBAAoB,sBAAsB,EAAE;gBAC/C,MAAMwC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CxC,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEwC,WAAW,gDAAgD,CAAC;YACpH;QACF;QAEA,OAAO;YACL,QAAQ;gBACNrC;gBACA,QAAQI;YACV;YACAtB;QACF;IACF;IAEQ,oBACN2D,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,aAAaG,KAAK;gBAClB,YAAYC,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOhE,OAAOqE;gBACtB,MAAM,EAAEtE,IAAI,EAAE,GAAGsE;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZzE,KAAK,GAAG,GAAG;wBACTyE;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACAzE,KAAK,KAAK,GAAGyE,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,mBACjBzE,KAAK,iBAAiB,GAAGyE,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMtE,YAAYmE,YAAY,SAAS;gBACvClE,OAAOD,WAAW;gBAElB,MAAMuE,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBAAkB;oBAC3BE,cAAcZ;oBACdW,cAAc;wBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;oBACrC;gBACF;gBAEA,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DtF,MAAM;oBACN,MAAMmG,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX,kBACAjE;gBAEJ,EAAE,OAAOZ,OAAO;oBACd,IAAIA,iBAAiB2F,cACnBV,UAAUjF,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE4F,IAAI,EAAEnC,OAAO,EAAEyB,IAAI,EAAE,GAAGK;gBAChCN,UAAUC;gBAEV,IAAIW,eAAeD;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTC,eAAeD;qBACV,IAAInB,AAAS,cAATA,MAEPoB,eADED,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTC,eAAe;qBAGf,IAAID,MAAM,CAACP,YAAY,KAAKnD,QAC1B2D,eAAgBD,IAAY,CAACP,YAAY;qBACpC,IAAIO,MAAM,WAAW1D,QAC1B2D,eAAgBD,KAAa,MAAM;qBAEnC/E,OAAO,OAAO;gBAKpB,IAAI4D,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtCpF,KAAK,OAAO,GAAGgD;oBACf,MAAM,IAAIc,MAAM,CAAC,kBAAkB,EAAEd,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQoC;oBACR,KAAKb;oBACLvB;gBACF;YACF;QACF;QAEA,OAAOqB;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMtE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEiE,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASnB,KAAK,SAAS,CAACmB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAM/D,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACuE;QAE1C,IAAI,CAAC9D,QACH,MAAM,IAAIuD,MACR;QAIJ,MAAM,EAAEtD,MAAM,EAAEwC,OAAO,EAAE,GAAGzC;QAE5B,OAAO;YACLC;YACAwC;YACA3C;QACF;IACF;IAEA,MAAM,QACJgF,SAAsB,EACtBlB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEoB,UAAU,EAAElB,gBAAgB,EAAE,GAAGmB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMxF,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAWyF;QAE1B,MAAMnF,SAASP,QAAQ,SAAS;QAChC,MAAM,EACJ2F,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG1B;QACJ,MAAM2B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEAzF,OAAOiF,WAAW;QAClBjF,OAAOqF,WAAW;QAClBrF,OAAOsF,iBAAiB;QAExBtF,OACEsF,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAM9B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAiB,YACApB,aACA4B,mBACA1B;YAGF,MAAM7D,SAAU,MAAMT,QAAQ,YAAY,CAACuE;YAO3C,IAAI9D,QAAQ,QACV,OAAO;gBACL,QAAQkB;gBACRpB;YACF;YAGF6F,eACE3F,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAE+E,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,UAAUD,MAAMD;gBACtB,MAAMG,gBAAgBZ,kBAAkBW;gBACxC,MAAMrD,UAAU,CAAC,kBAAkB,EAAE0C,gBAAgB,IAAI,EAAEW,QAAQ,0CAA0C,EAAEC,cAAc,EAAE,CAAC;gBAChI,MAAM,EAAE,OAAOC,UAAU,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAC9D;oBAAC;wBAAE,MAAM;wBAAS,OAAO;4BAAE,QAAQD;wBAAc;wBAAGtD;oBAAQ;iBAAE,EAC9DkB,aACAA;gBAEF,IAAIqC,UAAU,CAAC,EAAE,EACf,MAAMzG,QAAQ,YAAY,CAACyG,UAAU,CAAC,EAAE;YAE5C;QACF;QAEA,OAAOzG,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEoG,cAAc;IACnE;IA9sBA,YACEM,iBAAoC,EACpCC,OAAgB,EAChBC,IAQC,CACD;QArCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAEA;QAEA;QAoBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,eAAe,GAAGA,KAAK,eAAe;QAC3C,IAAI,CAAC,aAAa,GAAGA,KAAK,aAAa;QACvC,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAGA,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCH;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;YAChC,iBAAiBA,KAAK,eAAe;QACvC;IACF;AAkrBF;AAEO,eAAevF,gBACpBqF,iBAAoC,EACpCxF,iBAAuC,EACvCiB,MAAwB;IAExB,IAAI,CAACjB,mBAAmB,QACtB,OAAOiB;IAGT,IAAI,CAACuE,kBAAkB,2BAA2B,EAChD,MAAM,IAAI1C,MACR,CAAC,gCAAgC,EAAE0C,kBAAkB,aAAa,EAAE;IAIxE,MAAMI,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAAC7F;IACvB;IAEA,MAAM,EAAE8F,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMP,kBAAkB,2BAA2B,CAACI;IACtD,IAAI;QACF,MAAMrG,SAAS,MAAM0B;QAErB,MAAM1C,QAAQwH;QACd,IAAIxH,OACF,MAAMA;QAER,OAAOgB;IACT,SAAU;QACRuG;IACF;AACF"}
|
|
1
|
+
{"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import {\n AIResponseParseError,\n ConversationHistory,\n autoGLMPlanning,\n plan,\n uiTarsPlanning,\n} from '@/ai-model';\nimport { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { setTimingFieldOnce } from '@/task-timing';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst warnLog = getDebug('device-task-executor', { console: true });\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n waitAfterAction?: number;\n\n useDeviceTime?: boolean;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n waitAfterAction?: number;\n useDeviceTime?: boolean;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.waitAfterAction = opts.waitAfterAction;\n this.useDeviceTime = opts.useDeviceTime;\n this.hooks = opts.hooks;\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n waitAfterAction: opts.waitAfterAction,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n /**\n * Get a readable time string. When device time is enabled, use the\n * device-formatted wall-clock time directly so host timezone formatting does\n * not reinterpret a device timestamp.\n * @param format - Optional format string\n * @returns A formatted time string\n */\n private async getTimeString(format?: string): Promise<string> {\n if (this.useDeviceTime) {\n if (this.interface.getDeviceLocalTimeString) {\n try {\n return await this.interface.getDeviceLocalTimeString(format);\n } catch (error) {\n warnLog(\n `Failed to get device time string, falling back to runtime time: ${error}`,\n );\n }\n } else {\n warnLog(\n 'useDeviceTime is enabled but getDeviceLocalTimeString is not implemented, falling back to runtime time.',\n );\n }\n }\n\n return getReadableTimeString(format);\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n deepLocate?: boolean;\n abortSignal?: AbortSignal;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Act', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n planningModeDeepThink?: boolean,\n fileChooserAccept?: string[],\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n includeBboxInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n planningModeDeepThink,\n deepLocate,\n abortSignal,\n );\n });\n }\n\n private async runAction(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n planningModeDeepThink?: boolean,\n deepLocate?: boolean,\n abortSignal?: AbortSignal,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n const conversationHistory = new ConversationHistory();\n\n const session = this.createExecutionSession(\n taskTitleStr('Act', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n // Check abort signal before each planning cycle\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // Get sub-goal status text if available\n const subGoalStatus = conversationHistory.subGoalsToText() || undefined;\n\n // Get memories text if available\n const memoriesStatus = conversationHistory.memoriesToText() || undefined;\n\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n planningModeDeepThink,\n ...(subGoalStatus ? { subGoalStatus } : {}),\n ...(memoriesStatus ? { memoriesStatus } : {}),\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { modelFamily } = modelConfigForPlanning;\n const timing = executorContext.task.timing;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planImpl = isUITars(modelFamily)\n ? uiTarsPlanning\n : isAutoGLM(modelFamily)\n ? autoGLMPlanning\n : plan;\n\n let planResult: Awaited<ReturnType<typeof planImpl>>;\n try {\n setTimingFieldOnce(timing, 'callAiStart');\n planResult = await planImpl(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n planningModeDeepThink,\n abortSignal,\n });\n } catch (planError) {\n if (planError instanceof AIResponseParseError) {\n // Record usage and rawResponse even when parsing fails\n executorContext.task.usage = planError.usage;\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse: planError.rawResponse,\n };\n }\n throw planError;\n } finally {\n setTimingFieldOnce(timing, 'callAiEnd');\n }\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n thought,\n log,\n memory,\n error,\n usage,\n rawResponse,\n reasoning_content,\n finalizeSuccess,\n finalizeMessage,\n updateSubGoals,\n markFinishedIndexes,\n } = planResult;\n outputString = finalizeMessage;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n thought,\n memory,\n yamlFlow: planResult.yamlFlow,\n output: finalizeMessage,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n updateSubGoals,\n markFinishedIndexes,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n // Check if task was finalized with failure\n if (finalizeSuccess === false) {\n assert(\n false,\n `Task failed: ${finalizeMessage || 'No error message provided'}\\n${log || ''}`,\n );\n }\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n deepLocate,\n abortSignal,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n conversationHistory.pendingFeedbackMessage,\n );\n }\n\n // Set initial time context for the first planning call\n const initialTimeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage += `Current time: ${initialTimeString}`;\n\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check abort signal after executing actions\n if (abortSignal?.aborted) {\n return session.appendErrorPlan(\n `Task aborted: ${abortSignal.reason || 'abort signal received'}`,\n );\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!conversationHistory.pendingFeedbackMessage) {\n const timeString = await this.getTimeString();\n conversationHistory.pendingFeedbackMessage = `Time: ${timeString}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n domIncluded: opt?.domIncluded,\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n rawResponse: dump.taskInfo?.rawResponse,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.reasoning_content) {\n task.reasoning_content = dump.taskInfo.reasoning_content;\n }\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n keyOfResult = type;\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n uiContext,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n // AI model may return {result: ...} instead of {[keyOfResult]: ...}\n if (data?.[keyOfResult] !== undefined) {\n outputResult = (data as any)[keyOfResult];\n } else if (data?.result !== undefined) {\n outputResult = (data as any).result;\n } else {\n assert(false, 'No result in query data');\n }\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const elapsed = now - currentCheckStart;\n const timeRemaining = checkIntervalMs - elapsed;\n const thought = `Check interval is ${checkIntervalMs}ms, ${elapsed}ms elapsed since last check, sleeping for ${timeRemaining}ms`;\n const { tasks: sleepTasks } = await this.convertPlanToExecutable(\n [{ type: 'Sleep', param: { timeMs: timeRemaining }, thought }],\n modelConfig,\n modelConfig,\n );\n if (sleepTasks[0]) {\n await session.appendAndRun(sleepTasks[0]);\n }\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = getError();\n if (error) {\n throw error;\n }\n return result;\n } finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","warnLog","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","format","error","getReadableTimeString","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","planningModeDeepThink","fileChooserAccept","deepLocate","abortSignal","withFileChooser","conversationHistory","ConversationHistory","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","outputString","subGoalStatus","memoriesStatus","modelFamily","timing","actionSpace","action","Array","console","planImpl","isUITars","uiTarsPlanning","isAutoGLM","autoGLMPlanning","plan","planResult","setTimingFieldOnce","planError","AIResponseParseError","JSON","actions","thought","log","memory","usage","rawResponse","reasoning_content","finalizeSuccess","finalizeMessage","updateSubGoals","markFinishedIndexes","executables","initialTimeString","timeString","String","Error","errorMsg","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","outputResult","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","elapsed","timeRemaining","sleepTasks","interfaceInstance","service","opts","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AA0DA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,wBAAwB;IAAE,SAAS;AAAK;AACjE,MAAME,wCAAwC;AAIvC,MAAMC;IAsBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAiCQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IASA,MAAc,cAAcG,MAAe,EAAmB;QAC5D,IAAI,IAAI,CAAC,aAAa,EACpB,IAAI,IAAI,CAAC,SAAS,CAAC,wBAAwB,EACzC,IAAI;YACF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,wBAAwB,CAACA;QACvD,EAAE,OAAOC,OAAO;YACdR,QACE,CAAC,gEAAgE,EAAEQ,OAAO;QAE9E;aAEAR,QACE;QAKN,OAAOS,sBAAsBF;IAC/B;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCR,OAIC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BM,OACAC,wBACAC,6BACAR;IAEJ;IAEA,MAAM,uBAAuBS,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,OAAOH;QAGtB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJnB,KAAa,EACbO,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACZ;QAC5C,MAAM,EAAEoB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,qBAA+B,EAC/BC,iBAA4B,EAC5BC,UAAoB,EACpBC,WAAyB,EASzB;QACA,OAAOC,gBAAgB,IAAI,CAAC,SAAS,EAAEH,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAf,wBACAC,6BACAe,uBACAC,cACAC,WACAC,8BACAC,oBACAC,uBACAE,YACAC;IAGN;IAEA,MAAc,UACZT,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,qBAA+B,EAC/BE,UAAoB,EACpBC,WAAyB,EASzB;QACA,MAAME,sBAAsB,IAAIC;QAEhC,MAAMvB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,OAAOU;QAEtB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIwB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJX,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEoB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAClC,IAAIC;QAGJ,MAAO,KAAM;YAEX,IAAIT,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,MAAMU,gBAAgBR,oBAAoB,cAAc,MAAMK;YAG9D,MAAMI,iBAAiBT,oBAAoB,cAAc,MAAMK;YAE/D,MAAMlB,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;oBACAC;oBACA,GAAIa,gBAAgB;wBAAEA;oBAAc,IAAI,CAAC,CAAC;oBAC1C,GAAIC,iBAAiB;wBAAEA;oBAAe,IAAI,CAAC,CAAC;gBAC9C;gBACA,UAAU,OAAO5B,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAE2B,WAAW,EAAE,GAAGpC;oBACxB,MAAMqC,SAAS7B,gBAAgB,IAAI,CAAC,MAAM;oBAE1C,MAAM8B,cAAc,IAAI,CAAC,cAAc;oBACvCnD,MACE,sCACAmD,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhD7B,OAAO8B,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,WAAWC,SAASP,eACtBQ,iBACAC,UAAUT,eACRU,kBACAC;oBAEN,IAAIC;oBACJ,IAAI;wBACFC,mBAAmBZ,QAAQ;wBAC3BW,aAAa,MAAMN,SAASnC,MAAM,eAAe,EAAE;4BACjD,SAASE;4BACT,eAAeF,MAAM,YAAY;4BACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;4BAC3C+B;4BACA,aAAatC;4BACb0B;4BACA,aAAaV;4BACbI;4BACAC;4BACAG;wBACF;oBACF,EAAE,OAAO0B,WAAW;wBAClB,IAAIA,qBAAqBC,sBAAsB;4BAE7C3C,gBAAgB,IAAI,CAAC,KAAK,GAAG0C,UAAU,KAAK;4BAC5C1C,gBAAgB,IAAI,CAAC,GAAG,GAAG;gCACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;gCAClC,aAAa0C,UAAU,WAAW;4BACpC;wBACF;wBACA,MAAMA;oBACR,SAAU;wBACRD,mBAAmBZ,QAAQ;oBAC7B;oBACAlD,MAAM,cAAciE,KAAK,SAAS,CAACJ,YAAY,MAAM;oBAErD,MAAM,EACJK,OAAO,EACPC,OAAO,EACPC,GAAG,EACHC,MAAM,EACN3D,KAAK,EACL4D,KAAK,EACLC,WAAW,EACXC,iBAAiB,EACjBC,eAAe,EACfC,eAAe,EACfC,cAAc,EACdC,mBAAmB,EACpB,GAAGf;oBACJf,eAAe4B;oBAEfrD,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCkD;oBACF;oBACAlD,gBAAgB,IAAI,CAAC,KAAK,GAAGiD;oBAC7BjD,gBAAgB,IAAI,CAAC,iBAAiB,GAAGmD;oBACzCnD,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS6C,WAAW,EAAE;wBACtBE;wBACAD;wBACAE;wBACA,UAAUR,WAAW,QAAQ;wBAC7B,QAAQa;wBACR,wBAAwBb,WAAW,sBAAsB;wBACzDc;wBACAC;oBACF;oBACAvD,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACb,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAE0D,OAAO,IAAI;oBAG3D,IAAIK,AAAoB,UAApBA,iBACFlD,OACE,OACA,CAAC,aAAa,EAAEmD,mBAAmB,4BAA4B,EAAE,EAAEN,OAAO,IAAI;oBAIlF,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAMP,aAAanC,QAAQ;YAG3B,MAAMd,QAAQiD,YAAY,WAAW,EAAE;YACvCnB,SAAS,IAAI,IAAKmB,YAAY,YAAY,EAAE;YAE5C,IAAIgB;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CjE,OACAC,wBACAC,6BACA;oBACEiB;oBACAK;oBACAC;gBACF;YAEJ,EAAE,OAAO3B,OAAO;gBACd,OAAOO,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEP,MAAM,SAAS,EAAEuD,KAAK,SAAS,CAC5ErD,QACC;YAEP;YACA,IAAI2B,oBAAoB,sBAAsB,EAC5Ce,QAAQ,IAAI,CACV,8FACAf,oBAAoB,sBAAsB;YAK9C,MAAMuC,oBAAoB,MAAM,IAAI,CAAC,aAAa;YAClDvC,oBAAoB,sBAAsB,IAAI,CAAC,cAAc,EAAEuC,mBAAmB;YAElF,IAAI;gBACF,MAAM7D,QAAQ,YAAY,CAAC4D,YAAY,KAAK;YAC9C,EAAE,OAAOnE,OAAY;gBAEnBmC;gBACA,MAAMkC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CxC,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEwC,WAAW,iCAAiC,EAAErE,OAAO,WAAWsE,OAAOtE,QAAQ;gBACrIV,MACE,yFACAU,iBAAiBuE,QAAQvE,MAAM,OAAO,GAAGsE,OAAOtE,QAChD,6CACAmC;YAEJ;YAEA,IAAIA,8BAA8B1C,uCAChC,OAAOc,QAAQ,eAAe,CAAC;YAIjC,IAAIoB,aAAa,SACf,OAAOpB,QAAQ,eAAe,CAC5B,CAAC,cAAc,EAAEoB,YAAY,MAAM,IAAI,yBAAyB;YAKpE,IAAI,CAACwB,YAAY,wBACf;YAIF,EAAEpB;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAMuC,WAAW,CAAC,UAAU,EAAEvC,qBAAqB,4JAA4J,CAAC;gBAChN,OAAO1B,QAAQ,eAAe,CAACiE;YACjC;YAEA,IAAI,CAAC3C,oBAAoB,sBAAsB,EAAE;gBAC/C,MAAMwC,aAAa,MAAM,IAAI,CAAC,aAAa;gBAC3CxC,oBAAoB,sBAAsB,GAAG,CAAC,MAAM,EAAEwC,WAAW,gDAAgD,CAAC;YACpH;QACF;QAEA,OAAO;YACL,QAAQ;gBACNrC;gBACA,QAAQI;YACV;YACAtB;QACF;IACF;IAEQ,oBACN2D,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,aAAaG,KAAK;gBAClB,YAAYC,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOhE,OAAOqE;gBACtB,MAAM,EAAEtE,IAAI,EAAE,GAAGsE;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZzE,KAAK,GAAG,GAAG;wBACTyE;wBACA,aAAaA,KAAK,QAAQ,EAAE;oBAC9B;oBACAzE,KAAK,KAAK,GAAGyE,KAAK,QAAQ,EAAE;oBAC5B,IAAIA,KAAK,QAAQ,EAAE,mBACjBzE,KAAK,iBAAiB,GAAGyE,KAAK,QAAQ,CAAC,iBAAiB;gBAE5D;gBAGA,MAAMtE,YAAYmE,YAAY,SAAS;gBACvClE,OAAOD,WAAW;gBAElB,MAAMuE,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBAAkB;oBAC3BE,cAAcZ;oBACdW,cAAc;wBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;oBACrC;gBACF;gBAEA,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DtF,MAAM;oBACN,MAAMmG,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX,kBACAjE;gBAEJ,EAAE,OAAOZ,OAAO;oBACd,IAAIA,iBAAiB2F,cACnBV,UAAUjF,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE4F,IAAI,EAAEnC,OAAO,EAAEyB,IAAI,EAAE,GAAGK;gBAChCN,UAAUC;gBAEV,IAAIW,eAAeD;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTC,eAAeD;qBACV,IAAInB,AAAS,cAATA,MAEPoB,eADED,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTC,eAAe;qBAGf,IAAID,MAAM,CAACP,YAAY,KAAKnD,QAC1B2D,eAAgBD,IAAY,CAACP,YAAY;qBACpC,IAAIO,MAAM,WAAW1D,QAC1B2D,eAAgBD,KAAa,MAAM;qBAEnC/E,OAAO,OAAO;gBAKpB,IAAI4D,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtCpF,KAAK,OAAO,GAAGgD;oBACf,MAAM,IAAIc,MAAM,CAAC,kBAAkB,EAAEd,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQoC;oBACR,KAAKb;oBACLvB;gBACF;YACF;QACF;QAEA,OAAOqB;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMtE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEiE,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASnB,KAAK,SAAS,CAACmB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAM/D,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACuE;QAE1C,IAAI,CAAC9D,QACH,MAAM,IAAIuD,MACR;QAIJ,MAAM,EAAEtD,MAAM,EAAEwC,OAAO,EAAE,GAAGzC;QAE5B,OAAO;YACLC;YACAwC;YACA3C;QACF;IACF;IAEA,MAAM,QACJgF,SAAsB,EACtBlB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEoB,UAAU,EAAElB,gBAAgB,EAAE,GAAGmB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMxF,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAWyF;QAE1B,MAAMnF,SAASP,QAAQ,SAAS;QAChC,MAAM,EACJ2F,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG1B;QACJ,MAAM2B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEAzF,OAAOiF,WAAW;QAClBjF,OAAOqF,WAAW;QAClBrF,OAAOsF,iBAAiB;QAExBtF,OACEsF,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAM9B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAiB,YACApB,aACA4B,mBACA1B;YAGF,MAAM7D,SAAU,MAAMT,QAAQ,YAAY,CAACuE;YAO3C,IAAI9D,QAAQ,QACV,OAAO;gBACL,QAAQkB;gBACRpB;YACF;YAGF6F,eACE3F,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAE+E,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,UAAUD,MAAMD;gBACtB,MAAMG,gBAAgBZ,kBAAkBW;gBACxC,MAAMrD,UAAU,CAAC,kBAAkB,EAAE0C,gBAAgB,IAAI,EAAEW,QAAQ,0CAA0C,EAAEC,cAAc,EAAE,CAAC;gBAChI,MAAM,EAAE,OAAOC,UAAU,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAC9D;oBAAC;wBAAE,MAAM;wBAAS,OAAO;4BAAE,QAAQD;wBAAc;wBAAGtD;oBAAQ;iBAAE,EAC9DkB,aACAA;gBAEF,IAAIqC,UAAU,CAAC,EAAE,EACf,MAAMzG,QAAQ,YAAY,CAACyG,UAAU,CAAC,EAAE;YAE5C;QACF;QAEA,OAAOzG,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEoG,cAAc;IACnE;IA9sBA,YACEM,iBAAoC,EACpCC,OAAgB,EAChBC,IAQC,CACD;QArCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAEA;QAEA;QAoBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,eAAe,GAAGA,KAAK,eAAe;QAC3C,IAAI,CAAC,aAAa,GAAGA,KAAK,aAAa;QACvC,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAGA,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCH;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;YAChC,iBAAiBA,KAAK,eAAe;QACvC;IACF;AAkrBF;AAEO,eAAevF,gBACpBqF,iBAAoC,EACpCxF,iBAAuC,EACvCiB,MAAwB;IAExB,IAAI,CAACjB,mBAAmB,QACtB,OAAOiB;IAGT,IAAI,CAACuE,kBAAkB,2BAA2B,EAChD,MAAM,IAAI1C,MACR,CAAC,gCAAgC,EAAE0C,kBAAkB,aAAa,EAAE;IAIxE,MAAMI,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAAC7F;IACvB;IAEA,MAAM,EAAE8F,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMP,kBAAkB,2BAA2B,CAACI;IACtD,IAAI;QACF,MAAMrG,SAAS,MAAM0B;QAErB,MAAM1C,QAAQwH;QACd,IAAIxH,OACF,MAAMA;QAER,OAAOgB;IACT,SAAU;QACRuG;IACF;AACF"}
|
package/dist/es/agent/utils.mjs
CHANGED
|
@@ -123,7 +123,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
123
123
|
return;
|
|
124
124
|
}
|
|
125
125
|
}
|
|
126
|
-
const getMidsceneVersion = ()=>"1.
|
|
126
|
+
const getMidsceneVersion = ()=>"1.8.0";
|
|
127
127
|
const parsePrompt = (prompt)=>{
|
|
128
128
|
if ('string' == typeof prompt) return {
|
|
129
129
|
textPrompt: prompt,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/auto-glm/planning.mjs","sources":["../../../../src/ai-model/auto-glm/planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/device';\nimport type { PlanningAIResponse, UIContext } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { ConversationHistory } from '../conversation-history';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../service-caller/index';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\nimport { getAutoGLMPlanPrompt } from './prompt';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGLMPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n actionSpace?: DeviceAction[];\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n\n const systemPrompt =\n getAutoGLMPlanPrompt(modelConfig.modelFamily) +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'text', text: userInstruction }],\n });\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n modelConfig,\n {
|
|
1
|
+
{"version":3,"file":"ai-model/auto-glm/planning.mjs","sources":["../../../../src/ai-model/auto-glm/planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/device';\nimport type { PlanningAIResponse, UIContext } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { ConversationHistory } from '../conversation-history';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from '../service-caller/index';\nimport { transformAutoGLMAction } from './actions';\nimport { parseAction, parseAutoGLMResponse } from './parser';\nimport { getAutoGLMPlanPrompt } from './prompt';\n\nconst debug = getDebug('auto-glm-planning');\n\nexport async function autoGLMPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n actionSpace?: DeviceAction[];\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n\n const systemPrompt =\n getAutoGLMPlanPrompt(modelConfig.modelFamily) +\n (actionContext\n ? `<high_priority_knowledge>${actionContext}</high_priority_knowledge>`\n : '');\n\n const imagePayloadBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'text', text: userInstruction }],\n });\n conversationHistory.append({\n role: 'user',\n content: [{ type: 'image_url', image_url: { url: imagePayloadBase64 } }],\n });\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...conversationHistory.snapshot(1),\n ];\n\n const { content: rawResponse, usage } = await callAIWithStringResponse(\n msgs,\n modelConfig,\n {\n abortSignal: options.abortSignal,\n },\n );\n\n debug('autoGLMPlanning rawResponse:', rawResponse);\n\n let parsedResponse: ReturnType<typeof parseAutoGLMResponse>;\n let transformedActions: ReturnType<typeof transformAutoGLMAction>;\n\n try {\n parsedResponse = parseAutoGLMResponse(rawResponse);\n debug('thinking in response:', parsedResponse.think);\n debug('action in response:', parsedResponse.content);\n\n const parsedAction = parseAction(parsedResponse);\n debug('Parsed action object:', parsedAction);\n transformedActions = transformAutoGLMAction(\n parsedAction,\n context.shotSize,\n options.actionSpace,\n );\n debug('Transformed actions:', transformedActions);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(rawResponse, undefined, 2),\n usage,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: `<think>${parsedResponse.think}</think><answer>${parsedResponse.content}</answer>`,\n });\n\n const shouldContinuePlanning = !parsedResponse.content.startsWith('finish(');\n\n return {\n actions: transformedActions,\n log: rawResponse,\n usage,\n shouldContinuePlanning,\n rawResponse: JSON.stringify(rawResponse, undefined, 2),\n };\n}\n"],"names":["debug","getDebug","autoGLMPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","systemPrompt","getAutoGLMPlanPrompt","imagePayloadBase64","msgs","rawResponse","usage","callAIWithStringResponse","parsedResponse","transformedActions","parseAutoGLMResponse","parsedAction","parseAction","transformAutoGLMAction","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","shouldContinuePlanning"],"mappings":";;;;;AAcA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,gBACpBC,eAAuB,EACvBC,OAOC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IAErE,MAAMK,eACJC,qBAAqBH,YAAY,WAAW,IAC3CC,CAAAA,gBACG,CAAC,yBAAyB,EAAEA,cAAc,0BAA0B,CAAC,GACrE,EAAC;IAEP,MAAMG,qBAAqBL,QAAQ,UAAU,CAAC,MAAM;IAEpDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAQ,MAAMF;YAAgB;SAAE;IACpD;IACAE,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YAAC;gBAAE,MAAM;gBAAa,WAAW;oBAAE,KAAKM;gBAAmB;YAAE;SAAE;IAC1E;IAEA,MAAMC,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASH;QAAa;WACrCJ,oBAAoB,QAAQ,CAAC;KACjC;IAED,MAAM,EAAE,SAASQ,WAAW,EAAEC,KAAK,EAAE,GAAG,MAAMC,yBAC5CH,MACAL,aACA;QACE,aAAaH,QAAQ,WAAW;IAClC;IAGFJ,MAAM,gCAAgCa;IAEtC,IAAIG;IACJ,IAAIC;IAEJ,IAAI;QACFD,iBAAiBE,qBAAqBL;QACtCb,MAAM,yBAAyBgB,eAAe,KAAK;QACnDhB,MAAM,uBAAuBgB,eAAe,OAAO;QAEnD,MAAMG,eAAeC,YAAYJ;QACjChB,MAAM,yBAAyBmB;QAC/BF,qBAAqBI,uBACnBF,cACAb,QAAQ,QAAQ,EAChBF,QAAQ,WAAW;QAErBJ,MAAM,wBAAwBiB;IAChC,EAAE,OAAOK,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,qBACR,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACd,aAAae,QAAW,IACvCd;IAEJ;IAEAT,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS,CAAC,OAAO,EAAEW,eAAe,KAAK,CAAC,gBAAgB,EAAEA,eAAe,OAAO,CAAC,SAAS,CAAC;IAC7F;IAEA,MAAMa,yBAAyB,CAACb,eAAe,OAAO,CAAC,UAAU,CAAC;IAElE,OAAO;QACL,SAASC;QACT,KAAKJ;QACLC;QACAe;QACA,aAAaF,KAAK,SAAS,CAACd,aAAae,QAAW;IACtD;AACF"}
|
|
@@ -383,10 +383,8 @@ async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
|
|
|
383
383
|
content: userPrompt
|
|
384
384
|
}
|
|
385
385
|
];
|
|
386
|
-
debugInspect("AiJudgeOrderSensitive:
|
|
387
|
-
const result = await callAIFn(msgs, modelConfig
|
|
388
|
-
deepThink: false
|
|
389
|
-
});
|
|
386
|
+
debugInspect("AiJudgeOrderSensitive: description=%s", description);
|
|
387
|
+
const result = await callAIFn(msgs, modelConfig);
|
|
390
388
|
return {
|
|
391
389
|
isOrderSensitive: result.content.isOrderSensitive ?? false,
|
|
392
390
|
usage: result.usage
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n modelFamily: IModelConfig['modelFamily'];\n}): Promise<{ rect: Rect; imageBase64: string; scale: number }> {\n const { context, baseRect, modelFamily } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n return {\n rect: sectionRect,\n imageBase64: scaledResult.imageBase64,\n scale: scaleRatio,\n };\n}\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n abortSignal?: AbortSignal;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.shotSize.width;\n let imageHeight = context.shotSize.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig, {\n abortSignal: options.abortSignal,\n });\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n { abortSignal: options.abortSignal },\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n abortSignal?: AbortSignal;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n { abortSignal: options.abortSignal },\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n modelFamily,\n });\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.rect.width,\n searchAreaConfig.rect.height,\n searchAreaConfig.scale,\n );\n }\n\n return {\n rect: searchAreaConfig?.rect,\n imageBase64: searchAreaConfig?.imageBase64,\n scale: searchAreaConfig?.scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect(\n 'AiJudgeOrderSensitive: deepThink=false, description=%s',\n description,\n );\n\n const result = await callAIFn(msgs, modelConfig, {\n deepThink: false,\n });\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","buildSearchAreaConfig","options","context","baseRect","modelFamily","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","targetElementDescription","modelConfig","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandedRect","originalWidth","originalHeight","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AA6DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAEvB,eAAeE,sBAAsBC,OAI3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGH;IAC3C,MAAMI,aAAa;IACnB,MAAMC,cAAcC,iBAAiBJ,UAAUD,QAAQ,QAAQ;IAE/D,MAAMM,gBAAgB,MAAMC,WAC1BP,QAAQ,UAAU,CAAC,MAAM,EACzBI,aACAF,AAAgB,iBAAhBA;IAGF,MAAMM,eAAe,MAAMC,WAAWH,cAAc,WAAW,EAAEH;IACjEC,YAAY,KAAK,GAAGI,aAAa,KAAK;IACtCJ,YAAY,MAAM,GAAGI,aAAa,MAAM;IACxC,OAAO;QACL,MAAMJ;QACN,aAAaI,aAAa,WAAW;QACrC,OAAOL;IACT;AACF;AAEA,MAAMO,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBnB,OAMrC;IAUC,MAAM,EAAEC,OAAO,EAAEmB,wBAAwB,EAAEC,WAAW,EAAE,GAAGrB;IAC3D,MAAM,EAAEG,WAAW,EAAE,GAAGkB;IACxB,MAAMC,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElDsB,OACEH,0BACA;IAEF,MAAMI,+BAA+Bb,wBACnCS;IAEF,MAAMK,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUzB,eAC3B0B,uBAAuB1B,eACvB2B,4BAA4B3B;IAEhC,IAAI4B,eAAeT;IACnB,IAAIU,aAAa/B,QAAQ,QAAQ,CAAC,KAAK;IACvC,IAAIgC,cAAchC,QAAQ,QAAQ,CAAC,MAAM;IACzC,IAAIiC,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIjC,QAAQ,YAAY,EAAE;QACxBuB,OACEvB,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFuB,OACEvB,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGF+B,eAAe/B,QAAQ,YAAY,CAAC,WAAW;QAC/CgC,aAAahC,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCiC,cAAcjC,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCkC,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAI9B,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMiC,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMrB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUzB,eACZ,CAAC,KAAK,EAAEsB,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOL,0BAAuC;QAChD,MAAMkB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQO,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAL,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIV,UAAUzB,cAAc;QAC1B,MAAM,EAAE,SAASoC,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB1B,MAAMM,aAAa;YAChD,aAAarB,QAAQ,WAAW;QAClC;QAEFJ,aAAa,yBAAyB2C;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1C3C,aAAa,sBAAsB8C,OAAO,KAAK;QAC/C9C,aAAa,yBAAyB8C,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9D9C,aAAa,yBAAyBkD,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnC9C,aAAa,iCAAiC;gBAAEmD;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9CrC,aAAa,+BAA+B;gBAAEqD;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAInD,QAAQ,YAAY,EAAE,MAAM;gBAC9BoD,UAAUpD,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCqD,UAAUrD,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMsD,UAA+BC,uBACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtB1D,aAAa,qBAAqBgD;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,yBACV1C,MACAM,aACA;YAAE,aAAarB,QAAQ,WAAW;QAAC;IAEvC,EAAE,OAAO0D,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,gBACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAjC,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BkC,oBACAC,qBACAhC,aACAH,QAAQ,YAAY,EAAE;YAGxBJ,aAAa,WAAWgD;YAExB,MAAMU,UAA+Bc,sBACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBvE,OAKrC;IAQC,MAAM,EAAEC,OAAO,EAAEuE,kBAAkB,EAAEnD,WAAW,EAAE,GAAGrB;IACrD,MAAM,EAAEG,WAAW,EAAE,GAAGkB;IACxB,MAAMC,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM0B,eAAe8C,4BAA4BtE;IACjD,MAAMuE,gCAAgCC,0BACpChE,wBAAwB6D;IAE1B,MAAMzD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAMzB,mBAAmB;YACtC,QAAQ2D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAzD,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,yBACb1C,MACAM,aACA;YAAE,aAAarB,QAAQ,WAAW;QAAC;IAEvC,EAAE,OAAO0D,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IAGJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,gBACjBW,aACA7E,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBE;QAEFL,aAAa,0BAA0BiF;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D9E,aAAa,wBAAwBkF;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,gBACLe,MACAjF,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBE;QAGNL,aAAa,qBAAqBmF;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DnF,aAAa,iBAAiBqF;QAE9B,MAAME,eAAe/E,iBAAiB6E,YAAYlF,QAAQ,QAAQ;QAClE,MAAMqF,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1CvF,aAAa,2BAA2BuF;QAExCR,mBAAmB,MAAM9E,sBAAsB;YAC7CE;YACA,UAAUkF;YACVhF;QACF;QAEAL,aACE,qDACAwF,eACAC,gBACAV,iBAAiB,IAAI,CAAC,KAAK,EAC3BA,iBAAiB,IAAI,CAAC,MAAM,EAC5BA,iBAAiB,KAAK;IAE1B;IAEA,OAAO;QACL,MAAMA,kBAAkB;QACxB,aAAaA,kBAAkB;QAC/B,OAAOA,kBAAkB;QACzB,OAAOD,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeY,qBAAwBxF,OAO7C;IACC,MAAM,EAAEyF,SAAS,EAAExF,OAAO,EAAEyF,aAAa,EAAE5E,gBAAgB,EAAEO,WAAW,EAAE,GACxErB;IACF,MAAM2B,eAAegE;IACrB,MAAMrE,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM2F,wBAAwBC,uBAC5B7F,QAAQ,eAAe,IAAI,IAC3ByF;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKxE;YACL,QAAQ;QACV;IACF;IAGFwE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM7E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAASmE;QACX;KACD;IAED,IAAIhF,kBAAkB;QACpB,MAAMwB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACLuD,iBAAiB,EAClB,GAAG,MAAMC,OAAOjF,MAAMM;IAGvB,IAAI4E;IACJ,IAAI;QACFA,cAAcC,2BAA8BpC;IAC9C,EAAE,OAAOqC,YAAY;QAEnB,MAAMxC,eACJwC,sBAAsBvC,QAAQuC,WAAW,OAAO,GAAGtC,OAAOsC;QAC5D,MAAM,IAAIpC,qBACR,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLyD;QACAnC;QACAtB;QACAuD;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEjF,WAAyB;IAKzB,MAAMM,eAAe4E;IACrB,MAAMC,aAAaC,0BAA0BJ;IAE7C,MAAMtF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS6E;QACX;KACD;IAED5G,aACE,0DACAyG;IAGF,MAAMzB,SAAS,MAAM0B,SAASvF,MAAMM,aAAa;QAC/C,WAAW;IACb;IAEA,OAAO;QACL,kBAAkBuD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n generateElementByPoint,\n generateElementByRect,\n} from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n scaleImage,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n AIResponseParseError,\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nexport async function buildSearchAreaConfig(options: {\n context: UIContext;\n baseRect: Rect;\n modelFamily: IModelConfig['modelFamily'];\n}): Promise<{ rect: Rect; imageBase64: string; scale: number }> {\n const { context, baseRect, modelFamily } = options;\n const scaleRatio = 2;\n const sectionRect = expandSearchArea(baseRect, context.shotSize);\n\n const croppedResult = await cropByRect(\n context.screenshot.base64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n\n const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);\n sectionRect.width = scaledResult.width;\n sectionRect.height = scaledResult.height;\n return {\n rect: sectionRect,\n imageBase64: scaledResult.imageBase64,\n scale: scaleRatio,\n };\n}\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n abortSignal?: AbortSignal;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.shotSize.width;\n let imageHeight = context.shotSize.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig, {\n abortSignal: options.abortSignal,\n });\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Apply offset if searching in a cropped area\n let finalX = pixelX;\n let finalY = pixelY;\n if (options.searchConfig?.rect) {\n finalX += options.searchConfig.rect.left;\n finalY += options.searchConfig.rect.top;\n }\n\n const element: LocateResultElement = generateElementByPoint(\n [finalX, finalY],\n targetElementDescriptionText as string,\n );\n\n resRect = element.rect;\n debugInspect('auto-glm resRect:', resRect);\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n let res: Awaited<\n ReturnType<\n typeof callAIWithObjectResponse<AIElementResponse | [number, number]>\n >\n >;\n try {\n res = await callAIWithObjectResponse<AIElementResponse | [number, number]>(\n msgs,\n modelConfig,\n {\n abortSignal: options.abortSignal,\n },\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n parseResult: {\n elements: [],\n errors: [`AI call error: ${errorMessage}`],\n },\n rawResponse,\n usage,\n reasoning_content: undefined,\n };\n }\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n options.searchConfig?.scale,\n );\n\n debugInspect('resRect', resRect);\n\n const element: LocateResultElement = generateElementByRect(\n resRect,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n abortSignal?: AbortSignal;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n scale?: number;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n let result: Awaited<\n ReturnType<typeof callAIWithObjectResponse<AISectionLocatorResponse>>\n >;\n try {\n result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n {\n abortSignal: options.abortSignal,\n },\n );\n } catch (callError) {\n // Return error with usage and rawResponse if available\n const errorMessage =\n callError instanceof Error ? callError.message : String(callError);\n const rawResponse =\n callError instanceof AIResponseParseError\n ? callError.rawResponse\n : errorMessage;\n const usage =\n callError instanceof AIResponseParseError ? callError.usage : undefined;\n return {\n rect: undefined,\n imageBase64: undefined,\n error: `AI call error: ${errorMessage}`,\n rawResponse,\n usage,\n };\n }\n\n let searchAreaConfig:\n | Awaited<ReturnType<typeof buildSearchAreaConfig>>\n | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.shotSize.width,\n context.shotSize.height,\n 0,\n 0,\n context.shotSize.width,\n context.shotSize.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n const expandedRect = expandSearchArea(mergedRect, context.shotSize);\n const originalWidth = expandedRect.width;\n const originalHeight = expandedRect.height;\n debugSection('expanded sectionRect %j', expandedRect);\n\n searchAreaConfig = await buildSearchAreaConfig({\n context,\n baseRect: mergedRect,\n modelFamily,\n });\n\n debugSection(\n 'scaled sectionRect from %dx%d to %dx%d (scale=%d)',\n originalWidth,\n originalHeight,\n searchAreaConfig.rect.width,\n searchAreaConfig.rect.height,\n searchAreaConfig.scale,\n );\n }\n\n return {\n rect: searchAreaConfig?.rect,\n imageBase64: searchAreaConfig?.imageBase64,\n scale: searchAreaConfig?.scale,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n let parseResult: AIDataExtractionResponse<T>;\n try {\n parseResult = parseXMLExtractionResponse<T>(rawResponse);\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n\n return {\n parseResult,\n rawResponse,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n debugInspect('AiJudgeOrderSensitive: description=%s', description);\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","buildSearchAreaConfig","options","context","baseRect","modelFamily","scaleRatio","sectionRect","expandSearchArea","croppedResult","cropByRect","scaledResult","scaleImage","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","targetElementDescription","modelConfig","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","finalX","finalY","element","generateElementByPoint","res","callAIWithObjectResponse","callError","errorMessage","Error","String","rawResponse","AIResponseParseError","undefined","JSON","Array","adaptBboxToRect","generateElementByRect","e","msg","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","searchAreaConfig","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandedRect","originalWidth","originalHeight","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","parseError","AiJudgeOrderSensitive","description","callAIFn","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AA6DA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAEvB,eAAeE,sBAAsBC,OAI3C;IACC,MAAM,EAAEC,OAAO,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGH;IAC3C,MAAMI,aAAa;IACnB,MAAMC,cAAcC,iBAAiBJ,UAAUD,QAAQ,QAAQ;IAE/D,MAAMM,gBAAgB,MAAMC,WAC1BP,QAAQ,UAAU,CAAC,MAAM,EACzBI,aACAF,AAAgB,iBAAhBA;IAGF,MAAMM,eAAe,MAAMC,WAAWH,cAAc,WAAW,EAAEH;IACjEC,YAAY,KAAK,GAAGI,aAAa,KAAK;IACtCJ,YAAY,MAAM,GAAGI,aAAa,MAAM;IACxC,OAAO;QACL,MAAMJ;QACN,aAAaI,aAAa,WAAW;QACrC,OAAOL;IACT;AACF;AAEA,MAAMO,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBnB,OAMrC;IAUC,MAAM,EAAEC,OAAO,EAAEmB,wBAAwB,EAAEC,WAAW,EAAE,GAAGrB;IAC3D,MAAM,EAAEG,WAAW,EAAE,GAAGkB;IACxB,MAAMC,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElDsB,OACEH,0BACA;IAEF,MAAMI,+BAA+Bb,wBACnCS;IAEF,MAAMK,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUzB,eAC3B0B,uBAAuB1B,eACvB2B,4BAA4B3B;IAEhC,IAAI4B,eAAeT;IACnB,IAAIU,aAAa/B,QAAQ,QAAQ,CAAC,KAAK;IACvC,IAAIgC,cAAchC,QAAQ,QAAQ,CAAC,MAAM;IACzC,IAAIiC,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIjC,QAAQ,YAAY,EAAE;QACxBuB,OACEvB,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFuB,OACEvB,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGF+B,eAAe/B,QAAQ,YAAY,CAAC,WAAW;QAC/CgC,aAAahC,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCiC,cAAcjC,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCkC,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAI9B,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMiC,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMrB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUzB,eACZ,CAAC,KAAK,EAAEsB,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOL,0BAAuC;QAChD,MAAMkB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQO,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAL,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIV,UAAUzB,cAAc;QAC1B,MAAM,EAAE,SAASoC,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB1B,MAAMM,aAAa;YAChD,aAAarB,QAAQ,WAAW;QAClC;QAEFJ,aAAa,yBAAyB2C;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1C3C,aAAa,sBAAsB8C,OAAO,KAAK;QAC/C9C,aAAa,yBAAyB8C,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9D9C,aAAa,yBAAyBkD,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnC9C,aAAa,iCAAiC;gBAAEmD;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9CrC,aAAa,+BAA+B;gBAAEqD;gBAAQE;YAAO;YAG7D,IAAIC,SAASH;YACb,IAAII,SAASF;YACb,IAAInD,QAAQ,YAAY,EAAE,MAAM;gBAC9BoD,UAAUpD,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBACxCqD,UAAUrD,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YACzC;YAEA,MAAMsD,UAA+BC,uBACnC;gBAACH;gBAAQC;aAAO,EAChB7B;YAGFoB,UAAUU,QAAQ,IAAI;YACtB1D,aAAa,qBAAqBgD;YAElC,IAAIU,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMV;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,IAAIc;IAKJ,IAAI;QACFA,MAAM,MAAMC,yBACV1C,MACAM,aACA;YACE,aAAarB,QAAQ,WAAW;QAClC;IAEJ,EAAE,OAAO0D,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAa;gBACX,UAAU,EAAE;gBACZ,QAAQ;oBAAC,CAAC,eAAe,EAAEL,cAAc;iBAAC;YAC5C;YACAG;YACAtB;YACA,mBAAmBwB;QACrB;IACF;IAEA,MAAMF,cAAcG,KAAK,SAAS,CAACT,IAAI,OAAO;IAE9C,IAAIZ;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYU,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBU,MAAM,OAAO,CAACV,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAZ,UAAUuB,gBACRX,IAAI,OAAO,CAAC,IAAI,EAChBxB,YACAC,aACAjC,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BkC,oBACAC,qBACAhC,aACAH,QAAQ,YAAY,EAAE;YAGxBJ,aAAa,WAAWgD;YAExB,MAAMU,UAA+Bc,sBACnCxB,SACApB;YAEFsB,SAAS,EAAE;YAEX,IAAIQ,SACFT,kBAAkB;gBAACS;aAAQ;QAE/B;IACF,EAAE,OAAOe,GAAG;QACV,MAAMC,MACJD,aAAaT,QACT,CAAC,sBAAsB,EAAES,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACvB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEwB,IAAI,CAAC,CAAC;aAFtBxB,SAAS;YAACwB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAM1B;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAgB;QACA,OAAON,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBvE,OAKrC;IAQC,MAAM,EAAEC,OAAO,EAAEuE,kBAAkB,EAAEnD,WAAW,EAAE,GAAGrB;IACrD,MAAM,EAAEG,WAAW,EAAE,GAAGkB;IACxB,MAAMC,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM0B,eAAe8C,4BAA4BtE;IACjD,MAAMuE,gCAAgCC,0BACpChE,wBAAwB6D;IAE1B,MAAMzD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMlC,SAAS,MAAMzB,mBAAmB;YACtC,QAAQ2D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAzD,KAAK,IAAI,IAAIuB;IACf;IAEA,IAAIsC;IAGJ,IAAI;QACFA,SAAS,MAAMnB,yBACb1C,MACAM,aACA;YACE,aAAarB,QAAQ,WAAW;QAClC;IAEJ,EAAE,OAAO0D,WAAW;QAElB,MAAMC,eACJD,qBAAqBE,QAAQF,UAAU,OAAO,GAAGG,OAAOH;QAC1D,MAAMI,cACJJ,qBAAqBK,uBACjBL,UAAU,WAAW,GACrBC;QACN,MAAMnB,QACJkB,qBAAqBK,uBAAuBL,UAAU,KAAK,GAAGM;QAChE,OAAO;YACL,MAAMA;YACN,aAAaA;YACb,OAAO,CAAC,eAAe,EAAEL,cAAc;YACvCG;YACAtB;QACF;IACF;IAEA,IAAIqC;IAGJ,MAAMC,cAAcF,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIE,aAAa;QACf,MAAMC,aAAaZ,gBACjBW,aACA7E,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBE;QAEFL,aAAa,0BAA0BiF;QAEvC,MAAMC,oBAAoBJ,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D9E,aAAa,wBAAwBkF;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAShB,MAAM,OAAO,CAACgB,OAC/B,GAAG,CAAC,CAACA,OACGf,gBACLe,MACAjF,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvB,GACA,GACAA,QAAQ,QAAQ,CAAC,KAAK,EACtBA,QAAQ,QAAQ,CAAC,MAAM,EACvBE;QAGNL,aAAa,qBAAqBmF;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DnF,aAAa,iBAAiBqF;QAE9B,MAAME,eAAe/E,iBAAiB6E,YAAYlF,QAAQ,QAAQ;QAClE,MAAMqF,gBAAgBD,aAAa,KAAK;QACxC,MAAME,iBAAiBF,aAAa,MAAM;QAC1CvF,aAAa,2BAA2BuF;QAExCR,mBAAmB,MAAM9E,sBAAsB;YAC7CE;YACA,UAAUkF;YACVhF;QACF;QAEAL,aACE,qDACAwF,eACAC,gBACAV,iBAAiB,IAAI,CAAC,KAAK,EAC3BA,iBAAiB,IAAI,CAAC,MAAM,EAC5BA,iBAAiB,KAAK;IAE1B;IAEA,OAAO;QACL,MAAMA,kBAAkB;QACxB,aAAaA,kBAAkB;QAC/B,OAAOA,kBAAkB;QACzB,OAAOD,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeY,qBAAwBxF,OAO7C;IACC,MAAM,EAAEyF,SAAS,EAAExF,OAAO,EAAEyF,aAAa,EAAE5E,gBAAgB,EAAEO,WAAW,EAAE,GACxErB;IACF,MAAM2B,eAAegE;IACrB,MAAMrE,mBAAmBrB,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM2F,wBAAwBC,uBAC5B7F,QAAQ,eAAe,IAAI,IAC3ByF;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKxE;YACL,QAAQ;QACV;IACF;IAGFwE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM7E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAASmE;QACX;KACD;IAED,IAAIhF,kBAAkB;QACpB,MAAMwB,SAAS,MAAMzB,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EACJ,SAASwB,WAAW,EACpBtB,KAAK,EACLuD,iBAAiB,EAClB,GAAG,MAAMC,OAAOjF,MAAMM;IAGvB,IAAI4E;IACJ,IAAI;QACFA,cAAcC,2BAA8BpC;IAC9C,EAAE,OAAOqC,YAAY;QAEnB,MAAMxC,eACJwC,sBAAsBvC,QAAQuC,WAAW,OAAO,GAAGtC,OAAOsC;QAC5D,MAAM,IAAIpC,qBACR,CAAC,iBAAiB,EAAEJ,cAAc,EAClCG,aACAtB;IAEJ;IAEA,OAAO;QACLyD;QACAnC;QACAtB;QACAuD;IACF;AACF;AAEO,eAAeK,sBACpBC,WAAmB,EACnBC,QAAwE,EACxEjF,WAAyB;IAKzB,MAAMM,eAAe4E;IACrB,MAAMC,aAAaC,0BAA0BJ;IAE7C,MAAMtF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASY;QAAa;QACxC;YACE,MAAM;YACN,SAAS6E;QACX;KACD;IAED5G,aAAa,yCAAyCyG;IAEtD,MAAMzB,SAAS,MAAM0B,SAASvF,MAAMM;IAEpC,OAAO;QACL,kBAAkBuD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
@@ -75,7 +75,7 @@ async function plan(userInstruction, opts) {
|
|
|
75
75
|
const { shotSize } = context;
|
|
76
76
|
const screenshotBase64 = context.screenshot.base64;
|
|
77
77
|
const { modelFamily } = modelConfig;
|
|
78
|
-
const includeSubGoals = true === opts.
|
|
78
|
+
const includeSubGoals = true === opts.planningModeDeepThink;
|
|
79
79
|
const systemPrompt = await systemPromptToTaskPlanning({
|
|
80
80
|
actionSpace: opts.actionSpace,
|
|
81
81
|
modelFamily,
|
|
@@ -155,7 +155,6 @@ async function plan(userInstruction, opts) {
|
|
|
155
155
|
...historyLog
|
|
156
156
|
];
|
|
157
157
|
let { content: rawResponse, usage, reasoning_content } = await callAI(msgs, modelConfig, {
|
|
158
|
-
deepThink: 'unset' === opts.deepThink ? void 0 : opts.deepThink,
|
|
159
158
|
abortSignal: opts.abortSignal
|
|
160
159
|
});
|
|
161
160
|
let planFromAI;
|
|
@@ -164,7 +163,6 @@ async function plan(userInstruction, opts) {
|
|
|
164
163
|
planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
|
|
165
164
|
} catch {
|
|
166
165
|
const retry = await callAI(msgs, modelConfig, {
|
|
167
|
-
deepThink: 'unset' === opts.deepThink ? void 0 : opts.deepThink,
|
|
168
166
|
abortSignal: opts.abortSignal
|
|
169
167
|
});
|
|
170
168
|
rawResponse = retry.content;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n safeParseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n // Only enable sub-goals when deepThink is true\n const includeSubGoals = opts.deepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In deepThink mode: show full sub-goals with logs\n // In non-deepThink mode: show historical execution logs\n const subGoalsText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const subGoalsSection = subGoalsText ? `\\n\\n${subGoalsText}` : '';\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${subGoalsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the latest screenshot${memoriesSection}${subGoalsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n abortSignal: opts.abortSignal,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n } catch {\n const retry = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n abortSignal: opts.abortSignal,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed (only when deepThink is enabled)\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Update sub-goals in conversation history based on response (only when deepThink is enabled)\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // In non-deepThink mode, accumulate logs as historical execution steps\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","safeParseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","subGoalsText","subGoalsSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;AA+BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAK9C,SAASE,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,cAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,cAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,qBAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,yBAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQC,cAAcf,gBAAgBP;QACxC,EAAE,OAAOuB,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAWC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAE5B,WAAW,EAAE,GAAG6B;IAGxB,MAAMI,kBAAkBN,AAAmB,SAAnBA,KAAK,SAAS;IAEtC,MAAMO,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7B3B;QACA,aAAa2B,KAAK,WAAW;QAC7B,gBAAgB;QAChBM;IACF;IAEA,IAAIG,eAAeJ;IACnB,IAAIK,aAAaN,SAAS,KAAK;IAC/B,IAAIO,cAAcP,SAAS,MAAM;IAKjC,IAAI/B,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMuC,eAAe,MAAMC,4BAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBd,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMe,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEf,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIiB;IAKJ,MAAMC,eAAeX,kBACjBH,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMe,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAG/D,MAAME,eAAehB,oBAAoB,cAAc;IACvD,MAAMiB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIhB,oBAAoB,sBAAsB,EAAE;QAC9Ca,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGb,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEiB,kBAAkBF,iBAAiB;gBAChN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEa,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,6BAA6B,EAAEI,kBAAkBF,iBAAiB;YAC3E;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACa;IAG3Bb,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMkB,aAAalB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMsB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMpB,aAAa;QAClC,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAef,SAAYe,KAAK,SAAS;QAClE,aAAaA,KAAK,WAAW;IAC/B;IAGA,IAAI2B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAalD;QACrD,EAAE,OAAM;YACN,MAAMuD,QAAQ,MAAMF,OAAOJ,MAAMpB,aAAa;gBAC5C,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAef,SAAYe,KAAK,SAAS;gBAClE,aAAaA,KAAK,WAAW;YAC/B;YACAuB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAalD;QACrD;QAEA,IAAIsD,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjEzD,QACE;YAEFyD,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C3D,MAAM;YACN8D,yBAAyB;YAEzB,IAAIxB,iBACFH,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM4B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,uBAAuBH,SAAS7B,KAAK,WAAW;YAC1D8B;QACF;QAEAG,OAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBlC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9BzB,MAAM,+BAA+BkE;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENlE,MAAM,gBAAgBmE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,gBAAgBjE,AAAgBY,WAAhBZ,aAElBmB,OAAO,KAAK,CAAC6C,MAAM,GAAGE,cACpBD,cACA5B,YACAC,aACAtC;YAGN;QACF;QAGA,IAAIiC,iBAAiB;YACnB,IAAIqB,WAAW,cAAc,EAAE,QAC7BxB,oBAAoB,aAAa,CAACwB,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMa,SAASb,WAAW,mBAAmB,CAChDxB,oBAAoB,mBAAmB,CAACqC;YAI5C,IAAIb,WAAW,GAAG,EAChBxB,oBAAoB,gBAAgB,CAACwB,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChBxB,oBAAoB,mBAAmB,CAACwB,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnBxB,oBAAoB,YAAY,CAACwB,WAAW,MAAM;QAGpDxB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMoB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOU,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClCnB,aACAC;IAEJ;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport {\n extractXMLTag,\n parseMarkFinishedIndexes,\n parseSubGoalsFromXML,\n} from './prompt/util';\nimport {\n AIResponseParseError,\n callAI,\n safeParseJson,\n} from './service-caller/index';\n\nconst debug = getDebug('planning');\nconst warnLog = getDebug('planning', { console: true });\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const memory = extractXMLTag(xmlString, 'memory');\n const log = extractXMLTag(xmlString, 'log') || '';\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse <complete> tag with success attribute\n const completeGoalRegex =\n /<complete\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete>/i;\n const completeGoalMatch = xmlString.match(completeGoalRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeGoalMatch) {\n finalizeSuccess = completeGoalMatch[1] === 'true';\n finalizeMessage = completeGoalMatch[2]?.trim() || undefined;\n }\n\n // Parse sub-goal related tags\n const updatePlanContent = extractXMLTag(xmlString, 'update-plan-content');\n const markSubGoalDone = extractXMLTag(xmlString, 'mark-sub-goal-done');\n\n const updateSubGoals = updatePlanContent\n ? parseSubGoalsFromXML(updatePlanContent)\n : undefined;\n const markFinishedIndexes = markSubGoalDone\n ? parseMarkFinishedIndexes(markSubGoalDone)\n : undefined;\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n // Strip any trailing XML tags that LLM might have leaked into the action type\n // e.g. \"KeyboardPress</action-type>\\n<action-param-json>\" -> \"KeyboardPress\"\n const type = actionType.split('<')[0].trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(memory ? { memory } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? { finalizeSuccess } : {}),\n ...(updateSubGoals?.length ? { updateSubGoals } : {}),\n ...(markFinishedIndexes?.length ? { markFinishedIndexes } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n // Controls aiAct planning prompt shape and state updates, such as sub-goals.\n planningModeDeepThink?: boolean;\n abortSignal?: AbortSignal;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { shotSize } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n // Only enable sub-goals when aiAct is in deep-thinking planning mode.\n const includeSubGoals = opts.planningModeDeepThink === true;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: true, // always include thought\n includeSubGoals,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = shotSize.width;\n let imageHeight = shotSize.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n // Build sub-goal status text to include in the message\n // In planning deep-think mode: show full sub-goals with logs\n // Otherwise: show historical execution logs\n const subGoalsText = includeSubGoals\n ? conversationHistory.subGoalsToText()\n : conversationHistory.historicalLogsToText();\n const subGoalsSection = subGoalsText ? `\\n\\n${subGoalsText}` : '';\n\n // Build memories text to include in the message\n const memoriesText = conversationHistory.memoriesToText();\n const memoriesSection = memoriesText ? `\\n\\n${memoriesText}` : '';\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The previous action has been executed, here is the latest screenshot. Please continue according to the instruction.${memoriesSection}${subGoalsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the latest screenshot${memoriesSection}${subGoalsSection}`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n\n // Compress history if it exceeds the threshold to avoid context overflow\n conversationHistory.compressHistory(50, 20);\n\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n let {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n });\n\n // Parse XML response to JSON object, retry once on parse failure\n let planFromAI: RawResponsePlanningAIResponse;\n try {\n try {\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n } catch {\n const retry = await callAI(msgs, modelConfig, {\n abortSignal: opts.abortSignal,\n });\n rawResponse = retry.content;\n usage = retry.usage;\n reasoning_content = retry.reasoning_content;\n planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n }\n\n if (planFromAI.action && planFromAI.finalizeSuccess !== undefined) {\n warnLog(\n 'Planning response included both an action and <complete>; ignoring <complete> output.',\n );\n planFromAI.finalizeMessage = undefined;\n planFromAI.finalizeSuccess = undefined;\n }\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is completed via <complete> tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task completed via <complete> tag, stop planning');\n shouldContinuePlanning = false;\n // Mark all sub-goals as finished when goal is completed in planning deep-think mode.\n if (includeSubGoals) {\n conversationHistory.markAllSubGoalsFinished();\n }\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n modelFamily,\n );\n }\n });\n });\n\n // Update sub-goals in conversation history only in planning deep-think mode.\n if (includeSubGoals) {\n if (planFromAI.updateSubGoals?.length) {\n conversationHistory.mergeSubGoals(planFromAI.updateSubGoals);\n }\n if (planFromAI.markFinishedIndexes?.length) {\n for (const index of planFromAI.markFinishedIndexes) {\n conversationHistory.markSubGoalFinished(index);\n }\n }\n // Append the planning log to the currently running sub-goal\n if (planFromAI.log) {\n conversationHistory.appendSubGoalLog(planFromAI.log);\n }\n } else {\n // Without planning deep-think mode, accumulate logs as historical execution steps.\n if (planFromAI.log) {\n conversationHistory.appendHistoricalLog(planFromAI.log);\n }\n }\n\n // Append memory to conversation history if present\n if (planFromAI.memory) {\n conversationHistory.appendMemory(planFromAI.memory);\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `XML parse error: ${errorMessage}`,\n rawResponse,\n usage,\n );\n }\n}\n"],"names":["debug","getDebug","warnLog","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","memory","log","error","actionType","actionParamStr","completeGoalRegex","completeGoalMatch","finalizeMessage","finalizeSuccess","undefined","updatePlanContent","markSubGoalDone","updateSubGoals","parseSubGoalsFromXML","markFinishedIndexes","parseMarkFinishedIndexes","action","type","param","safeParseJson","e","Error","plan","userInstruction","opts","context","modelConfig","conversationHistory","shotSize","screenshotBase64","includeSubGoals","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","subGoalsText","subGoalsSection","memoriesText","memoriesSection","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","retry","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","index","parseError","errorMessage","String","AIResponseParseError"],"mappings":";;;;;;;AA8BA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,UAAUD,SAAS,YAAY;IAAE,SAAS;AAAK;AAK9C,SAASE,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,SAASD,cAAcH,WAAW;IACxC,MAAMK,MAAMF,cAAcH,WAAW,UAAU;IAC/C,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,MAAMC,oBAAoBX,cAAcH,WAAW;IACnD,MAAMe,kBAAkBZ,cAAcH,WAAW;IAEjD,MAAMgB,iBAAiBF,oBACnBG,qBAAqBH,qBACrBD;IACJ,MAAMK,sBAAsBH,kBACxBI,yBAAyBJ,mBACzBF;IAGJ,IAAIO,SAAc;IAClB,IAAIb,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QAGrD,MAAMc,OAAOd,WAAW,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI;QAC1C,IAAIe;QAEJ,IAAId,gBACF,IAAI;YAEFc,QAAQC,cAAcf,gBAAgBP;QACxC,EAAE,OAAOuB,GAAG;YACV,MAAM,IAAIC,MAAM,CAAC,mCAAmC,EAAED,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUT,WAAVS,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIpB,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,SAAS;YAAEA;QAAO,IAAI,CAAC,CAAC;QAC5BC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1Bc;QACA,GAAIT,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAII,gBAAgB,SAAS;YAAEA;QAAe,IAAI,CAAC,CAAC;QACpD,GAAIE,qBAAqB,SAAS;YAAEA;QAAoB,IAAI,CAAC,CAAC;IAChE;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAYC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,QAAQ,EAAE,GAAGH;IACrB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAE5B,WAAW,EAAE,GAAG6B;IAGxB,MAAMI,kBAAkBN,AAA+B,SAA/BA,KAAK,qBAAqB;IAElD,MAAMO,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7B3B;QACA,aAAa2B,KAAK,WAAW;QAC7B,gBAAgB;QAChBM;IACF;IAEA,IAAIG,eAAeJ;IACnB,IAAIK,aAAaN,SAAS,KAAK;IAC/B,IAAIO,cAAcP,SAAS,MAAM;IAKjC,IAAI/B,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMuC,eAAe,MAAMC,4BAA4BJ;QACvDC,aAAaE,aAAa,KAAK;QAC/BD,cAAcC,aAAa,MAAM;QACjCH,eAAeG,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBd,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMe,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEf,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIiB;IAKJ,MAAMC,eAAeX,kBACjBH,oBAAoB,cAAc,KAClCA,oBAAoB,oBAAoB;IAC5C,MAAMe,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAG/D,MAAME,eAAehB,oBAAoB,cAAc;IACvD,MAAMiB,kBAAkBD,eAAe,CAAC,IAAI,EAAEA,cAAc,GAAG;IAE/D,IAAIhB,oBAAoB,sBAAsB,EAAE;QAC9Ca,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGb,oBAAoB,sBAAsB,CAAC,qHAAqH,EAAEiB,kBAAkBF,iBAAiB;gBAChN;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEa,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM,CAAC,6BAA6B,EAAEI,kBAAkBF,iBAAiB;YAC3E;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACa;IAG3Bb,oBAAoB,eAAe,CAAC,IAAI;IAExC,MAAMkB,aAAalB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMsB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASf;QAAa;WACrCQ;WACAM;KACJ;IAED,IAAI,EACF,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMpB,aAAa;QAClC,aAAaF,KAAK,WAAW;IAC/B;IAGA,IAAI2B;IACJ,IAAI;QACF,IAAI;YACFA,aAAaxD,yBAAyBoD,aAAalD;QACrD,EAAE,OAAM;YACN,MAAMuD,QAAQ,MAAMF,OAAOJ,MAAMpB,aAAa;gBAC5C,aAAaF,KAAK,WAAW;YAC/B;YACAuB,cAAcK,MAAM,OAAO;YAC3BJ,QAAQI,MAAM,KAAK;YACnBH,oBAAoBG,MAAM,iBAAiB;YAC3CD,aAAaxD,yBAAyBoD,aAAalD;QACrD;QAEA,IAAIsD,WAAW,MAAM,IAAIA,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YACjEzD,QACE;YAEFyD,WAAW,eAAe,GAAG1C;YAC7B0C,WAAW,eAAe,GAAG1C;QAC/B;QAEA,MAAM4C,UAAUF,WAAW,MAAM,GAAG;YAACA,WAAW,MAAM;SAAC,GAAG,EAAE;QAC5D,IAAIG,yBAAyB;QAG7B,IAAIH,AAA+B1C,WAA/B0C,WAAW,eAAe,EAAgB;YAC5C3D,MAAM;YACN8D,yBAAyB;YAEzB,IAAIxB,iBACFH,oBAAoB,uBAAuB;QAE/C;QAEA,MAAM4B,cAAkC;YACtC,GAAGJ,UAAU;YACbE;YACAN;YACAC;YACAC;YACA,UAAUO,uBAAuBH,SAAS7B,KAAK,WAAW;YAC1D8B;QACF;QAEAG,OAAON,YAAY;QAEnBE,QAAQ,OAAO,CAAC,CAACrC;YACf,MAAMC,OAAOD,OAAO,IAAI;YACxB,MAAM0C,sBAAsBlC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACR,SAAWA,OAAO,IAAI,KAAKC;YAG9BzB,MAAM,+BAA+BkE;YACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;YAENlE,MAAM,gBAAgBmE;YAEtBA,aAAa,OAAO,CAAC,CAACE;gBACpB,MAAMC,eAAe9C,OAAO,KAAK,CAAC6C,MAAM;gBACxC,IAAIC,gBAAgBjE,AAAgBY,WAAhBZ,aAElBmB,OAAO,KAAK,CAAC6C,MAAM,GAAGE,cACpBD,cACA5B,YACAC,aACAtC;YAGN;QACF;QAGA,IAAIiC,iBAAiB;YACnB,IAAIqB,WAAW,cAAc,EAAE,QAC7BxB,oBAAoB,aAAa,CAACwB,WAAW,cAAc;YAE7D,IAAIA,WAAW,mBAAmB,EAAE,QAClC,KAAK,MAAMa,SAASb,WAAW,mBAAmB,CAChDxB,oBAAoB,mBAAmB,CAACqC;YAI5C,IAAIb,WAAW,GAAG,EAChBxB,oBAAoB,gBAAgB,CAACwB,WAAW,GAAG;QAEvD,OAEE,IAAIA,WAAW,GAAG,EAChBxB,oBAAoB,mBAAmB,CAACwB,WAAW,GAAG;QAK1D,IAAIA,WAAW,MAAM,EACnBxB,oBAAoB,YAAY,CAACwB,WAAW,MAAM;QAGpDxB,oBAAoB,MAAM,CAAC;YACzB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAMoB;gBACR;aACD;QACH;QAEA,OAAOQ;IACT,EAAE,OAAOU,YAAY;QAEnB,MAAMC,eACJD,sBAAsB5C,QAAQ4C,WAAW,OAAO,GAAGE,OAAOF;QAC5D,MAAM,IAAIG,qBACR,CAAC,iBAAiB,EAAEF,cAAc,EAClCnB,aACAC;IAEJ;AACF"}
|
|
@@ -105,13 +105,12 @@ const extractImageInputs = (message)=>{
|
|
|
105
105
|
}
|
|
106
106
|
return inputs;
|
|
107
107
|
};
|
|
108
|
-
const resolveCodexReasoningEffort = ({
|
|
109
|
-
if (true ===
|
|
110
|
-
if (false ===
|
|
108
|
+
const resolveCodexReasoningEffort = ({ reasoningEnabled, modelConfig })=>{
|
|
109
|
+
if (true === reasoningEnabled) return 'high';
|
|
110
|
+
if (false === reasoningEnabled) return 'none';
|
|
111
111
|
const normalized = modelConfig.reasoningEffort?.trim().toLowerCase();
|
|
112
|
-
if ('low' === normalized || 'medium' === normalized || 'high' === normalized || 'xhigh' === normalized) return normalized;
|
|
113
|
-
|
|
114
|
-
if (false === modelConfig.reasoningEnabled) return 'low';
|
|
112
|
+
if ('none' === normalized || 'minimal' === normalized || 'low' === normalized || 'medium' === normalized || 'high' === normalized || 'xhigh' === normalized) return normalized;
|
|
113
|
+
return 'none';
|
|
115
114
|
};
|
|
116
115
|
const buildCodexTurnPayloadFromMessages = (messages)=>{
|
|
117
116
|
const developerInstructionParts = [];
|
|
@@ -175,14 +174,14 @@ class CodexAppServerConnection {
|
|
|
175
174
|
isClosed() {
|
|
176
175
|
return this.closed;
|
|
177
176
|
}
|
|
178
|
-
async runTurn({ messages, modelConfig, stream, onChunk,
|
|
177
|
+
async runTurn({ messages, modelConfig, stream, onChunk, reasoningEnabled, abortSignal }) {
|
|
179
178
|
const startTime = Date.now();
|
|
180
179
|
const timeoutMs = modelConfig.timeout || CODEX_DEFAULT_TIMEOUT_MS;
|
|
181
180
|
const deadlineAt = Date.now() + timeoutMs;
|
|
182
181
|
const isStreaming = !!(stream && onChunk);
|
|
183
182
|
const { developerInstructions, input } = buildCodexTurnPayloadFromMessages(messages);
|
|
184
183
|
const effort = resolveCodexReasoningEffort({
|
|
185
|
-
|
|
184
|
+
reasoningEnabled,
|
|
186
185
|
modelConfig
|
|
187
186
|
});
|
|
188
187
|
let threadId;
|
|
@@ -515,7 +514,7 @@ class CodexAppServerConnection {
|
|
|
515
514
|
}
|
|
516
515
|
}
|
|
517
516
|
class CodexAppServerConnectionManager {
|
|
518
|
-
async runTurn({ messages, modelConfig, stream, onChunk,
|
|
517
|
+
async runTurn({ messages, modelConfig, stream, onChunk, reasoningEnabled, abortSignal }) {
|
|
519
518
|
return this.runner.run(async ()=>{
|
|
520
519
|
const connection = await this.getConnection();
|
|
521
520
|
try {
|
|
@@ -524,7 +523,7 @@ class CodexAppServerConnectionManager {
|
|
|
524
523
|
modelConfig,
|
|
525
524
|
stream,
|
|
526
525
|
onChunk,
|
|
527
|
-
|
|
526
|
+
reasoningEnabled,
|
|
528
527
|
abortSignal
|
|
529
528
|
});
|
|
530
529
|
} catch (error) {
|
|
@@ -563,7 +562,7 @@ async function callAIWithCodexAppServer(messages, modelConfig, options) {
|
|
|
563
562
|
modelConfig,
|
|
564
563
|
stream: options?.stream,
|
|
565
564
|
onChunk: options?.onChunk,
|
|
566
|
-
|
|
565
|
+
reasoningEnabled: options?.reasoningEnabled,
|
|
567
566
|
abortSignal: options?.abortSignal
|
|
568
567
|
});
|
|
569
568
|
}
|