@midscene/core 1.2.2-beta-20260120033218.0 → 1.2.3-beta-20260120082504.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/es/agent/agent.mjs +3 -3
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +4 -2
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +9 -5
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +1 -1
  8. package/dist/es/ai-model/inspect.mjs +7 -6
  9. package/dist/es/ai-model/inspect.mjs.map +1 -1
  10. package/dist/es/ai-model/llm-planning.mjs +60 -6
  11. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  12. package/dist/es/ai-model/prompt/extraction.mjs +51 -53
  13. package/dist/es/ai-model/prompt/extraction.mjs.map +1 -1
  14. package/dist/es/ai-model/prompt/llm-planning.mjs +64 -49
  15. package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
  16. package/dist/es/ai-model/prompt/util.mjs +6 -1
  17. package/dist/es/ai-model/prompt/util.mjs.map +1 -1
  18. package/dist/es/device/index.mjs +2 -14
  19. package/dist/es/device/index.mjs.map +1 -1
  20. package/dist/es/types.mjs.map +1 -1
  21. package/dist/es/utils.mjs +2 -2
  22. package/dist/lib/agent/agent.js +2 -2
  23. package/dist/lib/agent/agent.js.map +1 -1
  24. package/dist/lib/agent/task-builder.js +4 -2
  25. package/dist/lib/agent/task-builder.js.map +1 -1
  26. package/dist/lib/agent/tasks.js +9 -5
  27. package/dist/lib/agent/tasks.js.map +1 -1
  28. package/dist/lib/agent/utils.js +1 -1
  29. package/dist/lib/ai-model/inspect.js +5 -4
  30. package/dist/lib/ai-model/inspect.js.map +1 -1
  31. package/dist/lib/ai-model/llm-planning.js +60 -3
  32. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  33. package/dist/lib/ai-model/prompt/extraction.js +53 -55
  34. package/dist/lib/ai-model/prompt/extraction.js.map +1 -1
  35. package/dist/lib/ai-model/prompt/llm-planning.js +64 -49
  36. package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
  37. package/dist/lib/ai-model/prompt/util.js +8 -0
  38. package/dist/lib/ai-model/prompt/util.js.map +1 -1
  39. package/dist/lib/device/index.js +16 -34
  40. package/dist/lib/device/index.js.map +1 -1
  41. package/dist/lib/types.js.map +1 -1
  42. package/dist/lib/utils.js +2 -2
  43. package/dist/types/agent/task-builder.d.ts +3 -1
  44. package/dist/types/agent/tasks.d.ts +2 -0
  45. package/dist/types/ai-model/inspect.d.ts +2 -2
  46. package/dist/types/ai-model/llm-planning.d.ts +6 -2
  47. package/dist/types/ai-model/prompt/extraction.d.ts +5 -2
  48. package/dist/types/ai-model/prompt/util.d.ts +7 -0
  49. package/dist/types/device/index.d.ts +0 -11
  50. package/dist/types/types.d.ts +8 -0
  51. package/dist/types/yaml.d.ts +1 -5
  52. package/package.json +2 -2
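The diff expanded below is for package/dist/es/agent/tasks.mjs.map (item 6). The TypeScript embedded in the new map shows two changes to TaskExecutor: a new waitAfterAction option that is stored on the executor and forwarded to TaskBuilder, and handling of finalizeSuccess / finalizeMessage on the planning result, where the message becomes the action's output string and a failed finalization aborts the task. The following is a minimal TypeScript sketch condensed from those embedded sources; the TaskExecutorSketch / handlePlanResult names and the simplified types are illustrative only and not part of the package API (the real code uses assert() from @midscene/shared/utils inline in the planning executor).

// Condensed from the TypeScript embedded in the new agent/tasks.mjs.map below.
// Types are simplified placeholders so the excerpt stands alone; the real class
// lives in src/agent/tasks.ts of @midscene/core.
type PlanResultSketch = {
  finalizeSuccess?: boolean;
  finalizeMessage?: string;
  log?: string;
};

interface TaskExecutorOptsSketch {
  replanningCycleLimit?: number;
  waitAfterAction?: number; // new option in 1.2.3-beta, forwarded to TaskBuilder
}

class TaskExecutorSketch {
  replanningCycleLimit?: number;
  waitAfterAction?: number; // new field on the executor

  constructor(opts: TaskExecutorOptsSketch) {
    this.replanningCycleLimit = opts.replanningCycleLimit;
    // The option is kept on the executor and also passed into TaskBuilder
    // (see `new TaskBuilder({ ..., waitAfterAction: opts.waitAfterAction })`
    // in the embedded source).
    this.waitAfterAction = opts.waitAfterAction;
  }

  // In the planning loop, a finalization reported as failed now aborts the task.
  // (Illustrative helper; in the package this logic sits inline in the
  // planning executor and uses assert() instead of throw.)
  handlePlanResult(planResult: PlanResultSketch): string | undefined {
    if (planResult.finalizeSuccess === false) {
      throw new Error(
        `Task failed: ${planResult.finalizeMessage || 'No error message provided'}\n${planResult.log || ''}`,
      );
    }
    // finalizeMessage is also used as the action's output string.
    return planResult.finalizeMessage;
  }
}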
package/dist/es/agent/tasks.mjs.map
@@ -1 +1 @@
- {"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import {\n ConversationHistory,\n autoGLMPlanning,\n plan,\n uiTarsPlanning,\n} from '@/ai-model';\nimport { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeepThinkOption,\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return 
this.providedActionSpace;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n fileChooserAccept?: string[],\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n includeBboxInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n );\n });\n }\n\n private async runAction(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? 
this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n deepThink,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { modelFamily } = modelConfigForPlanning;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planImpl = isUITars(modelFamily)\n ? uiTarsPlanning\n : isAutoGLM(modelFamily)\n ? autoGLMPlanning\n : plan;\n\n const planResult = await planImpl(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n deepThink,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n thought,\n log,\n note,\n error,\n usage,\n rawResponse,\n reasoning_content,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n thought,\n note,\n yamlFlow: planResult.yamlFlow,\n output: outputString,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n // todo: set time string\n\n try {\n const result = await session.appendAndRun(executables.tasks);\n outputString = result?.output;\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Time: ${getReadableTimeString()}, Error executing 
running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage = `Time: ${getReadableTimeString()}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? 
`Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump, reasoning_content } = extractResult;\n applyDump(dump);\n task.reasoning_content = reasoning_content;\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? 
demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = getError();\n if (error) {\n throw error;\n }\n return result;\n } 
finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","withFileChooser","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","outputString","modelFamily","actionSpace","action","Array","console","planImpl","isUITars","uiTarsPlanning","isAutoGLM","autoGLMPlanning","plan","planResult","JSON","actions","thought","log","note","error","usage","rawResponse","reasoning_content","executables","getReadableTimeString","String","Error","errorMsg","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","timeRemaining","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAyDA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAoBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA6BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAC3BC,iBAA4B,EAS5B;QACA,OAAOC,gBAAgB,IAAI,CAAC,SAAS,EAAED,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAf,wBACAC,6BACAe,uBACAC,cACAC,WACAC,8BACAC,oBACAC;IAGN;IAEA,MAAc,UACZN,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAS3B;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMjB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIoB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJP,gCAAgC,IAAI,CA
AC,oBAAoB;QAC3DT,OACEgB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAClC,IAAIC;QAGJ,MAAO,KAAM;YACX,MAAMhB,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;oBACAC;gBACF;gBACA,UAAU,OAAOd,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEqB,WAAW,EAAE,GAAG9B;oBAExB,MAAM+B,cAAc,IAAI,CAAC,cAAc;oBACvCxC,MACE,sCACAwC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDtB,OAAOuB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,WAAWC,SAASN,eACtBO,iBACAC,UAAUR,eACRS,kBACAC;oBAEN,MAAMC,aAAa,MAAMN,SAAS5B,MAAM,eAAe,EAAE;wBACvD,SAASE;wBACT,eAAeF,MAAM,YAAY;wBACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CwB;wBACA,aAAa/B;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;wBACbI;wBACAC;oBACF;oBACA9B,MAAM,cAAcmD,KAAK,SAAS,CAACD,YAAY,MAAM;oBAErD,MAAM,EACJE,OAAO,EACPC,OAAO,EACPC,GAAG,EACHC,IAAI,EACJC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,iBAAiB,EAClB,GAAGT;oBAEJjC,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCyC;oBACF;oBACAzC,gBAAgB,IAAI,CAAC,KAAK,GAAGwC;oBAC7BxC,gBAAgB,IAAI,CAAC,iBAAiB,GAAG0C;oBACzC1C,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAASmC,WAAW,EAAE;wBACtBE;wBACAD;wBACAE;wBACA,UAAUL,WAAW,QAAQ;wBAC7B,QAAQZ;wBACR,wBAAwBY,WAAW,sBAAsB;oBAC3D;oBACAjC,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACqC,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAE3D,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAMJ,aAAa5B,QAAQ;YAG3B,MAAMd,QAAQ0C,YAAY,WAAW,EAAE;YACvChB,SAAS,IAAI,IAAKgB,YAAY,YAAY,EAAE;YAE5C,IAAIU;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CpD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAO6B,OAAO;gBACd,OAAO3C,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAE2C,MAAM,SAAS,EAAEL,KAAK,SAAS,CAC5E3C,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDmC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAKnD,IAAI;gBACF,MAAMrB,SAAS,MAAMT,QAAQ,YAAY,CAAC+C,YAAY,KAAK;gBAC3DtB,eAAehB,QAAQ;YACzB,EAAE,OAAOkC,OAAY;gBAEnBnB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,MAAM,EAAEwB,wBAAwB,iCAAiC,EAAEL,OAAO,WAAWM,OAAON,QAAQ;gBACvJxD,MACE,yFACAwD,iBAAiBO,QAAQP,MAAM,OAAO,GAAGM,OAAON,QAChD,6CACAnB;YAEJ;YAEA,IAAIA,8BAA8BnC,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAACqC,YAAY,wBACf;YAIF,EAAEjB;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM6B,WAAW,CAAC,UAAU,EAAE7B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOtB,QAAQ,eAAe,CAACmD;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,MAAM,EAAEH,wBAAwB,gDAAgD,CAAC;QAExI;QAEA,OAAO;YACL,QAAQ;gBACN3B;gBACA,QAAQI;YACV;YACAlB;QACF;IACF;IAEQ,oBACN6C,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOlD,OAAOuD;gBACtB,MAAM,EAAExD,IAAI,EAAE,GAAGwD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZ3D,KAAK,GAAG,GAAG;wBACT2D;oBACF;gBACF;gBAGA,MAAMxD,YAAYqD,YAAY,SAAS;gBACvCpD,OAAOD,WAAW;gBAElB,MAAMyD,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DpE,MAAM;oBACN,MAAMiF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAE
zB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOb,OAAO;oBACd,IAAIA,iBAAiB2B,cACnBV,UAAUjB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE4B,IAAI,EAAE3B,KAAK,EAAEJ,OAAO,EAAEqB,IAAI,EAAEf,iBAAiB,EAAE,GAAGoB;gBAC1DN,UAAUC;gBACV3D,KAAK,iBAAiB,GAAG4C;gBAEzB,IAAI0B,eAAeD;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTC,eAAeD;qBACV,IAAInB,AAAS,cAATA,MAEPoB,eADED,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTC,eAAe;qBACV;oBACLlE,OACEiE,MAAM,CAACP,YAAY,KAAKzC,QACxB;oBAEFiD,eAAgBD,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtCtE,KAAK,KAAK,GAAG0C;oBACb1C,KAAK,OAAO,GAAGsC;oBACf,MAAM,IAAIU,MAAM,CAAC,kBAAkB,EAAEV,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQgC;oBACR,KAAKb;oBACLf;oBACAJ;gBACF;YACF;QACF;QAEA,OAAOiB;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMxD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEmD,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASf,KAAK,SAAS,CAACe;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAMjD,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACyD;QAE1C,IAAI,CAAChD,QACH,MAAM,IAAIyC,MACR;QAIJ,MAAM,EAAExC,MAAM,EAAE8B,OAAO,EAAE,GAAG/B;QAE5B,OAAO;YACLC;YACA8B;YACAjC;QACF;IACF;IAEA,MAAM,aAAakE,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBpB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEsB,UAAU,EAAEpB,gBAAgB,EAAE,GAAGqB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM5E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW6E;QAE1B,MAAMvE,SAASP,QAAQ,SAAS;QAChC,MAAM,EACJ+E,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG5B;QACJ,MAAM6B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEA7E,OAAOqE,WAAW;QAClBrE,OAAOyE,WAAW;QAClBzE,OAAO0E,iBAAiB;QAExB1E,OACE0E,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAMhC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAmB,YACAtB,aACA8B,mBACA5B;YAGF,MAAM/C,SAAU,MAAMT,QAAQ,YAAY,CAACyD;YAO3C,IAAIhD,QAAQ,QACV,OAAO;gBACL,QAAQc;gBACRhB;YACF;YAGFiF,eACE/E,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEmE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,gBAAgBX,kBAAmBU,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAMG,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQD;gBACV;gBACA,MAAM3F,QAAQ,MAAM,CAAC4F;YACvB;QACF;QAEA,OAAO5F,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEwF,cAAc;IACnE;IApmBA,YACEK,iBAAoC,EACpCC,OAAgB,EAChBC,IAMC,CACD;QAjCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAkBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,mBAAmB,GAAGD,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIE,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;QAClC;IACF;AA4kBF;AAEO,eAAe5E,gBACpB0E,iBAAoC,EACpC3E,iBAAuC,EACvCU,MAAwB;IAExB,IAAI,CAACV,mBAAmB,QACtB,OAAOU;IAGT,IAAI,CAACiE,kBAAkB,2BAA2B,EAChD,MAAM,IAAI3C,MACR,CAAC,gCAAgC,EAAE2C,kBAAkB,aAAa,EAAE;IAIxE,MAAMK,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAACjF;IACvB;IAEA,MAAM,EAAEkF,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMR,kBAAkB,2BAA2B,CAACK;IACtD,IAAI;QACF,MAAMzF,SAAS,MAAMmB;QAErB,MAAMe,QAAQ0D;QACd,IAAI1D,OACF,MAAMA;QAER,OAAOlC;IACT,SAAU;QACR2F;IACF;AACF"}
+ {"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import {\n ConversationHistory,\n autoGLMPlanning,\n plan,\n uiTarsPlanning,\n} from '@/ai-model';\nimport { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeepThinkOption,\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n waitAfterAction?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n waitAfterAction?: number;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.waitAfterAction = opts.waitAfterAction;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n waitAfterAction: opts.waitAfterAction,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n 
tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n fileChooserAccept?: string[],\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n includeBboxInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n );\n });\n }\n\n private async runAction(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? 
this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n deepThink,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { modelFamily } = modelConfigForPlanning;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planImpl = isUITars(modelFamily)\n ? uiTarsPlanning\n : isAutoGLM(modelFamily)\n ? autoGLMPlanning\n : plan;\n\n const planResult = await planImpl(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n deepThink,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n thought,\n log,\n note,\n error,\n usage,\n rawResponse,\n reasoning_content,\n finalizeSuccess,\n finalizeMessage,\n } = planResult;\n outputString = finalizeMessage;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n thought,\n note,\n yamlFlow: planResult.yamlFlow,\n output: finalizeMessage,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n // Check if task was finalized with failure\n if (finalizeSuccess === false) {\n assert(\n false,\n `Task failed: ${finalizeMessage || 'No error message provided'}\\n${log || ''}`,\n );\n }\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n // todo: set time string\n\n try {\n await 
session.appendAndRun(executables.tasks);\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Time: ${getReadableTimeString()}, Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage = `Time: ${getReadableTimeString()}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? 
`Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump, reasoning_content } = extractResult;\n applyDump(dump);\n task.reasoning_content = reasoning_content;\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? 
demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = getError();\n if (error) {\n throw error;\n }\n return result;\n } 
finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","withFileChooser","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","outputString","modelFamily","actionSpace","action","Array","console","planImpl","isUITars","uiTarsPlanning","isAutoGLM","autoGLMPlanning","plan","planResult","JSON","actions","thought","log","note","error","usage","rawResponse","reasoning_content","finalizeSuccess","finalizeMessage","executables","getReadableTimeString","String","Error","errorMsg","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","timeRemaining","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;;AAyDA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAsBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAgCQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAC3BC,iBAA4B,EAS5B;QACA,OAAOC,gBAAgB,IAAI,CAAC,SAAS,EAAED,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAf,wBACAC,6BACAe,uBACAC,cACAC,WACAC,8BACAC,oBACAC;IAGN;IAEA,MAAc,UACZN,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAS3B;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMjB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIoB,cAAc;QAClB,MAAMC,WAAmC,EA
AE;QAC3C,MAAMC,uBACJP,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEgB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAClC,IAAIC;QAGJ,MAAO,KAAM;YACX,MAAMhB,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;oBACAC;gBACF;gBACA,UAAU,OAAOd,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEqB,WAAW,EAAE,GAAG9B;oBAExB,MAAM+B,cAAc,IAAI,CAAC,cAAc;oBACvCxC,MACE,sCACAwC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDtB,OAAOuB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,WAAWC,SAASN,eACtBO,iBACAC,UAAUR,eACRS,kBACAC;oBAEN,MAAMC,aAAa,MAAMN,SAAS5B,MAAM,eAAe,EAAE;wBACvD,SAASE;wBACT,eAAeF,MAAM,YAAY;wBACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CwB;wBACA,aAAa/B;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;wBACbI;wBACAC;oBACF;oBACA9B,MAAM,cAAcmD,KAAK,SAAS,CAACD,YAAY,MAAM;oBAErD,MAAM,EACJE,OAAO,EACPC,OAAO,EACPC,GAAG,EACHC,IAAI,EACJC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,iBAAiB,EACjBC,eAAe,EACfC,eAAe,EAChB,GAAGX;oBACJZ,eAAeuB;oBAEf5C,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCyC;oBACF;oBACAzC,gBAAgB,IAAI,CAAC,KAAK,GAAGwC;oBAC7BxC,gBAAgB,IAAI,CAAC,iBAAiB,GAAG0C;oBACzC1C,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAASmC,WAAW,EAAE;wBACtBE;wBACAD;wBACAE;wBACA,UAAUL,WAAW,QAAQ;wBAC7B,QAAQW;wBACR,wBAAwBX,WAAW,sBAAsB;oBAC3D;oBACAjC,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACqC,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAG3D,IAAIM,AAAoB,UAApBA,iBACFzC,OACE,OACA,CAAC,aAAa,EAAE0C,mBAAmB,4BAA4B,EAAE,EAAEP,OAAO,IAAI;oBAIlF,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAMJ,aAAa5B,QAAQ;YAG3B,MAAMd,QAAQ0C,YAAY,WAAW,EAAE;YACvChB,SAAS,IAAI,IAAKgB,YAAY,YAAY,EAAE;YAE5C,IAAIY;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CtD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAO6B,OAAO;gBACd,OAAO3C,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAE2C,MAAM,SAAS,EAAEL,KAAK,SAAS,CAC5E3C,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDmC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAKnD,IAAI;gBACF,MAAM9B,QAAQ,YAAY,CAACiD,YAAY,KAAK;YAC9C,EAAE,OAAON,OAAY;gBAEnBnB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,MAAM,EAAE0B,wBAAwB,iCAAiC,EAAEP,OAAO,WAAWQ,OAAOR,QAAQ;gBACvJxD,MACE,yFACAwD,iBAAiBS,QAAQT,MAAM,OAAO,GAAGQ,OAAOR,QAChD,6CACAnB;YAEJ;YAEA,IAAIA,8BAA8BnC,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAACqC,YAAY,wBACf;YAIF,EAAEjB;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM+B,WAAW,CAAC,UAAU,EAAE/B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOtB,QAAQ,eAAe,CAACqD;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,MAAM,EAAEH,wBAAwB,gDAAgD,CAAC;QAExI;QAEA,OAAO;YACL,QAAQ;gBACN7B;gBACA,QAAQI;YACV;YACAlB;QACF;IACF;IAEQ,oBACN+C,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOpD,OAAOyD;gBACtB,MAAM,EAAE1D,IAAI,EAAE,GAAG0D;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZ7D,KAAK,GAAG,GAAG;wBACT6D;oBACF;gBACF;gBAGA,MAAM1D,YAAYuD,YAAY,SAAS;gBACvCtD,OAAOD,WAAW;gBAElB,MAAM2D,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,m
BAAmB,EAAE;oBAC1DtE,MAAM;oBACN,MAAMmF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOf,OAAO;oBACd,IAAIA,iBAAiB6B,cACnBV,UAAUnB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE8B,IAAI,EAAE7B,KAAK,EAAEJ,OAAO,EAAEuB,IAAI,EAAEjB,iBAAiB,EAAE,GAAGsB;gBAC1DN,UAAUC;gBACV7D,KAAK,iBAAiB,GAAG4C;gBAEzB,IAAI4B,eAAeD;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTC,eAAeD;qBACV,IAAInB,AAAS,cAATA,MAEPoB,eADED,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTC,eAAe;qBACV;oBACLpE,OACEmE,MAAM,CAACP,YAAY,KAAK3C,QACxB;oBAEFmD,eAAgBD,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtCxE,KAAK,KAAK,GAAG0C;oBACb1C,KAAK,OAAO,GAAGsC;oBACf,MAAM,IAAIY,MAAM,CAAC,kBAAkB,EAAEZ,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQkC;oBACR,KAAKb;oBACLjB;oBACAJ;gBACF;YACF;QACF;QAEA,OAAOmB;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAM1D,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEqD,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASjB,KAAK,SAAS,CAACiB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAMnD,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAAC2D;QAE1C,IAAI,CAAClD,QACH,MAAM,IAAI2C,MACR;QAIJ,MAAM,EAAE1C,MAAM,EAAE8B,OAAO,EAAE,GAAG/B;QAE5B,OAAO;YACLC;YACA8B;YACAjC;QACF;IACF;IAEA,MAAM,aAAaoE,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBpB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEsB,UAAU,EAAEpB,gBAAgB,EAAE,GAAGqB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM9E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW+E;QAE1B,MAAMzE,SAASP,QAAQ,SAAS;QAChC,MAAM,EACJiF,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG5B;QACJ,MAAM6B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEA/E,OAAOuE,WAAW;QAClBvE,OAAO2E,WAAW;QAClB3E,OAAO4E,iBAAiB;QAExB5E,OACE4E,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAMhC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAmB,YACAtB,aACA8B,mBACA5B;YAGF,MAAMjD,SAAU,MAAMT,QAAQ,YAAY,CAAC2D;YAO3C,IAAIlD,QAAQ,QACV,OAAO;gBACL,QAAQc;gBACRhB;YACF;YAGFmF,eACEjF,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEqE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,gBAAgBX,kBAAmBU,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAMG,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQD;gBACV;gBACA,MAAM7F,QAAQ,MAAM,CAAC8F;YACvB;QACF;QAEA,OAAO9F,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAE0F,cAAc;IACnE;IAjnBA,YACEK,iBAAoC,EACpCC,OAAgB,EAChBC,IAOC,CACD;QApCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAEA;QAmBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,eAAe,GAAGA,KAAK,eAAe;QAC3C,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,mBAAmB,GAAGD,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIE,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;YAChC,iBAAiBA,KAAK,eAAe;QACvC;IACF;AAslBF;AAEO,eAAe9E,gBACpB4E,iBAAoC,EACpC7E,iBAAuC,EACvCU,MAAwB;IAExB,IAAI,CAACV,mBAAmB,QACtB,OAAOU;IAGT,IAAI,CAACmE,kBAAkB,2BAA2B,EAChD,MAAM,IAAI3C,MACR,CAAC,gCAAgC,EAAE2C,kBAAkB,aAAa,EAAE;IAIxE,MAAMK,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAACnF;IACvB;IAEA,MAAM,EAAEoF,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMR,kBAAkB,
2BAA2B,CAACK;IACtD,IAAI;QACF,MAAM3F,SAAS,MAAMmB;QAErB,MAAMe,QAAQ4D;QACd,IAAI5D,OACF,MAAMA;QAER,OAAOlC;IACT,SAAU;QACR6F;IACF;AACF"}
@@ -103,7 +103,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
103
103
  return;
104
104
  }
105
105
  }
106
- const getMidsceneVersion = ()=>"1.2.2-beta-20260120033218.0";
106
+ const getMidsceneVersion = ()=>"1.2.3-beta-20260120082504.0";
107
107
  const parsePrompt = (prompt)=>{
108
108
  if ('string' == typeof prompt) return {
109
109
  textPrompt: prompt,
@@ -6,11 +6,11 @@ import { adaptBboxToRect, expandSearchArea, mergeRects } from "../common.mjs";
6
6
  import { parseAutoGLMLocateResponse } from "./auto-glm/parser.mjs";
7
7
  import { getAutoGLMLocatePrompt } from "./auto-glm/prompt.mjs";
8
8
  import { isAutoGLM } from "./auto-glm/util.mjs";
9
- import { extractDataQueryPrompt, systemPromptToExtract } from "./prompt/extraction.mjs";
9
+ import { extractDataQueryPrompt, parseXMLExtractionResponse, systemPromptToExtract } from "./prompt/extraction.mjs";
10
10
  import { findElementPrompt, systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
11
11
  import { sectionLocatorInstruction, systemPromptToLocateSection } from "./prompt/llm-section-locator.mjs";
12
12
  import { orderSensitiveJudgePrompt, systemPromptToJudgeOrderSensitive } from "./prompt/order-sensitive-judge.mjs";
13
- import { callAIWithObjectResponse, callAIWithStringResponse } from "./service-caller/index.mjs";
13
+ import { callAI, callAIWithObjectResponse, callAIWithStringResponse } from "./service-caller/index.mjs";
14
14
  const debugInspect = getDebug('ai:inspect');
15
15
  const debugSection = getDebug('ai:section');
16
16
  const extraTextFromUserPrompt = (prompt)=>{
@@ -313,11 +313,12 @@ async function AiExtractElementInfo(options) {
313
313
  });
314
314
  msgs.push(...addOns);
315
315
  }
316
- const result = await callAIWithObjectResponse(msgs, modelConfig);
316
+ const { content: rawResponse, usage, reasoning_content } = await callAI(msgs, modelConfig);
317
+ const parseResult = parseXMLExtractionResponse(rawResponse);
317
318
  return {
318
- parseResult: result.content,
319
- usage: result.usage,
320
- reasoning_content: result.reasoning_content
319
+ parseResult,
320
+ usage,
321
+ reasoning_content
321
322
  };
322
323
  }
323
324
  async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
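The hunk above switches AiExtractElementInfo from callAIWithObjectResponse to a plain callAI call whose raw string reply is parsed by the new parseXMLExtractionResponse helper from ./prompt/extraction. The parser itself is not part of this hunk, so the sketch below only illustrates the general idea of pulling a JSON payload out of an XML-tagged reply; the tag name 'data-extraction' and the helper name are assumptions, not taken from this diff.

// Minimal TypeScript sketch, assuming the extraction prompt wraps its JSON
// payload in a single XML tag. Tag name and helper name are illustrative.
function parseTaggedJson<T>(rawResponse: string, tagName = 'data-extraction'): T {
  const re = new RegExp(`<${tagName}>([\\s\\S]*?)</${tagName}>`, 'i');
  const match = rawResponse.match(re);
  // Fall back to treating the whole reply as JSON when the tag is absent.
  const payload = match ? match[1].trim() : rawResponse.trim();
  return JSON.parse(payload) as T;
}

// Hypothetical model reply and usage:
const raw = '<data-extraction>{"title":"Midscene"}</data-extraction>';
const extracted = parseTaggedJson<{ title: string }>(raw);
// extracted.title === 'Midscene'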
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n 
targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Create a small bbox around the point\n const bboxSize = 10;\n const x1 = Math.max(pixelX - bboxSize / 2, 0);\n const y1 = Math.max(pixelY - bboxSize / 2, 0);\n const x2 = Math.min(pixelX + bboxSize / 2, imageWidth);\n const y2 = Math.min(pixelY + bboxSize / 2, imageHeight);\n\n // Convert to Rect format\n resRect = {\n left: x1,\n top: y1,\n width: x2 - x1,\n height: y2 - y1,\n };\n\n // Apply offset if searching in a cropped area\n if (options.searchConfig?.rect) {\n resRect.left += options.searchConfig.rect.left;\n resRect.top += 
options.searchConfig.rect.top;\n }\n\n debugInspect('auto-glm resRect:', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n const res = await callAIFn(msgs, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = 
result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, modelFamily);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n reasoning_content: result.reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? 
false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","modelFamily","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","bboxSize","x1","y1","x2","y2","rectCenter","element","generateElementByPosition","res","rawResponse","JSON","Array","adaptBboxToRect","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AAuDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OAQrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElDM,OACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUP,eAC3BQ,uBAAuBR,eACvBS,4BAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAaf,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIgB,cAAchB,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIiB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIjB,QAAQ,YAAY,EAAE;QACxBO,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFe,eAAef,QAAQ,YAAY,CAAC,WAAW;QAC/CgB,aAAahB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCiB,cAAcjB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCkB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMe,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMzB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUP,eACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMoB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI2B;IACf;IAEA,IAAIV,UAAUP,cAAc;QAC1B,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB9B,MAA
MS;QAEvChB,aAAa,yBAAyBmC;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1CnC,aAAa,sBAAsBsC,OAAO,KAAK;QAC/CtC,aAAa,yBAAyBsC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DtC,aAAa,yBAAyB0C,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCtC,aAAa,iCAAiC;gBAAE2C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C7B,aAAa,+BAA+B;gBAAE6C;gBAAQE;YAAO;YAG7D,MAAMC,WAAW;YACjB,MAAMC,KAAKH,KAAK,GAAG,CAACD,SAASG,WAAW,GAAG;YAC3C,MAAME,KAAKJ,KAAK,GAAG,CAACC,SAASC,WAAW,GAAG;YAC3C,MAAMG,KAAKL,KAAK,GAAG,CAACD,SAASG,WAAW,GAAGpB;YAC3C,MAAMwB,KAAKN,KAAK,GAAG,CAACC,SAASC,WAAW,GAAGnB;YAG3CW,UAAU;gBACR,MAAMS;gBACN,KAAKC;gBACL,OAAOC,KAAKF;gBACZ,QAAQG,KAAKF;YACf;YAGA,IAAItC,QAAQ,YAAY,EAAE,MAAM;gBAC9B4B,QAAQ,IAAI,IAAI5B,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBAC9C4B,QAAQ,GAAG,IAAI5B,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YAC9C;YAEAZ,aAAa,qBAAqBwC;YAElC,MAAMa,aAAa;gBACjB,GAAGb,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMc,UAA+BC,0BACnCF,YACAjC;YAGF,IAAIkC,SACFb,kBAAkB;gBAACa;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMd;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,MAAMkB,MAAM,MAAMzC,SAASR,MAAMS;IAEjC,MAAMyC,cAAcC,KAAK,SAAS,CAACF,IAAI,OAAO;IAE9C,IAAIhB;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYc,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBG,MAAM,OAAO,CAACH,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAhB,UAAUoB,gBACRJ,IAAI,OAAO,CAAC,IAAI,EAChB5B,YACAC,aACAjB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BkB,oBACAC,qBACAd;YAGFjB,aAAa,WAAWwC;YAExB,MAAMa,aAAa;gBACjB,GAAGb,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMc,UAA+BC,0BACnCF,YACAjC;YAEFsB,SAAS,EAAE;YAEX,IAAIY,SACFb,kBAAkB;gBAACa;aAAQ;QAE/B;IACF,EAAE,OAAOO,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACnB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEoB,IAAI,CAAC,CAAC;aAFtBpB,SAAS;YAACoB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMtB;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAe;QACA,OAAOD,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAeQ,gBAAgBpD,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEoD,kBAAkB,EAAEjD,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMU,eAAe2C,4BAA4BjD;IACjD,MAAMkD,gCAAgCC,0BACpCjE,wBAAwB8D;IAE1B,MAAM1D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM/B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQ4D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA1D,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMmC,SAAS,MAAMC,yBACnB/D,MACAS;IAGF,IAAIuD;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAab,gBACjBY,aACA3D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BuE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DnE,aAAa,wBAAwBwE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASjB,MAAM,OAAO,CAACiB,OAC/B,GAAG,CAAC,CAACA,OACGhB,gBACLgB,MACA/D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqByE;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DzE,aAAa,iBAAiB2E;QAG9BN,cAAcQ,iBAAiBF,YAAYhE,QAAQ,IAAI
,EAAEI;QACzDf,aAAa,2BAA2BqE;IAC1C;IAEA,IAAIS,cAAc9D;IAClB,IAAIqD,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BhE,kBACAqD,aACAtD,AAAgB,iBAAhBA;QAEF+D,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBvE,OAO7C;IACC,MAAM,EAAEwE,SAAS,EAAEvE,OAAO,EAAEwE,aAAa,EAAE/E,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe+D;IACrB,MAAMpE,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM0E,wBAAwBC,uBAC5B5E,QAAQ,eAAe,IAAI,IAC3BwE;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKvE;YACL,QAAQ;QACV;IACF;IAGFuE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASkE;QACX;KACD;IAED,IAAInF,kBAAkB;QACpB,MAAM4B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMmC,SAAS,MAAMC,yBACnB/D,MACAS;IAEF,OAAO;QACL,aAAaqD,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;QACnB,mBAAmBA,OAAO,iBAAiB;IAC7C;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB5E,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeqE;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAMpF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASsE;QACX;KACD;IAED,MAAMxB,SAAS,MAAMtD,SAASR,MAAMS;IAEpC,OAAO;QACL,kBAAkBqD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport { adaptBboxToRect, expandSearchArea, mergeRects } from '../common';\nimport { parseAutoGLMLocateResponse } from './auto-glm/parser';\nimport { getAutoGLMLocatePrompt } from './auto-glm/prompt';\nimport { isAutoGLM } from './auto-glm/util';\nimport {\n extractDataQueryPrompt,\n parseXMLExtractionResponse,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport {\n callAI,\n callAIWithObjectResponse,\n callAIWithStringResponse,\n} from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n assert(\n targetElementDescription,\n 
'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = isAutoGLM(modelFamily)\n ? getAutoGLMLocatePrompt(modelFamily)\n : systemPromptToLocateElement(modelFamily);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: isAutoGLM(modelFamily)\n ? `Tap: ${userInstructionPrompt}`\n : userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n if (isAutoGLM(modelFamily)) {\n const { content: rawResponseContent, usage } =\n await callAIWithStringResponse(msgs, modelConfig);\n\n debugInspect('auto-glm rawResponse:', rawResponseContent);\n\n const parsed = parseAutoGLMLocateResponse(rawResponseContent);\n\n debugInspect('auto-glm thinking:', parsed.think);\n debugInspect('auto-glm coordinates:', parsed.coordinates);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] = [];\n\n if (parsed.error || !parsed.coordinates) {\n errors = [parsed.error || 'Failed to parse auto-glm response'];\n debugInspect('auto-glm parse error:', errors[0]);\n } else {\n const { x, y } = parsed.coordinates;\n\n debugInspect('auto-glm coordinates [0-999]:', { x, y });\n\n // Convert auto-glm coordinates [0,999] to pixel bbox\n // Map from [0,999] to pixel coordinates\n const pixelX = Math.round((x * imageWidth) / 1000);\n const pixelY = Math.round((y * imageHeight) / 1000);\n\n debugInspect('auto-glm pixel coordinates:', { pixelX, pixelY });\n\n // Create a small bbox around the point\n const bboxSize = 10;\n const x1 = Math.max(pixelX - bboxSize / 2, 0);\n const y1 = Math.max(pixelY - bboxSize / 2, 0);\n const x2 = Math.min(pixelX + bboxSize / 2, imageWidth);\n const y2 = Math.min(pixelY + bboxSize / 2, imageHeight);\n\n // Convert to Rect format\n resRect = {\n left: x1,\n top: y1,\n width: x2 - x1,\n height: y2 - y1,\n };\n\n // Apply offset if searching in a cropped area\n if (options.searchConfig?.rect) {\n resRect.left += options.searchConfig.rect.left;\n resRect.top += options.searchConfig.rect.top;\n }\n\n 
debugInspect('auto-glm resRect:', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n\n if (element) {\n matchedElements = [element];\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse: rawResponseContent,\n usage,\n reasoning_content: parsed.think,\n };\n }\n\n const res = await callAIFn(msgs, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: LocateResultElement[] = [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n modelFamily,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { modelFamily } = modelConfig;\n const screenshotBase64 = context.screenshot.base64;\n\n const systemPrompt = systemPromptToLocateSection(modelFamily);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n 
debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n modelFamily,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, modelFamily);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n modelFamily === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const screenshotBase64 = context.screenshot.base64;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig);\n\n // Parse XML response to JSON object\n const parseResult = parseXMLExtractionResponse<T>(rawResponse);\n\n return {\n parseResult,\n usage,\n reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(msgs, modelConfig);\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? 
false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","modelFamily","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","isAutoGLM","getAutoGLMLocatePrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","rawResponseContent","usage","callAIWithStringResponse","parsed","parseAutoGLMLocateResponse","resRect","matchedElements","errors","x","y","pixelX","Math","pixelY","bboxSize","x1","y1","x2","y2","rectCenter","element","generateElementByPosition","res","rawResponse","JSON","Array","adaptBboxToRect","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","reasoning_content","callAI","parseResult","parseXMLExtractionResponse","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;;;;AAuDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OAQrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElDM,OACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,UAAUP,eAC3BQ,uBAAuBR,eACvBS,4BAA4BT;IAEhC,IAAIU,eAAeT;IACnB,IAAIU,aAAaf,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIgB,cAAchB,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIiB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIjB,QAAQ,YAAY,EAAE;QACxBO,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFe,eAAef,QAAQ,YAAY,CAAC,WAAW;QAC/CgB,aAAahB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCiB,cAAcjB,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCkB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIZ,AAAgB,iBAAhBA,aAA8B;QACvC,MAAMe,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMzB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKI;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMH,UAAUP,eACZ,CAAC,KAAK,EAAEI,uBAAuB,GAC/BA;gBACN;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMoB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI2B;IACf;IAEA,IAAIV,UAAUP,cAA
c;QAC1B,MAAM,EAAE,SAASkB,kBAAkB,EAAEC,KAAK,EAAE,GAC1C,MAAMC,yBAAyB9B,MAAMS;QAEvChB,aAAa,yBAAyBmC;QAEtC,MAAMG,SAASC,2BAA2BJ;QAE1CnC,aAAa,sBAAsBsC,OAAO,KAAK;QAC/CtC,aAAa,yBAAyBsC,OAAO,WAAW;QAExD,IAAIE;QACJ,IAAIC,kBAAyC,EAAE;QAC/C,IAAIC,SAAmB,EAAE;QAEzB,IAAIJ,OAAO,KAAK,IAAI,CAACA,OAAO,WAAW,EAAE;YACvCI,SAAS;gBAACJ,OAAO,KAAK,IAAI;aAAoC;YAC9DtC,aAAa,yBAAyB0C,MAAM,CAAC,EAAE;QACjD,OAAO;YACL,MAAM,EAAEC,CAAC,EAAEC,CAAC,EAAE,GAAGN,OAAO,WAAW;YAEnCtC,aAAa,iCAAiC;gBAAE2C;gBAAGC;YAAE;YAIrD,MAAMC,SAASC,KAAK,KAAK,CAAEH,IAAIf,aAAc;YAC7C,MAAMmB,SAASD,KAAK,KAAK,CAAEF,IAAIf,cAAe;YAE9C7B,aAAa,+BAA+B;gBAAE6C;gBAAQE;YAAO;YAG7D,MAAMC,WAAW;YACjB,MAAMC,KAAKH,KAAK,GAAG,CAACD,SAASG,WAAW,GAAG;YAC3C,MAAME,KAAKJ,KAAK,GAAG,CAACC,SAASC,WAAW,GAAG;YAC3C,MAAMG,KAAKL,KAAK,GAAG,CAACD,SAASG,WAAW,GAAGpB;YAC3C,MAAMwB,KAAKN,KAAK,GAAG,CAACC,SAASC,WAAW,GAAGnB;YAG3CW,UAAU;gBACR,MAAMS;gBACN,KAAKC;gBACL,OAAOC,KAAKF;gBACZ,QAAQG,KAAKF;YACf;YAGA,IAAItC,QAAQ,YAAY,EAAE,MAAM;gBAC9B4B,QAAQ,IAAI,IAAI5B,QAAQ,YAAY,CAAC,IAAI,CAAC,IAAI;gBAC9C4B,QAAQ,GAAG,IAAI5B,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG;YAC9C;YAEAZ,aAAa,qBAAqBwC;YAElC,MAAMa,aAAa;gBACjB,GAAGb,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMc,UAA+BC,0BACnCF,YACAjC;YAGF,IAAIkC,SACFb,kBAAkB;gBAACa;aAAQ;QAE/B;QAEA,OAAO;YACL,MAAMd;YACN,aAAa;gBACX,UAAUC;gBACVC;YACF;YACA,aAAaP;YACbC;YACA,mBAAmBE,OAAO,KAAK;QACjC;IACF;IAEA,MAAMkB,MAAM,MAAMzC,SAASR,MAAMS;IAEjC,MAAMyC,cAAcC,KAAK,SAAS,CAACF,IAAI,OAAO;IAE9C,IAAIhB;IACJ,IAAIC,kBAAyC,EAAE;IAC/C,IAAIC,SACF,YAAYc,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBG,MAAM,OAAO,CAACH,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAhB,UAAUoB,gBACRJ,IAAI,OAAO,CAAC,IAAI,EAChB5B,YACAC,aACAjB,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BkB,oBACAC,qBACAd;YAGFjB,aAAa,WAAWwC;YAExB,MAAMa,aAAa;gBACjB,GAAGb,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMc,UAA+BC,0BACnCF,YACAjC;YAEFsB,SAAS,EAAE;YAEX,IAAIY,SACFb,kBAAkB;gBAACa;aAAQ;QAE/B;IACF,EAAE,OAAOO,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACnB,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEoB,IAAI,CAAC,CAAC;aAFtBpB,SAAS;YAACoB;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMtB;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAe;QACA,OAAOD,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAeQ,gBAAgBpD,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEoD,kBAAkB,EAAEjD,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,WAAW,EAAE,GAAGD;IACxB,MAAME,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAMU,eAAe2C,4BAA4BjD;IACjD,MAAMkD,gCAAgCC,0BACpCjE,wBAAwB8D;IAE1B,MAAM1D,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiD;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM/B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQ4D,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA1D,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMmC,SAAS,MAAMC,yBACnB/D,MACAS;IAGF,IAAIuD;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAab,gBACjBY,aACA3D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BuE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DnE,aAAa,wBAAwBwE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASjB,MAAM,OAAO,CAACiB,OAC/B,GAAG,CAAC,CAACA,OACGhB,gBACLgB,MACA/D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqByE;QAGlC,MAAME,aAAaC,WAAW;YAAC
L;eAAeE;SAAe;QAC7DzE,aAAa,iBAAiB2E;QAG9BN,cAAcQ,iBAAiBF,YAAYhE,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BqE;IAC1C;IAEA,IAAIS,cAAc9D;IAClB,IAAIqD,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BhE,kBACAqD,aACAtD,AAAgB,iBAAhBA;QAEF+D,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAaX,KAAK,SAAS,CAACW,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBvE,OAO7C;IACC,MAAM,EAAEwE,SAAS,EAAEvE,OAAO,EAAEwE,aAAa,EAAE/E,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe+D;IACrB,MAAMpE,mBAAmBL,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM0E,wBAAwBC,uBAC5B5E,QAAQ,eAAe,IAAI,IAC3BwE;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKvE;YACL,QAAQ;QACV;IACF;IAGFuE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASkE;QACX;KACD;IAED,IAAInF,kBAAkB;QACpB,MAAM4B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAM,EACJ,SAASuB,WAAW,EACpBrB,KAAK,EACLsD,iBAAiB,EAClB,GAAG,MAAMC,OAAOpF,MAAMS;IAGvB,MAAM4E,cAAcC,2BAA8BpC;IAElD,OAAO;QACLmC;QACAxD;QACAsD;IACF;AACF;AAEO,eAAeI,sBACpBC,WAAmB,EACnBhF,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeyE;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAMxF,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS0E;QACX;KACD;IAED,MAAM5B,SAAS,MAAMtD,SAASR,MAAMS;IAEpC,OAAO;QACL,kBAAkBqD,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -1,10 +1,63 @@
1
1
  import { paddingToMatchBlockByBase64 } from "@midscene/shared/img";
2
2
  import { getDebug } from "@midscene/shared/logger";
3
3
  import { assert } from "@midscene/shared/utils";
4
- import { buildYamlFlowFromPlans, fillBboxParam, finalizeActionName, findAllMidsceneLocatorField } from "../common.mjs";
4
+ import { buildYamlFlowFromPlans, fillBboxParam, findAllMidsceneLocatorField } from "../common.mjs";
5
5
  import { systemPromptToTaskPlanning } from "./prompt/llm-planning.mjs";
6
- import { callAIWithObjectResponse } from "./service-caller/index.mjs";
6
+ import { extractXMLTag } from "./prompt/util.mjs";
7
+ import { callAI, safeParseJson } from "./service-caller/index.mjs";
7
8
  const debug = getDebug('planning');
9
+ function parseXMLPlanningResponse(xmlString, modelFamily) {
10
+ const thought = extractXMLTag(xmlString, 'thought');
11
+ const note = extractXMLTag(xmlString, 'note');
12
+ const log = extractXMLTag(xmlString, 'log');
13
+ const error = extractXMLTag(xmlString, 'error');
14
+ const actionType = extractXMLTag(xmlString, 'action-type');
15
+ const actionParamStr = extractXMLTag(xmlString, 'action-param-json');
16
+ const completeTaskRegex = /<complete-task\s+success="(true|false)">([\s\S]*?)<\/complete-task>/i;
17
+ const completeTaskMatch = xmlString.match(completeTaskRegex);
18
+ let finalizeMessage;
19
+ let finalizeSuccess;
20
+ if (completeTaskMatch) {
21
+ finalizeSuccess = 'true' === completeTaskMatch[1];
22
+ finalizeMessage = completeTaskMatch[2]?.trim() || void 0;
23
+ }
24
+ if (!log) throw new Error('Missing required field: log');
25
+ let action = null;
26
+ if (actionType && 'null' !== actionType.toLowerCase()) {
27
+ const type = actionType.trim();
28
+ let param;
29
+ if (actionParamStr) try {
30
+ param = safeParseJson(actionParamStr, modelFamily);
31
+ } catch (e) {
32
+ throw new Error(`Failed to parse action-param-json: ${e}`);
33
+ }
34
+ action = {
35
+ type,
36
+ ...void 0 !== param ? {
37
+ param
38
+ } : {}
39
+ };
40
+ }
41
+ return {
42
+ ...thought ? {
43
+ thought
44
+ } : {},
45
+ ...note ? {
46
+ note
47
+ } : {},
48
+ log,
49
+ ...error ? {
50
+ error
51
+ } : {},
52
+ action,
53
+ ...void 0 !== finalizeMessage ? {
54
+ finalizeMessage
55
+ } : {},
56
+ ...void 0 !== finalizeSuccess ? {
57
+ finalizeSuccess
58
+ } : {}
59
+ };
60
+ }
8
61
  async function plan(userInstruction, opts) {
9
62
  const { context, modelConfig, conversationHistory } = opts;
10
63
  const { size } = context;
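The new parseXMLPlanningResponse above replaces the previous JSON-object planning response: the planner's reply is now read from XML tags (<thought>, <note>, <log>, <error>, <action-type>, <action-param-json>, and <complete-task success="...">), with <log> required and <action-param-json> run through safeParseJson. A usage sketch with a made-up reply; the action type, parameters, and model family value are illustrative, and the import specifier assumes the dist/es build shown in this diff:

import { parseXMLPlanningResponse } from './llm-planning.mjs'; // relative to dist/es/ai-model, for illustration

// Hypothetical planner reply; the tag names match the parser above, the values are made up.
const reply = `
<thought>The search box is visible at the top of the page.</thought>
<log>Type the keyword into the search box</log>
<action-type>Input</action-type>
<action-param-json>{"value": "midscene"}</action-param-json>
`;

const parsed = parseXMLPlanningResponse(reply, 'qwen2.5-vl');
// parsed.thought -> 'The search box is visible at the top of the page.'
// parsed.log     -> 'Type the keyword into the search box'
// parsed.action  -> { type: 'Input', param: { value: 'midscene' } }
// parsed.finalizeSuccess and parsed.finalizeMessage stay undefined (no <complete-task> tag)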
@@ -84,15 +137,16 @@ async function plan(userInstruction, opts) {
84
137
  ...instruction,
85
138
  ...historyLog
86
139
  ];
87
- const { content: planFromAI, contentString: rawResponse, usage, reasoning_content } = await callAIWithObjectResponse(msgs, modelConfig, {
140
+ const { content: rawResponse, usage, reasoning_content } = await callAI(msgs, modelConfig, {
88
141
  deepThink: 'unset' === opts.deepThink ? void 0 : opts.deepThink
89
142
  });
143
+ const planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);
90
144
  const actions = planFromAI.action ? [
91
145
  planFromAI.action
92
146
  ] : [];
93
147
  let shouldContinuePlanning = true;
94
- if (actions[0]?.type === finalizeActionName) {
95
- debug('finalize action planned, stop planning');
148
+ if (void 0 !== planFromAI.finalizeSuccess) {
149
+ debug('task finalized via complete-task tag, stop planning');
96
150
  shouldContinuePlanning = false;
97
151
  }
98
152
  const returnValue = {
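This hunk also changes how the planning loop terminates: instead of checking for a dedicated finalize action (the old finalizeActionName import), plan() now stops as soon as planFromAI.finalizeSuccess is defined, which parseXMLPlanningResponse sets from the <complete-task success="true|false"> tag. A sketch of such a completion reply, with illustrative values:

import { parseXMLPlanningResponse } from './llm-planning.mjs'; // relative to dist/es/ai-model, for illustration

// Hypothetical final reply: the presence of <complete-task> ends the loop.
const finalReply = `
<log>All requested steps are done</log>
<action-type>null</action-type>
<complete-task success="true">Order placed successfully</complete-task>
`;

const done = parseXMLPlanningResponse(finalReply, 'qwen2.5-vl');
// done.action          -> null   ('null' action-type means no further action)
// done.finalizeSuccess -> true
// done.finalizeMessage -> 'Order placed successfully'
// so shouldContinuePlanning becomes false in plan()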
@@ -127,6 +181,6 @@ async function plan(userInstruction, opts) {
127
181
  });
128
182
  return returnValue;
129
183
  }
130
- export { plan };
184
+ export { parseXMLPlanningResponse, plan };
131
185
 
132
186
  //# sourceMappingURL=llm-planning.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n finalizeActionName,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: opts.deepThink !== true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. 
Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n if (actions[0]?.type === finalizeActionName) {\n debug('finalize action planned, stop planning');\n shouldContinuePlanning = false;\n }\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n modelFamily,\n );\n }\n });\n });\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return 
returnValue;\n}\n"],"names":["debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","modelFamily","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","undefined","actions","shouldContinuePlanning","finalizeActionName","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam"],"mappings":";;;;;;AAuBA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEK,WAAW,EAAE,GAAGJ;IAExB,MAAMK,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;QAC7B,gBAAgBA,AAAmB,SAAnBA,KAAK,SAAS;IAChC;IAEA,IAAIS,eAAeJ;IACnB,IAAIK,aAAaN,KAAK,KAAK;IAC3B,IAAIO,cAAcP,KAAK,MAAM;IAC7B,MAAMQ,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMQ,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,yBACRL,MACAlB,aACA;QACE,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAe0B,SAAY1B,KAAK,SAAS;IACpE;IAGF,MAAM2B,UAAUN,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,IAAIO,yBAAyB;IAC7B,IAAID,OAAO,CAAC,EAAE,EAAE,SAASE,oBAAoB;QAC3CjC,MAAM;QACNgC,yBAAyB;IAC3B;IACA,MAAME,cAAkC;QACtC,GAAGT,UAAU;QACbM;QACAL;QACAC;QACAC;QACA,UAAUO,uBAAuBJ,SAAS3B,KAAK,WAAW;QAC1D4B;IACF;IAEAI,OAAOX,YAAY;IAEnBM,QAAQ,OAAO,CAAC,CAACM;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACiC,SAAWA,OAAO,IAAI,KAAKC;QAG9BtC,MAAM,+BAA+BuC;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENvC,MAAM,gBAAgBwC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgBjC,AAAgBoB,WAAhBpB,aAElB2B,OAAO,KAAK,CAACK,MAAM,GAAGE,cACpBD,cACA7B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEAH,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOQ;AACT"}
+ {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig, TModelFamily } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { extractXMLTag } from './prompt/util';\nimport { callAI } from './service-caller/index';\nimport { safeParseJson } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\n/**\n * Parse XML response from LLM and convert to RawResponsePlanningAIResponse\n */\nexport function parseXMLPlanningResponse(\n xmlString: string,\n modelFamily: TModelFamily | undefined,\n): RawResponsePlanningAIResponse {\n const thought = extractXMLTag(xmlString, 'thought');\n const note = extractXMLTag(xmlString, 'note');\n const log = extractXMLTag(xmlString, 'log');\n const error = extractXMLTag(xmlString, 'error');\n const actionType = extractXMLTag(xmlString, 'action-type');\n const actionParamStr = extractXMLTag(xmlString, 'action-param-json');\n\n // Parse complete-task tag with success attribute\n const completeTaskRegex =\n /<complete-task\\s+success=\"(true|false)\">([\\s\\S]*?)<\\/complete-task>/i;\n const completeTaskMatch = xmlString.match(completeTaskRegex);\n let finalizeMessage: string | undefined;\n let finalizeSuccess: boolean | undefined;\n\n if (completeTaskMatch) {\n finalizeSuccess = completeTaskMatch[1] === 'true';\n finalizeMessage = completeTaskMatch[2]?.trim() || undefined;\n }\n\n // Validate required fields\n if (!log) {\n throw new Error('Missing required field: log');\n }\n\n // Parse action\n let action: any = null;\n if (actionType && actionType.toLowerCase() !== 'null') {\n const type = actionType.trim();\n let param: any = undefined;\n\n if (actionParamStr) {\n try {\n // Parse the JSON string in action-param-json\n param = safeParseJson(actionParamStr, modelFamily);\n } catch (e) {\n throw new Error(`Failed to parse action-param-json: ${e}`);\n }\n }\n\n action = {\n type,\n ...(param !== undefined ? { param } : {}),\n };\n }\n\n return {\n ...(thought ? { thought } : {}),\n ...(note ? { note } : {}),\n log,\n ...(error ? { error } : {}),\n action,\n ...(finalizeMessage !== undefined ? { finalizeMessage } : {}),\n ...(finalizeSuccess !== undefined ? 
{ finalizeSuccess } : {}),\n };\n}\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n\n const { modelFamily } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n modelFamily,\n includeBbox: opts.includeBbox,\n includeThought: opts.deepThink !== true,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (modelFamily === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: rawResponse,\n usage,\n reasoning_content,\n } = await callAI(msgs, modelConfig, {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n });\n\n // Parse XML response to JSON object\n const planFromAI = parseXMLPlanningResponse(rawResponse, modelFamily);\n\n const actions = planFromAI.action ? 
[planFromAI.action] : [];\n let shouldContinuePlanning = true;\n\n // Check if task is finalized via complete-task tag\n if (planFromAI.finalizeSuccess !== undefined) {\n debug('task finalized via complete-task tag, stop planning');\n shouldContinuePlanning = false;\n }\n\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && modelFamily !== undefined) {\n // Always use model family to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n modelFamily,\n );\n }\n });\n });\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["debug","getDebug","parseXMLPlanningResponse","xmlString","modelFamily","thought","extractXMLTag","note","log","error","actionType","actionParamStr","completeTaskRegex","completeTaskMatch","finalizeMessage","finalizeSuccess","undefined","Error","action","type","param","safeParseJson","e","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","rawResponse","usage","reasoning_content","callAI","planFromAI","actions","shouldContinuePlanning","returnValue","buildYamlFlowFromPlans","assert","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam"],"mappings":";;;;;;;AAwBA,MAAMA,QAAQC,SAAS;AAKhB,SAASC,yBACdC,SAAiB,EACjBC,WAAqC;IAErC,MAAMC,UAAUC,cAAcH,WAAW;IACzC,MAAMI,OAAOD,cAAcH,WAAW;IACtC,MAAMK,MAAMF,cAAcH,WAAW;IACrC,MAAMM,QAAQH,cAAcH,WAAW;IACvC,MAAMO,aAAaJ,cAAcH,WAAW;IAC5C,MAAMQ,iBAAiBL,cAAcH,WAAW;IAGhD,MAAMS,oBACJ;IACF,MAAMC,oBAAoBV,UAAU,KAAK,CAACS;IAC1C,IAAIE;IACJ,IAAIC;IAEJ,IAAIF,mBAAmB;QACrBE,kBAAkBF,AAAyB,WAAzBA,iBAAiB,CAAC,EAAE;QACtCC,kBAAkBD,iBAAiB,CAAC,EAAE,EAAE,UAAUG;IACpD;IAGA,IAAI,CAACR,KACH,MAAM,IAAIS,MAAM;IAIlB,IAAIC,SAAc;IAClB,IAAIR,cAAcA,AAA6B,WAA7BA,WAAW,WAAW,IAAe;QACrD,MAAMS,OAAOT,WAAW,IAAI;QAC5B,IAAIU;QAEJ,IAAIT,gBACF,IAAI;YAEFS,QAAQC,cAAcV,gBAAgBP;QACxC,EAAE,OAAOkB,GAAG;YACV,MAAM,IAAIL,MAAM,CAAC,mCAAmC,EAAEK,GAAG;QAC3D;QAGFJ,SAAS;YACPC;YACA,GAAIC,AAAUJ,WAAVI,QAAsB;gBAAEA;YAAM,IAAI,CAAC,CAAC;QAC1C;IACF;IAEA,OAAO;QACL,GAAIf,UAAU;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAC9B,GAAIE,OAAO;YAAEA;QAAK,IAAI,CAAC,CAAC;QACxBC;QACA,GAAIC,QAAQ;YAAEA;QAAM,IAAI,CAAC,CAAC;QAC1BS;QACA,GAAIJ,AAAoBE,WAApBF,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;QAC5D,GAAIC,AAAoBC,WAApBD,kBAAgC;YAAEA;QAAgB,IAAI,CAAC,CAAC;IAC9D;AACF;AAEO,eAAeQ,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,MAAM;IAElD,MAAM,EAAEtB,WAAW,EAAE,G
AAGuB;IAExB,MAAMI,eAAe,MAAMC,2BAA2B;QACpD,aAAaP,KAAK,WAAW;QAC7BrB;QACA,aAAaqB,KAAK,WAAW;QAC7B,gBAAgBA,AAAmB,SAAnBA,KAAK,SAAS;IAChC;IAEA,IAAIQ,eAAeH;IACnB,IAAII,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAI/B,AAAgB,iBAAhBA,aAA8B;QAChC,MAAMkC,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBf,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMgB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEhB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAIkB;IAEJ,IAAId,oBAAoB,sBAAsB,EAAE;QAC9Cc,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGd,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKK;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAL,oBAAoB,mCAAmC;IACzD,OACEc,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFL,oBAAoB,MAAM,CAACc;IAC3B,MAAMC,aAAaf,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMmB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,WAAW,EACpBC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,OAAOJ,MAAMjB,aAAa;QAClC,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAeT,SAAYS,KAAK,SAAS;IACpE;IAGA,MAAMwB,aAAa/C,yBAAyB2C,aAAazC;IAEzD,MAAM8C,UAAUD,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,IAAIE,yBAAyB;IAG7B,IAAIF,AAA+BjC,WAA/BiC,WAAW,eAAe,EAAgB;QAC5CjD,MAAM;QACNmD,yBAAyB;IAC3B;IAEA,MAAMC,cAAkC;QACtC,GAAGH,UAAU;QACbC;QACAL;QACAC;QACAC;QACA,UAAUM,uBAAuBH,SAASzB,KAAK,WAAW;QAC1D0B;IACF;IAEAG,OAAOL,YAAY;IAEnBC,QAAQ,OAAO,CAAC,CAAChC;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAMqC,sBAAsB9B,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACP,SAAWA,OAAO,IAAI,KAAKC;QAG9BnB,MAAM,+BAA+BuD;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENvD,MAAM,gBAAgBwD;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAezC,OAAO,KAAK,CAACwC,MAAM;YACxC,IAAIC,gBAAgBvD,AAAgBY,WAAhBZ,aAElBc,OAAO,KAAK,CAACwC,MAAM,GAAGE,cACpBD,cACAzB,YACAC,aACAC,YACAC,aACAjC;QAGN;IACF;IAEAwB,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMiB;YACR;SACD;IACH;IAEA,OAAOO;AACT"}
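The sourcemap above carries the full rewritten source of `llm-planning.ts`: `plan()` now calls `callAI` for a plain-text completion and feeds it through the new `parseXMLPlanningResponse`, instead of requesting a structured object via `callAIWithObjectResponse`. As a rough illustration only, a raw response in the new tag format and its approximate parsed shape might look like the sketch below; the tag names come from that source, but every value (and the action type and parameter) is invented.

```ts
// Example input for the new XML planning format (all values invented).
const rawResponse = [
  '<thought>The search box is visible; type the query first.</thought>',
  '<log>Typing "midscene" into the search box</log>',
  '<action-type>Input</action-type>',
  '<action-param-json>{"value": "midscene"}</action-param-json>',
].join('\n');

// parseXMLPlanningResponse(rawResponse, modelFamily) would yield roughly:
const parsed = {
  thought: 'The search box is visible; type the query first.',
  log: 'Typing "midscene" into the search box',
  action: { type: 'Input', param: { value: 'midscene' } },
  // finalizeSuccess / finalizeMessage are only set when a <complete-task> tag is present
};
console.log(rawResponse.length, parsed.action.type);
```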