@midscene/core 1.0.5-beta-20251230135517.0 → 1.0.5-beta-20251231065132.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/dist/es/agent/agent.mjs +3 -5
  2. package/dist/es/agent/agent.mjs.map +1 -1
  3. package/dist/es/agent/task-builder.mjs +1 -0
  4. package/dist/es/agent/task-builder.mjs.map +1 -1
  5. package/dist/es/agent/tasks.mjs +6 -4
  6. package/dist/es/agent/tasks.mjs.map +1 -1
  7. package/dist/es/agent/utils.mjs +1 -1
  8. package/dist/es/ai-model/inspect.mjs +4 -2
  9. package/dist/es/ai-model/inspect.mjs.map +1 -1
  10. package/dist/es/ai-model/llm-planning.mjs +3 -2
  11. package/dist/es/ai-model/llm-planning.mjs.map +1 -1
  12. package/dist/es/ai-model/service-caller/index.mjs +48 -9
  13. package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
  14. package/dist/es/service/index.mjs +7 -4
  15. package/dist/es/service/index.mjs.map +1 -1
  16. package/dist/es/types.mjs.map +1 -1
  17. package/dist/es/utils.mjs +2 -2
  18. package/dist/lib/agent/agent.js +3 -5
  19. package/dist/lib/agent/agent.js.map +1 -1
  20. package/dist/lib/agent/task-builder.js +1 -0
  21. package/dist/lib/agent/task-builder.js.map +1 -1
  22. package/dist/lib/agent/tasks.js +6 -4
  23. package/dist/lib/agent/tasks.js.map +1 -1
  24. package/dist/lib/agent/utils.js +1 -1
  25. package/dist/lib/ai-model/inspect.js +4 -2
  26. package/dist/lib/ai-model/inspect.js.map +1 -1
  27. package/dist/lib/ai-model/llm-planning.js +3 -2
  28. package/dist/lib/ai-model/llm-planning.js.map +1 -1
  29. package/dist/lib/ai-model/service-caller/index.js +50 -8
  30. package/dist/lib/ai-model/service-caller/index.js.map +1 -1
  31. package/dist/lib/service/index.js +7 -4
  32. package/dist/lib/service/index.js.map +1 -1
  33. package/dist/lib/types.js.map +1 -1
  34. package/dist/lib/utils.js +2 -2
  35. package/dist/types/agent/agent.d.ts +1 -1
  36. package/dist/types/agent/tasks.d.ts +1 -1
  37. package/dist/types/ai-model/inspect.d.ts +2 -0
  38. package/dist/types/ai-model/llm-planning.d.ts +1 -1
  39. package/dist/types/ai-model/service-caller/index.d.ts +12 -2
  40. package/dist/types/types.d.ts +4 -0
  41. package/dist/types/yaml.d.ts +0 -1
  42. package/package.json +2 -2
@@ -1 +1 @@
1
- {"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n qwen3_vl_enable_thinking?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfigForPlanning;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars'\n ? modelConfigForPlanning.uiTarsModelVersion\n : undefined;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion\n ? uiTarsPlanning\n : plan)(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n qwen3_vl_enable_thinking:\n modelConfigForPlanning.vlMode === 'qwen3-vl' &&\n qwen3_vl_enable_thinking,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.output = {\n actions: actions || [],\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n };\n executorContext.uiContext = uiContext;\n\n const finalActions = [...(actions || [])];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n let errorFlag = false;\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check if task is complete\n if (!planResult?.more_actions_needed_by_instruction) {\n if (errorFlag) {\n debug(\n 'more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run',\n );\n } else {\n break;\n }\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage =\n 'I have finished the action previously planned.';\n }\n }\n\n const finalResult = {\n output: {\n yamlFlow,\n },\n runner,\n };\n return finalResult;\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n undefined,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","qwen3_vl_enable_thinking","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","startTime","Date","vlMode","uiTarsModelVersion","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","executables","errorFlag","String","Error","errorMsg","finalResult","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","lastCheckStart","errorThought","currentCheckStart","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA+CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAoBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA6BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,wBAAkC,EAQlC;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMjB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIkB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJL,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEc,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAGlC,MAAO,KAAM;YACX,MAAMb,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;gBACF;gBACA,UAAU,OAAOb,OAAOC;oBACtB,MAAMmB,YAAYC,KAAK,GAAG;oBAC1B,MAAM,EAAEnB,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEoB,MAAM,EAAE,GAAG7B;oBACnB,MAAM8B,qBACJD,AAAW,kBAAXA,SACI7B,uBAAuB,kBAAkB,GACzCyB;oBAEN,MAAMM,cAAc,IAAI,CAAC,cAAc;oBACvCxC,MACE,sCACAwC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDtB,OAAOuB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,aAAa,MAAOL,AAAAA,CAAAA,qBACtBM,iBACAC,IAAG,EAAG9B,MAAM,eAAe,EAAE;wBAC/B,SAASE;wBACT,eAAeF,MAAM,YAAY;wBACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CwB;wBACA,aAAa/B;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;wBACbI;wBACA,0BACEpB,AAAkC,eAAlCA,uBAAuB,MAAM,IAC7BqB;oBACJ;oBACA9B,MAAM,cAAc+C,KAAK,SAAS,CAACH,YAAY,MAAM;oBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;oBAEJ3B,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCoC;oBACF;oBACApC,gBAAgB,IAAI,CAAC,KAAK,GAAGmC;oBAC7BnC,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS+B,WAAW,EAAE;wBACtBE;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA3B,gBAAgB,SAAS,GAAGC;oBAE5B,MAAMqC,eAAe;2BAAKP,WAAW,EAAE;qBAAE;oBAEzC,IAAIM,OAAO;wBACT,MAAME,UAAUnB,KAAK,GAAG;wBACxB,MAAMoB,gBAAgBH,QAASE,CAAAA,UAAUpB,SAAQ;wBACjD,IAAIqB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;oBAErC;oBAEAtC,OAAO,CAACgC,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAE3D,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAML,aAAatB,QAAQ;YAG3B,MAAMd,QAAQoC,YAAY,WAAW,EAAE;YACvCZ,SAAS,IAAI,IAAKY,YAAY,YAAY,EAAE;YAE5C,IAAIc;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9ClD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAOwB,OAAO;gBACd,OAAOtC,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEsC,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5EvC,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDmC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAGnD,IAAIgB,YAAY;YAChB,IAAI;gBACF,MAAM9C,QAAQ,YAAY,CAAC6C,YAAY,KAAK;YAC9C,EAAE,OAAOP,OAAY;gBACnBQ,YAAY;gBACZxB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,+BAA+B,EAAEgB,OAAO,WAAWS,OAAOT,QAAQ;gBACrHnD,MACE,yFACAmD,iBAAiBU,QAAQV,MAAM,OAAO,GAAGS,OAAOT,QAChD,6CACAhB;YAEJ;YAEA,IAAIA,8BAA8BjC,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAAC+B,YAAY,oCACf,IAAIe,WACF3D,MACE;iBAGF;YAKJ,EAAE+B;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM6B,WAAW,CAAC,UAAU,EAAE7B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOpB,QAAQ,eAAe,CAACiD;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAC7C;QAEN;QAEA,MAAMC,cAAc;YAClB,QAAQ;gBACN/B;YACF;YACAZ;QACF;QACA,OAAO2C;IACT;IAEQ,oBACNC,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOjD,OAAOsD;gBACtB,MAAM,EAAEvD,IAAI,EAAE,GAAGuD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZ1D,KAAK,GAAG,GAAG;wBACT0D;oBACF;gBACF;gBAGA,MAAMvD,YAAYoD,YAAY,SAAS;gBACvCnD,OAAOD,WAAW;gBAElB,MAAMwD,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DnE,MAAM;oBACN,MAAMgF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOjB,OAAO;oBACd,IAAIA,iBAAiB+B,cACnBV,UAAUrB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEgC,IAAI,EAAE/B,KAAK,EAAEgC,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAInB,AAAS,cAATA,MAEPqB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACLlE,OACEgE,MAAM,CAACP,YAAY,KAAK1C,QACxB;oBAEFmD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACqB,cAAc;oBACtCtE,KAAK,KAAK,GAAGqC;oBACbrC,KAAK,OAAO,GAAGqE;oBACf,MAAM,IAAIvB,MAAM,CAAC,kBAAkB,EAAEuB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACLnB;oBACAgC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMvD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEkD,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASlB,KAAK,SAAS,CAACkB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAMhD,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACwD;QAE1C,IAAI,CAAC/C,QACH,MAAM,IAAIuC,MACR;QAIJ,MAAM,EAAEtC,MAAM,EAAE6D,OAAO,EAAE,GAAG9D;QAE5B,OAAO;YACLC;YACA6D;YACAhE;QACF;IACF;IAEQ,UAAUkE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;QACF;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBrB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEuB,UAAU,EAAErB,gBAAgB,EAAE,GAAGsB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM5E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW6E;QAE1B,MAAMvE,SAASP,QAAQ,SAAS;QAChC,MAAM,EAAE+E,SAAS,EAAEC,eAAe,EAAE,GAAG1B;QAEvChD,OAAOqE,WAAW;QAClBrE,OAAOyE,WAAW;QAClBzE,OAAO0E,iBAAiB;QAExB1E,OACE0E,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBzD,KAAK,GAAG;QACjC,IAAI0D,iBAAiBD;QACrB,IAAIE,eAAe;QAEnB,MAAOD,iBAAiBD,oBAAoBF,UAAW;YACrD,MAAMK,oBAAoB5D,KAAK,GAAG;YAClC0D,iBAAiBE;YACjB,MAAM5B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAoB,YACAvB,aACAhC,QACAkC;YAGF,MAAM9C,SAAU,MAAMT,QAAQ,YAAY,CAACwD;YAO3C,IAAI/C,QAAQ,QACV,OAAO;gBACL,QAAQY;gBACRd;YACF;YAGF4E,eACE1E,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEmE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMS,MAAM7D,KAAK,GAAG;YACpB,IAAI6D,MAAMD,oBAAoBJ,iBAAiB;gBAC7C,MAAMpC,gBAAgBoC,kBAAmBK,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAME,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQ1C;gBACV;gBACA,MAAM5C,QAAQ,MAAM,CAACsF;YACvB;QACF;QAEA,OAAOtF,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEmF,cAAc;IACnE;IAxkBA,YACEI,iBAAoC,EACpCC,OAAgB,EAChBC,IAMC,CACD;QAjCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAkBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,mBAAmB,GAAGD,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIE,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;QAClC;IACF;AAgjBF"}
1
+ {"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfigForPlanning;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars'\n ? modelConfigForPlanning.uiTarsModelVersion\n : undefined;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion\n ? uiTarsPlanning\n : plan)(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n deepThink,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n reasoning_content,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n };\n executorContext.uiContext = uiContext;\n\n const finalActions = [...(actions || [])];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n let errorFlag = false;\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check if task is complete\n if (!planResult?.more_actions_needed_by_instruction) {\n if (errorFlag) {\n debug(\n 'more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run',\n );\n } else {\n break;\n }\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage =\n 'I have finished the action previously planned.';\n }\n }\n\n const finalResult = {\n output: {\n yamlFlow,\n },\n runner,\n };\n return finalResult;\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump, reasoning_content } = extractResult;\n applyDump(dump);\n task.reasoning_content = reasoning_content;\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n undefined,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","startTime","Date","vlMode","uiTarsModelVersion","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","reasoning_content","finalActions","timeNow","timeRemaining","executables","errorFlag","String","Error","errorMsg","finalResult","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","lastCheckStart","errorThought","currentCheckStart","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA+CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAoBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA6BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMjB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIkB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJL,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEc,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAGlC,MAAO,KAAM;YACX,MAAMb,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;gBACF;gBACA,UAAU,OAAOb,OAAOC;oBACtB,MAAMmB,YAAYC,KAAK,GAAG;oBAC1B,MAAM,EAAEnB,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEoB,MAAM,EAAE,GAAG7B;oBACnB,MAAM8B,qBACJD,AAAW,kBAAXA,SACI7B,uBAAuB,kBAAkB,GACzCyB;oBAEN,MAAMM,cAAc,IAAI,CAAC,cAAc;oBACvCxC,MACE,sCACAwC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDtB,OAAOuB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,aAAa,MAAOL,AAAAA,CAAAA,qBACtBM,iBACAC,IAAG,EAAG9B,MAAM,eAAe,EAAE;wBAC/B,SAASE;wBACT,eAAeF,MAAM,YAAY;wBACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CwB;wBACA,aAAa/B;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;wBACbI;wBACAC;oBACF;oBACA9B,MAAM,cAAc+C,KAAK,SAAS,CAACH,YAAY,MAAM;oBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACLC,iBAAiB,EAClB,GAAGX;oBAEJ3B,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCoC;oBACF;oBACApC,gBAAgB,IAAI,CAAC,KAAK,GAAGmC;oBAC7BnC,gBAAgB,IAAI,CAAC,iBAAiB,GAAGsC;oBACzCtC,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS+B,WAAW,EAAE;wBACtBE;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA3B,gBAAgB,SAAS,GAAGC;oBAE5B,MAAMsC,eAAe;2BAAKR,WAAW,EAAE;qBAAE;oBAEzC,IAAIM,OAAO;wBACT,MAAMG,UAAUpB,KAAK,GAAG;wBACxB,MAAMqB,gBAAgBJ,QAASG,CAAAA,UAAUrB,SAAQ;wBACjD,IAAIsB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;oBAErC;oBAEAvC,OAAO,CAACgC,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAE3D,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAML,aAAatB,QAAQ;YAG3B,MAAMd,QAAQoC,YAAY,WAAW,EAAE;YACvCZ,SAAS,IAAI,IAAKY,YAAY,YAAY,EAAE;YAE5C,IAAIe;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CnD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAOwB,OAAO;gBACd,OAAOtC,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEsC,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5EvC,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDmC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAGnD,IAAIiB,YAAY;YAChB,IAAI;gBACF,MAAM/C,QAAQ,YAAY,CAAC8C,YAAY,KAAK;YAC9C,EAAE,OAAOR,OAAY;gBACnBS,YAAY;gBACZzB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,+BAA+B,EAAEgB,OAAO,WAAWU,OAAOV,QAAQ;gBACrHnD,MACE,yFACAmD,iBAAiBW,QAAQX,MAAM,OAAO,GAAGU,OAAOV,QAChD,6CACAhB;YAEJ;YAEA,IAAIA,8BAA8BjC,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAAC+B,YAAY,oCACf,IAAIgB,WACF5D,MACE;iBAGF;YAKJ,EAAE+B;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM8B,WAAW,CAAC,UAAU,EAAE9B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOpB,QAAQ,eAAe,CAACkD;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAC7C;QAEN;QAEA,MAAMC,cAAc;YAClB,QAAQ;gBACNhC;YACF;YACAZ;QACF;QACA,OAAO4C;IACT;IAEQ,oBACNC,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOlD,OAAOuD;gBACtB,MAAM,EAAExD,IAAI,EAAE,GAAGwD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZ3D,KAAK,GAAG,GAAG;wBACT2D;oBACF;gBACF;gBAGA,MAAMxD,YAAYqD,YAAY,SAAS;gBACvCpD,OAAOD,WAAW;gBAElB,MAAMyD,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DpE,MAAM;oBACN,MAAMiF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOlB,OAAO;oBACd,IAAIA,iBAAiBgC,cACnBV,UAAUtB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEiC,IAAI,EAAEhC,KAAK,EAAEiC,OAAO,EAAEX,IAAI,EAAEnB,iBAAiB,EAAE,GAAGwB;gBAC1DN,UAAUC;gBACV3D,KAAK,iBAAiB,GAAGwC;gBAEzB,IAAI+B,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAInB,AAAS,cAATA,MAEPqB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACLnE,OACEiE,MAAM,CAACP,YAAY,KAAK3C,QACxB;oBAEFoD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACqB,cAAc;oBACtCvE,KAAK,KAAK,GAAGqC;oBACbrC,KAAK,OAAO,GAAGsE;oBACf,MAAM,IAAIvB,MAAM,CAAC,kBAAkB,EAAEuB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACLpB;oBACAiC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMxD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEmD,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASnB,KAAK,SAAS,CAACmB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAMjD,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACyD;QAE1C,IAAI,CAAChD,QACH,MAAM,IAAIwC,MACR;QAIJ,MAAM,EAAEvC,MAAM,EAAE8D,OAAO,EAAE,GAAG/D;QAE5B,OAAO;YACLC;YACA8D;YACAjE;QACF;IACF;IAEQ,UAAUmE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;QACF;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBrB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEuB,UAAU,EAAErB,gBAAgB,EAAE,GAAGsB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM7E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW8E;QAE1B,MAAMxE,SAASP,QAAQ,SAAS;QAChC,MAAM,EAAEgF,SAAS,EAAEC,eAAe,EAAE,GAAG1B;QAEvCjD,OAAOsE,WAAW;QAClBtE,OAAO0E,WAAW;QAClB1E,OAAO2E,iBAAiB;QAExB3E,OACE2E,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmB1D,KAAK,GAAG;QACjC,IAAI2D,iBAAiBD;QACrB,IAAIE,eAAe;QAEnB,MAAOD,iBAAiBD,oBAAoBF,UAAW;YACrD,MAAMK,oBAAoB7D,KAAK,GAAG;YAClC2D,iBAAiBE;YACjB,MAAM5B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAoB,YACAvB,aACAjC,QACAmC;YAGF,MAAM/C,SAAU,MAAMT,QAAQ,YAAY,CAACyD;YAO3C,IAAIhD,QAAQ,QACV,OAAO;gBACL,QAAQY;gBACRd;YACF;YAGF6E,eACE3E,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEoE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMS,MAAM9D,KAAK,GAAG;YACpB,IAAI8D,MAAMD,oBAAoBJ,iBAAiB;gBAC7C,MAAMpC,gBAAgBoC,kBAAmBK,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAME,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQ1C;gBACV;gBACA,MAAM7C,QAAQ,MAAM,CAACuF;YACvB;QACF;QAEA,OAAOvF,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEoF,cAAc;IACnE;IAzkBA,YACEI,iBAAoC,EACpCC,OAAgB,EAChBC,IAMC,CACD;QAjCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAkBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,mBAAmB,GAAGD,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIE,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;QAClC;IACF;AAijBF"}
@@ -101,7 +101,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
101
101
  return;
102
102
  }
103
103
  }
104
- const getMidsceneVersion = ()=>"1.0.5-beta-20251230135517.0";
104
+ const getMidsceneVersion = ()=>"1.0.5-beta-20251231065132.0";
105
105
  const parsePrompt = (prompt)=>{
106
106
  if ('string' == typeof prompt) return {
107
107
  textPrompt: prompt,
@@ -142,7 +142,8 @@ async function AiLocateElement(options) {
142
142
  errors: errors
143
143
  },
144
144
  rawResponse,
145
- usage: res.usage
145
+ usage: res.usage,
146
+ reasoning_content: res.reasoning_content
146
147
  };
147
148
  }
148
149
  async function AiLocateSection(options) {
@@ -250,7 +251,8 @@ async function AiExtractElementInfo(options) {
250
251
  const result = await callAIWithObjectResponse(msgs, AIActionType.EXTRACT_DATA, modelConfig);
251
252
  return {
252
253
  parseResult: result.content,
253
- usage: result.usage
254
+ usage: result.usage,
255
+ reasoning_content: result.reasoning_content
254
256
  };
255
257
  }
256
258
  async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from '../common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(\n msgs,\n AIActionType.INSPECT_ELEMENT, // Reuse existing action type for now\n modelConfig,\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;AAsDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IASC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,OACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,4BAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;QACxBO,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAad,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCe,cAAcf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCgB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,iBAAXA,QAAyB;QAClC,MAAMa,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMvB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMkB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMC,MAAM,MAAMlB,SAASR,MAAM2B,aAAa,eAAe,EAAElB;IAE/D,MAAMmB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAI,UAAUI,gBACRR,IAAI,OAAO,CAAC,IAAI,EAChBP,YACAC,aACAf,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BgB,oBACAC,qBACAZ;YAGFjB,aAAa,WAAWqC;YAExB,MAAMK,aAAa;gBACjB,GAAGL,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMM,UAA+BC,0BACnCF,YACAtB;YAEFmB,SAAS,EAAE;YAEX,IAAII,SACFL,kBAAkB;gBAACK;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACN,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEO,IAAI,CAAC,CAAC;aAFtBP,SAAS;YAACO;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMT;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;IAClB;AACF;AAEO,eAAee,gBAAgBpC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEoC,kBAAkB,EAAEjC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMU,eAAe2B,4BAA4BjC;IACjD,MAAMkC,gCAAgCC,0BACpCjD,wBAAwB8C;IAE1B,MAAM1C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMjB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQ4C,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA1C,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMqB,SAAS,MAAMC,yBACnB/C,MACA2B,aAAa,YAAY,EACzBlB;IAGF,IAAIuC;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,gBACjBe,aACA3C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BuD;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DnD,aAAa,wBAAwBwD;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASpB,MAAM,OAAO,CAACoB,OAC/B,GAAG,CAAC,CAACA,OACGnB,gBACLmB,MACA/C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqByD;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DzD,aAAa,iBAAiB2D;QAG9BN,cAAcQ,iBAAiBF,YAAYhD,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BqD;IAC1C;IAEA,IAAIS,cAAc9C;IAClB,IAAIqC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BhD,kBACAqC,aACAtC,AAAW,iBAAXA;QAEF+C,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAajB,KAAK,SAAS,CAACiB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBvD,OAO7C;IACC,MAAM,EAAEwD,SAAS,EAAEvD,OAAO,EAAEwD,aAAa,EAAE/D,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe+C;IACrB,MAAM,EAAEpD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM0D,wBAAwBC,uBAC5B5D,QAAQ,eAAe,IAAI,IAC3BwD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKvD;YACL,QAAQ;QACV;IACF;IAGFuD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASkD;QACX;KACD;IAED,IAAInE,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMqB,SAAS,MAAMC,yBACnB/C,MACA2B,aAAa,YAAY,EACzBlB;IAEF,OAAO;QACL,aAAaqC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB5D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeqD;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAMpE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,MAAMxB,SAAS,MAAMtC,SACnBR,MACA2B,aAAa,eAAe,EAC5BlB;IAGF,OAAO;QACL,kBAAkBqC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.mjs","sources":["../../../src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from '../common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from '../common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `this is the reference image named '${item.name}':`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n reasoning_content?: string;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const targetElementDescriptionText = extraTextFromUserPrompt(\n targetElementDescription,\n );\n const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if (\n 'bbox' in res.content &&\n Array.isArray(res.content.bbox) &&\n res.content.bbox.length >= 1\n ) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement = generateElementByPosition(\n rectCenter,\n targetElementDescriptionText as string,\n );\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n reasoning_content: res.reasoning_content,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = sectionLocatorInstruction(\n extraTextFromUserPrompt(sectionDescription),\n );\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen2.5-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n reasoning_content: result.reasoning_content,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(\n msgs,\n AIActionType.INSPECT_ELEMENT, // Reuse existing action type for now\n modelConfig,\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","targetElementDescriptionText","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;AAsDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;IAEA,MAAMC,OAAyC,EAAE;IACjD,IAAID,kBAAkB,QAAQ,QAAQ;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQF,iBAAiB,MAAM,CAAE;YAC1C,MAAMG,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACF,iBAAiB,uBAAuB;YAG5CC,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,mCAAmC,EAAEC,KAAK,IAAI,CAAC,EAAE,CAAC;oBAC3D;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IAUC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,OACEL,0BACA;IAEF,MAAMM,+BAA+BjB,wBACnCW;IAEF,MAAMO,wBAAwBC,kBAAkBF;IAChD,MAAMG,eAAeC,4BAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;QACxBO,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAad,QAAQ,YAAY,CAAC,IAAI,EAAE;QACxCe,cAAcf,QAAQ,YAAY,CAAC,IAAI,EAAE;QACzCgB,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,iBAAXA,QAAyB;QAClC,MAAMa,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAMvB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOP,0BAAuC;QAChD,MAAMkB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQS,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMC,MAAM,MAAMlB,SAASR,MAAM2B,aAAa,eAAe,EAAElB;IAE/D,MAAMmB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IACE,UAAUA,IAAI,OAAO,IACrBO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,KAC9BA,IAAI,OAAO,CAAC,IAAI,CAAC,MAAM,IAAI,GAC3B;YACAI,UAAUI,gBACRR,IAAI,OAAO,CAAC,IAAI,EAChBP,YACAC,aACAf,QAAQ,YAAY,EAAE,MAAM,MAC5BA,QAAQ,YAAY,EAAE,MAAM,KAC5BgB,oBACAC,qBACAZ;YAGFjB,aAAa,WAAWqC;YAExB,MAAMK,aAAa;gBACjB,GAAGL,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMM,UAA+BC,0BACnCF,YACAtB;YAEFmB,SAAS,EAAE;YAEX,IAAII,SACFL,kBAAkB;gBAACK;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACN,UAAUA,QAAQ,WAAW,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEO,IAAI,CAAC,CAAC;aAFtBP,SAAS;YAACO;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMT;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;QAChB,mBAAmBA,IAAI,iBAAiB;IAC1C;AACF;AAEO,eAAee,gBAAgBpC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEoC,kBAAkB,EAAEjC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMU,eAAe2B,4BAA4BjC;IACjD,MAAMkC,gCAAgCC,0BACpCjD,wBAAwB8C;IAE1B,MAAM1C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMiC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMjB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQ4C,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA1C,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMqB,SAAS,MAAMC,yBACnB/C,MACA2B,aAAa,YAAY,EACzBlB;IAGF,IAAIuC;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,gBACjBe,aACA3C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFf,aAAa,0BAA0BuD;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DnD,aAAa,wBAAwBwD;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASpB,MAAM,OAAO,CAACoB,OAC/B,GAAG,CAAC,CAACA,OACGnB,gBACLmB,MACA/C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNf,aAAa,qBAAqByD;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7DzD,aAAa,iBAAiB2D;QAG9BN,cAAcQ,iBAAiBF,YAAYhD,QAAQ,IAAI,EAAEI;QACzDf,aAAa,2BAA2BqD;IAC1C;IAEA,IAAIS,cAAc9C;IAClB,IAAIqC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BhD,kBACAqC,aACAtC,AAAW,iBAAXA;QAEF+C,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAajB,KAAK,SAAS,CAACiB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwBvD,OAO7C;IACC,MAAM,EAAEwD,SAAS,EAAEvD,OAAO,EAAEwD,aAAa,EAAE/D,gBAAgB,EAAEU,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAe+C;IACrB,MAAM,EAAEpD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM0D,wBAAwBC,uBAC5B5D,QAAQ,eAAe,IAAI,IAC3BwD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,eAAe,uBAAuB,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKvD;YACL,QAAQ;QACV;IACF;IAGFuD,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMhE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASkD;QACX;KACD;IAED,IAAInE,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAC,KAAK,IAAI,IAAIyB;IACf;IAEA,MAAMqB,SAAS,MAAMC,yBACnB/C,MACA2B,aAAa,YAAY,EACzBlB;IAEF,OAAO;QACL,aAAaqC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;QACnB,mBAAmBA,OAAO,iBAAiB;IAC7C;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB5D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMO,eAAeqD;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAMpE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,MAAMxB,SAAS,MAAMtC,SACnBR,MACA2B,aAAa,eAAe,EAC5BlB;IAGF,OAAO;QACL,kBAAkBqC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -82,8 +82,8 @@ async function plan(userInstruction, opts) {
82
82
  ...instruction,
83
83
  ...historyLog
84
84
  ];
85
- const { content: planFromAI, contentString: rawResponse, usage } = await callAIWithObjectResponse(msgs, AIActionType.PLAN, modelConfig, {
86
- qwen3_vl_enable_thinking: opts.qwen3_vl_enable_thinking
85
+ const { content: planFromAI, contentString: rawResponse, usage, reasoning_content } = await callAIWithObjectResponse(msgs, AIActionType.PLAN, modelConfig, {
86
+ deepThink: opts.deepThink
87
87
  });
88
88
  const actions = planFromAI.action ? [
89
89
  planFromAI.action
@@ -93,6 +93,7 @@ async function plan(userInstruction, opts) {
93
93
  actions,
94
94
  rawResponse,
95
95
  usage,
96
+ reasoning_content,
96
97
  yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace, planFromAI.sleep)
97
98
  };
98
99
  assert(planFromAI, "can't get plans from AI");
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n qwen3_vl_enable_thinking?: boolean;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n {\n qwen3_vl_enable_thinking: opts.qwen3_vl_enable_thinking,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","screenshotBase64","size","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","callAIWithObjectResponse","AIActionType","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","undefined","fillBboxParam","console"],"mappings":";;;;;;AAsBA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;IAC/B;IAEA,IAAIS,eAAeL;IACnB,IAAIM,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACN,GAAG,MAAMC,yBACRJ,MACAK,aAAa,IAAI,EACjBvB,aACA;QACE,0BAA0BF,KAAK,wBAAwB;IACzD;IAGF,MAAM0B,UAAUL,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,MAAMM,cAAkC;QACtC,GAAGN,UAAU;QACbK;QACAJ;QACAC;QACA,UAAUK,uBACRF,SACA1B,KAAK,WAAW,EAChBqB,WAAW,KAAK;IAEpB;IAEAQ,OAAOR,YAAY;IAEnBK,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBhC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAAC8B,SAAWA,OAAO,IAAI,KAAKC;QAG9BnC,MAAM,+BAA+BoC;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENpC,MAAM,gBAAgBqC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgB9B,AAAW+B,WAAX/B,QAElBwB,OAAO,KAAK,CAACK,MAAM,GAAGG,cACpBF,cACA1B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEA,IACEoB,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACAxC;IAIJI,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOK;AACT"}
1
+ {"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: boolean;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n {\n deepThink: opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","screenshotBase64","size","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","AIActionType","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","undefined","fillBboxParam","console"],"mappings":";;;;;;AAsBA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;IAC/B;IAEA,IAAIS,eAAeL;IACnB,IAAIM,aAAaL,KAAK,KAAK;IAC3B,IAAIM,cAAcN,KAAK,MAAM;IAC7B,MAAMO,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,yBACRL,MACAM,aAAa,IAAI,EACjBxB,aACA;QACE,WAAWF,KAAK,SAAS;IAC3B;IAGF,MAAM2B,UAAUN,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,MAAMO,cAAkC;QACtC,GAAGP,UAAU;QACbM;QACAL;QACAC;QACAC;QACA,UAAUK,uBACRF,SACA3B,KAAK,WAAW,EAChBqB,WAAW,KAAK;IAEpB;IAEAS,OAAOT,YAAY;IAEnBM,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBjC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAAC+B,SAAWA,OAAO,IAAI,KAAKC;QAG9BpC,MAAM,+BAA+BqC;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENrC,MAAM,gBAAgBsC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgB/B,AAAWgC,WAAXhC,QAElByB,OAAO,KAAK,CAACK,MAAM,GAAGG,cACpBF,cACA3B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEA,IACEqB,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACAzC;IAIJI,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOM;AACT"}
@@ -116,6 +116,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
116
116
  const isStreaming = options?.stream && options?.onChunk;
117
117
  let content;
118
118
  let accumulated = '';
119
+ let accumulatedReasoning = '';
119
120
  let usage;
120
121
  let timeCost;
121
122
  const buildUsageInfo = (usageData)=>{
@@ -138,18 +139,25 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
138
139
  max_tokens: 'number' == typeof maxTokens ? maxTokens : void 0,
139
140
  ...'qwen2.5-vl' === vlMode ? {
140
141
  vl_high_resolution_images: true
141
- } : {},
142
- ...options?.qwen3_vl_enable_thinking ? {
143
- enable_thinking: true
144
142
  } : {}
145
143
  };
144
+ const { config: deepThinkConfig, debugMessage, warningMessage } = resolveDeepThinkConfig({
145
+ deepThink: options?.deepThink,
146
+ vlMode
147
+ });
148
+ if (debugMessage) debugCall(debugMessage);
149
+ if (warningMessage) {
150
+ debugCall(warningMessage);
151
+ console.warn(warningMessage);
152
+ }
146
153
  try {
147
154
  debugCall(`sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`);
148
155
  if (isStreaming) {
149
156
  const stream = await completion.create({
150
157
  model: modelName,
151
158
  messages,
152
- ...commonConfig
159
+ ...commonConfig,
160
+ ...deepThinkConfig
153
161
  }, {
154
162
  stream: true
155
163
  });
@@ -159,6 +167,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
159
167
  if (chunk.usage) usage = chunk.usage;
160
168
  if (content || reasoning_content) {
161
169
  accumulated += content;
170
+ accumulatedReasoning += reasoning_content;
162
171
  const chunkData = {
163
172
  content,
164
173
  reasoning_content,
@@ -195,16 +204,19 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
195
204
  const result = await completion.create({
196
205
  model: modelName,
197
206
  messages,
198
- ...commonConfig
207
+ ...commonConfig,
208
+ ...deepThinkConfig
199
209
  });
200
210
  timeCost = Date.now() - startTime;
201
211
  debugProfileStats(`model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}, temperature, ${temperature ?? ''}`);
202
212
  debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);
203
213
  assert(result.choices, `invalid response from LLM service: ${JSON.stringify(result)}`);
204
214
  content = result.choices[0].message.content;
215
+ accumulatedReasoning = result.choices[0].message?.reasoning_content || '';
205
216
  usage = result.usage;
206
217
  }
207
- debugCall(`response: ${content}`);
218
+ debugCall(`response reasoning content: ${accumulatedReasoning}`);
219
+ debugCall(`response content: ${content}`);
208
220
  assert(content, 'empty content');
209
221
  if (isStreaming && !usage) {
210
222
  const estimatedTokens = Math.max(1, Math.floor((content || '').length / 4));
@@ -216,6 +228,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
216
228
  }
217
229
  return {
218
230
  content: content || '',
231
+ reasoning_content: accumulatedReasoning || void 0,
219
232
  usage: buildUsageInfo(usage),
220
233
  isStreamed: !!isStreaming
221
234
  };
@@ -229,7 +242,7 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
229
242
  }
230
243
  async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig, options) {
231
244
  const response = await callAI(messages, AIActionTypeValue, modelConfig, {
232
- qwen3_vl_enable_thinking: options?.qwen3_vl_enable_thinking
245
+ deepThink: options?.deepThink
233
246
  });
234
247
  assert(response, 'empty response');
235
248
  const vlMode = modelConfig.vlMode;
@@ -238,7 +251,8 @@ async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig
238
251
  return {
239
252
  content: jsonContent,
240
253
  contentString: response.content,
241
- usage: response.usage
254
+ usage: response.usage,
255
+ reasoning_content: response.reasoning_content
242
256
  };
243
257
  }
244
258
  async function callAIWithStringResponse(msgs, AIActionTypeValue, modelConfig) {
@@ -263,6 +277,31 @@ function preprocessDoubaoBboxJson(input) {
263
277
  if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
264
278
  return input;
265
279
  }
280
+ function resolveDeepThinkConfig({ deepThink, vlMode }) {
281
+ if (void 0 === deepThink) return {
282
+ config: {},
283
+ debugMessage: void 0
284
+ };
285
+ if ('qwen3-vl' === vlMode) return {
286
+ config: {
287
+ enable_thinking: deepThink
288
+ },
289
+ debugMessage: `deepThink mapped to enable_thinking=${deepThink} for qwen3-vl`
290
+ };
291
+ if ('doubao-vision' === vlMode) return {
292
+ config: {
293
+ thinking: {
294
+ type: deepThink ? 'enabled' : 'disabled'
295
+ }
296
+ },
297
+ debugMessage: `deepThink mapped to thinking.type=${deepThink ? 'enabled' : 'disabled'} for doubao-vision`
298
+ };
299
+ return {
300
+ config: {},
301
+ debugMessage: `deepThink ignored: unsupported model_family "${vlMode ?? 'default'}"`,
302
+ warningMessage: `The "deepThink" option is not supported for model_family "${vlMode ?? 'default'}".`
303
+ };
304
+ }
266
305
  function normalizeJsonObject(obj) {
267
306
  if (null == obj) return obj;
268
307
  if (Array.isArray(obj)) return obj.map((item)=>normalizeJsonObject(item));
@@ -307,6 +346,6 @@ function safeParseJson(input, vlMode) {
307
346
  }
308
347
  throw Error(`failed to parse LLM response into JSON. Error - ${String(lastError ?? 'unknown error')}. Response - \n ${input}`);
309
348
  }
310
- export { callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock, preprocessDoubaoBboxJson, safeParseJson };
349
+ export { callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock, preprocessDoubaoBboxJson, resolveDeepThinkConfig, safeParseJson };
311
350
 
312
351
  //# sourceMappingURL=index.mjs.map