@midscene/core 1.2.2-beta-20260115120150.0 → 1.2.2-beta-20260116060040.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +13 -14
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +15 -29
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/llm-planning.mjs +12 -3
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +44 -9
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +11 -10
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/common.mjs +14 -5
- package/dist/es/common.mjs.map +1 -1
- package/dist/es/device/index.mjs +29 -4
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml/player.mjs +3 -4
- package/dist/es/yaml/player.mjs.map +1 -1
- package/dist/lib/agent/agent.js +12 -13
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +15 -29
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/llm-planning.js +11 -2
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +44 -9
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +11 -10
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/common.js +20 -5
- package/dist/lib/common.js.map +1 -1
- package/dist/lib/device/index.js +53 -16
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml/player.js +3 -4
- package/dist/lib/yaml/player.js.map +1 -1
- package/dist/types/agent/agent.d.ts +4 -15
- package/dist/types/agent/tasks.d.ts +4 -4
- package/dist/types/ai-model/service-caller/index.d.ts +3 -3
- package/dist/types/common.d.ts +8 -1
- package/dist/types/device/index.d.ts +85 -23
- package/dist/types/types.d.ts +2 -2
- package/dist/types/yaml.d.ts +1 -5
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeepThinkOption,\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n fileChooserAccept?: string[],\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n includeBboxInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n );\n });\n }\n\n private async runAction(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n deepThink,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfigForPlanning;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars'\n ? modelConfigForPlanning.uiTarsModelVersion\n : undefined;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion\n ? uiTarsPlanning\n : plan)(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n deepThink,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n reasoning_content,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n };\n executorContext.uiContext = uiContext;\n\n const finalActions = [...(actions || [])];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n let errorFlag = false;\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check if task is complete\n if (!planResult?.more_actions_needed_by_instruction) {\n if (errorFlag) {\n debug(\n 'more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run',\n );\n } else {\n break;\n }\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage =\n 'I have finished the action previously planned.';\n }\n }\n\n return {\n output: {\n yamlFlow,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump, reasoning_content } = extractResult;\n applyDump(dump);\n task.reasoning_content = reasoning_content;\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = getError();\n if (error) {\n throw error;\n }\n return result;\n } finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","withFileChooser","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","startTime","Date","vlMode","uiTarsModelVersion","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","reasoning_content","finalActions","timeNow","timeRemaining","executables","errorFlag","String","Error","errorMsg","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","lastCheckStart","errorThought","currentCheckStart","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;AAgDA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAoBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA6BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAC3BC,iBAA4B,EAQ5B;QACA,OAAOC,gBAAgB,IAAI,CAAC,SAAS,EAAED,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAf,wBACAC,6BACAe,uBACAC,cACAC,WACAC,8BACAC,oBACAC;IAGN;IAEA,MAAc,UACZN,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAQ3B;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMjB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIoB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJP,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEgB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAGlC,MAAO,KAAM;YACX,MAAMf,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;oBACAC;gBACF;gBACA,UAAU,OAAOd,OAAOC;oBACtB,MAAMqB,YAAYC,KAAK,GAAG;oBAC1B,MAAM,EAAErB,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEsB,MAAM,EAAE,GAAG/B;oBACnB,MAAMgC,qBACJD,AAAW,kBAAXA,SACI/B,uBAAuB,kBAAkB,GACzC2B;oBAEN,MAAMM,cAAc,IAAI,CAAC,cAAc;oBACvC1C,MACE,sCACA0C,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDxB,OAAOyB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,aAAa,MAAOL,AAAAA,CAAAA,qBACtBM,iBACAC,IAAG,EAAGhC,MAAM,eAAe,EAAE;wBAC/B,SAASE;wBACT,eAAeF,MAAM,YAAY;wBACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3C0B;wBACA,aAAajC;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;wBACbI;wBACAC;oBACF;oBACA9B,MAAM,cAAciD,KAAK,SAAS,CAACH,YAAY,MAAM;oBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACLC,iBAAiB,EAClB,GAAGX;oBAEJ7B,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCsC;oBACF;oBACAtC,gBAAgB,IAAI,CAAC,KAAK,GAAGqC;oBAC7BrC,gBAAgB,IAAI,CAAC,iBAAiB,GAAGwC;oBACzCxC,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAASiC,WAAW,EAAE;wBACtBE;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA7B,gBAAgB,SAAS,GAAGC;oBAE5B,MAAMwC,eAAe;2BAAKR,WAAW,EAAE;qBAAE;oBAEzC,IAAIM,OAAO;wBACT,MAAMG,UAAUpB,KAAK,GAAG;wBACxB,MAAMqB,gBAAgBJ,QAASG,CAAAA,UAAUrB,SAAQ;wBACjD,IAAIsB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;oBAErC;oBAEAzC,OAAO,CAACkC,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAE3D,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAML,aAAaxB,QAAQ;YAG3B,MAAMd,QAAQsC,YAAY,WAAW,EAAE;YACvCZ,SAAS,IAAI,IAAKY,YAAY,YAAY,EAAE;YAE5C,IAAIe;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9CrD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAO0B,OAAO;gBACd,OAAOxC,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEwC,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5EzC,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDqC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAGnD,IAAIiB,YAAY;YAChB,IAAI;gBACF,MAAMjD,QAAQ,YAAY,CAACgD,YAAY,KAAK;YAC9C,EAAE,OAAOR,OAAY;gBACnBS,YAAY;gBACZzB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,+BAA+B,EAAEgB,OAAO,WAAWU,OAAOV,QAAQ;gBACrHrD,MACE,yFACAqD,iBAAiBW,QAAQX,MAAM,OAAO,GAAGU,OAAOV,QAChD,6CACAhB;YAEJ;YAEA,IAAIA,8BAA8BnC,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAACiC,YAAY,oCACf,IAAIgB,WACF9D,MACE;iBAGF;YAKJ,EAAEiC;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM8B,WAAW,CAAC,UAAU,EAAE9B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOtB,QAAQ,eAAe,CAACoD;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAC7C;QAEN;QAEA,OAAO;YACL,QAAQ;gBACN/B;YACF;YACAd;QACF;IACF;IAEQ,oBACN8C,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAOnD,OAAOwD;gBACtB,MAAM,EAAEzD,IAAI,EAAE,GAAGyD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZ5D,KAAK,GAAG,GAAG;wBACT4D;oBACF;gBACF;gBAGA,MAAMzD,YAAYsD,YAAY,SAAS;gBACvCrD,OAAOD,WAAW;gBAElB,MAAM0D,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DrE,MAAM;oBACN,MAAMkF,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOjB,OAAO;oBACd,IAAIA,iBAAiB+B,cACnBV,UAAUrB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEgC,IAAI,EAAE/B,KAAK,EAAEgC,OAAO,EAAEX,IAAI,EAAElB,iBAAiB,EAAE,GAAGuB;gBAC1DN,UAAUC;gBACV5D,KAAK,iBAAiB,GAAG0C;gBAEzB,IAAI8B,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAInB,AAAS,cAATA,MAEPqB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACLpE,OACEkE,MAAM,CAACP,YAAY,KAAK1C,QACxB;oBAEFmD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACqB,cAAc;oBACtCxE,KAAK,KAAK,GAAGuC;oBACbvC,KAAK,OAAO,GAAGuE;oBACf,MAAM,IAAItB,MAAM,CAAC,kBAAkB,EAAEsB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACLnB;oBACAgC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMzD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEoD,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASlB,KAAK,SAAS,CAACkB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAMlD,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAAC0D;QAE1C,IAAI,CAACjD,QACH,MAAM,IAAI0C,MACR;QAIJ,MAAM,EAAEzC,MAAM,EAAE+D,OAAO,EAAE,GAAGhE;QAE5B,OAAO;YACLC;YACA+D;YACAlE;QACF;IACF;IAEQ,UAAUoE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;QACF;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBrB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEuB,UAAU,EAAErB,gBAAgB,EAAE,GAAGsB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM9E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW+E;QAE1B,MAAMzE,SAASP,QAAQ,SAAS;QAChC,MAAM,EACJiF,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG7B;QACJ,MAAM8B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEA/E,OAAOuE,WAAW;QAClBvE,OAAO2E,WAAW;QAClB3E,OAAO4E,iBAAiB;QAExB5E,OACE4E,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmB7D,KAAK,GAAG;QACjC,IAAI8D,iBAAiBD;QACrB,IAAIE,eAAe;QAEnB,MAAOD,iBAAiBD,oBAAoBN,UAAW;YACrD,MAAMS,oBAAoBhE,KAAK,GAAG;YAClC8D,iBAAiBE;YACjB,MAAMhC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAoB,YACAvB,aACA+B,mBACA7B;YAGF,MAAMhD,SAAU,MAAMT,QAAQ,YAAY,CAAC0D;YAO3C,IAAIjD,QAAQ,QACV,OAAO;gBACL,QAAQc;gBACRhB;YACF;YAGFkF,eACEhF,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEqE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMa,MAAMjE,KAAK,GAAG;YACpB,IAAIiE,MAAMD,oBAAoBR,iBAAiB;gBAC7C,MAAMnC,gBAAgBmC,kBAAmBS,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAME,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQ7C;gBACV;gBACA,MAAM/C,QAAQ,MAAM,CAAC4F;YACvB;QACF;QAEA,OAAO5F,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEyF,cAAc;IACnE;IAtnBA,YACEI,iBAAoC,EACpCC,OAAgB,EAChBC,IAMC,CACD;QAjCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAkBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,mBAAmB,GAAGD,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIE,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;QAClC;IACF;AA8lBF;AAEO,eAAe5E,gBACpB0E,iBAAoC,EACpC3E,iBAAuC,EACvCY,MAAwB;IAExB,IAAI,CAACZ,mBAAmB,QACtB,OAAOY;IAGT,IAAI,CAAC+D,kBAAkB,2BAA2B,EAChD,MAAM,IAAI1C,MACR,CAAC,gCAAgC,EAAE0C,kBAAkB,aAAa,EAAE;IAIxE,MAAMK,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAACjF;IACvB;IAEA,MAAM,EAAEkF,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMR,kBAAkB,2BAA2B,CAACK;IACtD,IAAI;QACF,MAAMzF,SAAS,MAAMqB;QAErB,MAAMU,QAAQ6D;QACd,IAAI7D,OACF,MAAMA;QAER,OAAO/B;IACT,SAAU;QACR2F;IACF;AACF"}
|
|
1
|
+
{"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport {\n type TMultimodalPrompt,\n type TUserPrompt,\n getReadableTimeString,\n} from '@/common';\nimport type { AbstractInterface, FileChooserHandler } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n DeepThinkOption,\n DeviceAction,\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly providedActionSpace: DeviceAction[];\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n actionSpace: DeviceAction[];\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.providedActionSpace = opts.actionSpace;\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n actionSpace: this.getActionSpace(),\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n private getActionSpace(): DeviceAction[] {\n return this.providedActionSpace;\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n shouldContinuePlanning: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n fileChooserAccept?: string[],\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n return withFileChooser(this.interface, fileChooserAccept, async () => {\n return this.runAction(\n userPrompt,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n includeBboxInPlanning,\n aiActContext,\n cacheable,\n replanningCycleLimitOverride,\n imagesIncludeCount,\n deepThink,\n );\n });\n }\n\n private async runAction(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n aiActContext?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n imagesIncludeCount?: number,\n deepThink?: DeepThinkOption,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n output?: string;\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n let outputString: string | undefined;\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActContext,\n imagesIncludeCount,\n deepThink,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfigForPlanning;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars'\n ? modelConfigForPlanning.uiTarsModelVersion\n : undefined;\n\n const actionSpace = this.getActionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion\n ? uiTarsPlanning\n : plan)(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n imagesIncludeCount,\n deepThink,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n note,\n error,\n usage,\n rawResponse,\n reasoning_content,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.reasoning_content = reasoning_content;\n executorContext.task.output = {\n actions: actions || [],\n log,\n note,\n yamlFlow: planResult.yamlFlow,\n output: outputString,\n shouldContinuePlanning: planResult.shouldContinuePlanning,\n };\n executorContext.uiContext = uiContext;\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n // todo: set time string\n\n try {\n const result = await session.appendAndRun(executables.tasks);\n outputString = result?.output;\n } catch (error: any) {\n // errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Time: ${getReadableTimeString()}, Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // // Check if task is complete\n if (!planResult?.shouldContinuePlanning) {\n break;\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage = `Time: ${getReadableTimeString()}, I have finished the action previously planned.`;\n }\n }\n\n return {\n output: {\n yamlFlow,\n output: outputString,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump, reasoning_content } = extractResult;\n applyDump(dump);\n task.reasoning_content = reasoning_content;\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const {\n timeoutMs,\n checkIntervalMs,\n domIncluded,\n screenshotIncluded,\n ...restOpt\n } = opt;\n const serviceExtractOpt: ServiceExtractOption = {\n domIncluded,\n screenshotIncluded,\n ...restOpt,\n };\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n serviceExtractOpt,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n\nexport async function withFileChooser<T>(\n interfaceInstance: AbstractInterface,\n fileChooserAccept: string[] | undefined,\n action: () => Promise<T>,\n): Promise<T> {\n if (!fileChooserAccept?.length) {\n return action();\n }\n\n if (!interfaceInstance.registerFileChooserListener) {\n throw new Error(\n `File upload is not supported on ${interfaceInstance.interfaceType}`,\n );\n }\n\n const handler = async (chooser: FileChooserHandler) => {\n await chooser.accept(fileChooserAccept);\n };\n\n const { dispose, getError } =\n await interfaceInstance.registerFileChooserListener(handler);\n try {\n const result = await action();\n // Check for errors that occurred during file chooser handling\n const error = getError();\n if (error) {\n throw error;\n }\n return result;\n } finally {\n dispose();\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","aiActContext","cacheable","replanningCycleLimitOverride","imagesIncludeCount","deepThink","fileChooserAccept","withFileChooser","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","outputString","vlMode","uiTarsModelVersion","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","note","error","usage","rawResponse","reasoning_content","executables","getReadableTimeString","String","Error","errorMsg","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","domIncluded","screenshotIncluded","restOpt","serviceExtractOpt","overallStartTime","Date","lastCheckStart","errorThought","currentCheckStart","now","timeRemaining","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder","handler","chooser","dispose","getError"],"mappings":";;;;;;;;;;;;;;;;;;;;;AAmDA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAoBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA6BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEQ,iBAAiC;QACvC,OAAO,IAAI,CAAC,mBAAmB;IACjC;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,wBAAwB;wBACxB,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAC3BC,iBAA4B,EAS5B;QACA,OAAOC,gBAAgB,IAAI,CAAC,SAAS,EAAED,mBAAmB,UACjD,IAAI,CAAC,SAAS,CACnBP,YACAf,wBACAC,6BACAe,uBACAC,cACAC,WACAC,8BACAC,oBACAC;IAGN;IAEA,MAAc,UACZN,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,YAAqB,EACrBC,SAAmB,EACnBC,4BAAqC,EACrCC,kBAA2B,EAC3BC,SAA2B,EAS3B;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMjB,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIoB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJP,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEgB,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAClC,IAAIC;QAGJ,MAAO,KAAM;YACX,MAAMhB,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjBE;oBACAG;oBACAC;gBACF;gBACA,UAAU,OAAOd,OAAOC;oBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEqB,MAAM,EAAE,GAAG9B;oBACnB,MAAM+B,qBACJD,AAAW,kBAAXA,SACI9B,uBAAuB,kBAAkB,GACzC2B;oBAEN,MAAMK,cAAc,IAAI,CAAC,cAAc;oBACvCzC,MACE,sCACAyC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDvB,OAAOwB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,aAAa,MAAOL,AAAAA,CAAAA,qBACtBM,iBACAC,IAAG,EAAG/B,MAAM,eAAe,EAAE;wBAC/B,SAASE;wBACT,eAAeF,MAAM,YAAY;wBACjC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CyB;wBACA,aAAahC;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;wBACbI;wBACAC;oBACF;oBACA9B,MAAM,cAAcgD,KAAK,SAAS,CAACH,YAAY,MAAM;oBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,IAAI,EACJC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,iBAAiB,EAClB,GAAGV;oBAEJ5B,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCqC;oBACF;oBACArC,gBAAgB,IAAI,CAAC,KAAK,GAAGoC;oBAC7BpC,gBAAgB,IAAI,CAAC,iBAAiB,GAAGsC;oBACzCtC,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAASgC,WAAW,EAAE;wBACtBC;wBACAC;wBACA,UAAUN,WAAW,QAAQ;wBAC7B,QAAQP;wBACR,wBAAwBO,WAAW,sBAAsB;oBAC3D;oBACA5B,gBAAgB,SAAS,GAAGC;oBAE5BC,OAAO,CAACiC,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAE3D,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAML,aAAavB,QAAQ;YAG3B,MAAMd,QAAQqC,YAAY,WAAW,EAAE;YACvCX,SAAS,IAAI,IAAKW,YAAY,YAAY,EAAE;YAE5C,IAAIW;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9ChD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAOyB,OAAO;gBACd,OAAOvC,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEuC,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ExC,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDoC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAKnD,IAAI;gBACF,MAAMtB,SAAS,MAAMT,QAAQ,YAAY,CAAC2C,YAAY,KAAK;gBAC3DlB,eAAehB,QAAQ;YACzB,EAAE,OAAO8B,OAAY;gBAEnBf;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,MAAM,EAAEoB,wBAAwB,iCAAiC,EAAEL,OAAO,WAAWM,OAAON,QAAQ;gBACvJpD,MACE,yFACAoD,iBAAiBO,QAAQP,MAAM,OAAO,GAAGM,OAAON,QAChD,6CACAf;YAEJ;YAEA,IAAIA,8BAA8BnC,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAACgC,YAAY,wBACf;YAIF,EAAEZ;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAMyB,WAAW,CAAC,UAAU,EAAEzB,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOtB,QAAQ,eAAe,CAAC+C;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,MAAM,EAAEH,wBAAwB,gDAAgD,CAAC;QAExI;QAEA,OAAO;YACL,QAAQ;gBACNvB;gBACA,QAAQI;YACV;YACAlB;QACF;IACF;IAEQ,oBACNyC,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAO9C,OAAOmD;gBACtB,MAAM,EAAEpD,IAAI,EAAE,GAAGoD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZvD,KAAK,GAAG,GAAG;wBACTuD;oBACF;gBACF;gBAGA,MAAMpD,YAAYiD,YAAY,SAAS;gBACvChD,OAAOD,WAAW;gBAElB,MAAMqD,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DhE,MAAM;oBACN,MAAM6E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOb,OAAO;oBACd,IAAIA,iBAAiB2B,cACnBV,UAAUjB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE4B,IAAI,EAAE3B,KAAK,EAAE4B,OAAO,EAAEX,IAAI,EAAEf,iBAAiB,EAAE,GAAGoB;gBAC1DN,UAAUC;gBACVvD,KAAK,iBAAiB,GAAGwC;gBAEzB,IAAI2B,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAInB,AAAS,cAATA,MAEPqB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACL/D,OACE6D,MAAM,CAACP,YAAY,KAAKrC,QACxB;oBAEF8C,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACqB,cAAc;oBACtCnE,KAAK,KAAK,GAAGsC;oBACbtC,KAAK,OAAO,GAAGkE;oBACf,MAAM,IAAItB,MAAM,CAAC,kBAAkB,EAAEsB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACLf;oBACA4B;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMpD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACE+C,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASd,KAAK,SAAS,CAACc;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAM7C,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACqD;QAE1C,IAAI,CAAC5C,QACH,MAAM,IAAIqC,MACR;QAIJ,MAAM,EAAEpC,MAAM,EAAE0D,OAAO,EAAE,GAAG3D;QAE5B,OAAO;YACLC;YACA0D;YACA7D;QACF;IACF;IAEA,MAAM,aAAa+D,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBrB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEuB,UAAU,EAAErB,gBAAgB,EAAE,GAAGsB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMzE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW0E;QAE1B,MAAMpE,SAASP,QAAQ,SAAS;QAChC,MAAM,EACJ4E,SAAS,EACTC,eAAe,EACfC,WAAW,EACXC,kBAAkB,EAClB,GAAGC,SACJ,GAAG7B;QACJ,MAAM8B,oBAA0C;YAC9CH;YACAC;YACA,GAAGC,OAAO;QACZ;QAEA1E,OAAOkE,WAAW;QAClBlE,OAAOsE,WAAW;QAClBtE,OAAOuE,iBAAiB;QAExBvE,OACEuE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAMM,mBAAmBC,KAAK,GAAG;QACjC,IAAIC,iBAAiBF;QACrB,IAAIG,eAAe;QAEnB,MAAOD,iBAAiBF,oBAAoBN,UAAW;YACrD,MAAMU,oBAAoBH,KAAK,GAAG;YAClCC,iBAAiBE;YACjB,MAAMjC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAoB,YACAvB,aACA+B,mBACA7B;YAGF,MAAM3C,SAAU,MAAMT,QAAQ,YAAY,CAACqD;YAO3C,IAAI5C,QAAQ,QACV,OAAO;gBACL,QAAQc;gBACRhB;YACF;YAGF8E,eACE5E,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEgE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMc,MAAMJ,KAAK,GAAG;YACpB,IAAII,MAAMD,oBAAoBT,iBAAiB;gBAC7C,MAAMW,gBAAgBX,kBAAmBU,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAMG,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQD;gBACV;gBACA,MAAMxF,QAAQ,MAAM,CAACyF;YACvB;QACF;QAEA,OAAOzF,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEqF,cAAc;IACnE;IAlmBA,YACEK,iBAAoC,EACpCC,OAAgB,EAChBC,IAMC,CACD;QAjCF;QAEA;QAEA;QAEA,uBAAiB,uBAAjB;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAkBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,mBAAmB,GAAGD,KAAK,WAAW;QAC3C,IAAI,CAAC,WAAW,GAAG,IAAIE,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;YACzB,aAAa,IAAI,CAAC,cAAc;QAClC;IACF;AA0kBF;AAEO,eAAezE,gBACpBuE,iBAAoC,EACpCxE,iBAAuC,EACvCW,MAAwB;IAExB,IAAI,CAACX,mBAAmB,QACtB,OAAOW;IAGT,IAAI,CAAC6D,kBAAkB,2BAA2B,EAChD,MAAM,IAAI5C,MACR,CAAC,gCAAgC,EAAE4C,kBAAkB,aAAa,EAAE;IAIxE,MAAMK,UAAU,OAAOC;QACrB,MAAMA,QAAQ,MAAM,CAAC9E;IACvB;IAEA,MAAM,EAAE+E,OAAO,EAAEC,QAAQ,EAAE,GACzB,MAAMR,kBAAkB,2BAA2B,CAACK;IACtD,IAAI;QACF,MAAMtF,SAAS,MAAMoB;QAErB,MAAMU,QAAQ2D;QACd,IAAI3D,OACF,MAAMA;QAER,OAAO9B;IACT,SAAU;QACRwF;IACF;AACF"}
|
package/dist/es/agent/utils.mjs
CHANGED
|
@@ -103,7 +103,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
103
103
|
return;
|
|
104
104
|
}
|
|
105
105
|
}
|
|
106
|
-
const getMidsceneVersion = ()=>"1.2.2-beta-
|
|
106
|
+
const getMidsceneVersion = ()=>"1.2.2-beta-20260116060040.0";
|
|
107
107
|
const parsePrompt = (prompt)=>{
|
|
108
108
|
if ('string' == typeof prompt) return {
|
|
109
109
|
textPrompt: prompt,
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { paddingToMatchBlockByBase64 } from "@midscene/shared/img";
|
|
2
2
|
import { getDebug } from "@midscene/shared/logger";
|
|
3
3
|
import { assert } from "@midscene/shared/utils";
|
|
4
|
-
import { buildYamlFlowFromPlans, fillBboxParam, findAllMidsceneLocatorField } from "../common.mjs";
|
|
4
|
+
import { buildYamlFlowFromPlans, fillBboxParam, finalizeActionName, findAllMidsceneLocatorField } from "../common.mjs";
|
|
5
5
|
import { systemPromptToTaskPlanning } from "./prompt/llm-planning.mjs";
|
|
6
6
|
import { callAIWithObjectResponse } from "./service-caller/index.mjs";
|
|
7
7
|
const debug = getDebug('planning');
|
|
@@ -89,13 +89,23 @@ async function plan(userInstruction, opts) {
|
|
|
89
89
|
const actions = planFromAI.action ? [
|
|
90
90
|
planFromAI.action
|
|
91
91
|
] : [];
|
|
92
|
+
let shouldContinuePlanning = true;
|
|
93
|
+
if (!actions.length) {
|
|
94
|
+
debug('no actions planned and no sleep instruction, stop planning');
|
|
95
|
+
shouldContinuePlanning = false;
|
|
96
|
+
}
|
|
97
|
+
if (actions[0]?.type === finalizeActionName) {
|
|
98
|
+
debug('finalize action planned, stop planning');
|
|
99
|
+
shouldContinuePlanning = false;
|
|
100
|
+
}
|
|
92
101
|
const returnValue = {
|
|
93
102
|
...planFromAI,
|
|
94
103
|
actions,
|
|
95
104
|
rawResponse,
|
|
96
105
|
usage,
|
|
97
106
|
reasoning_content,
|
|
98
|
-
yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace,
|
|
107
|
+
yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),
|
|
108
|
+
shouldContinuePlanning
|
|
99
109
|
};
|
|
100
110
|
assert(planFromAI, "can't get plans from AI");
|
|
101
111
|
actions.forEach((action)=>{
|
|
@@ -109,7 +119,6 @@ async function plan(userInstruction, opts) {
|
|
|
109
119
|
if (locateResult && void 0 !== vlMode) action.param[field] = fillBboxParam(locateResult, imageWidth, imageHeight, rightLimit, bottomLimit, vlMode);
|
|
110
120
|
});
|
|
111
121
|
});
|
|
112
|
-
if (0 === actions.length && returnValue.more_actions_needed_by_instruction && !returnValue.sleep) console.warn('No actions planned for the prompt, but model said more actions are needed:', userInstruction);
|
|
113
122
|
conversationHistory.append({
|
|
114
123
|
role: 'assistant',
|
|
115
124
|
content: [
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.getData();\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.mjs","sources":["../../../src/ai-model/llm-planning.ts"],"sourcesContent":["import type {\n DeepThinkOption,\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n RawResponsePlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport {\n buildYamlFlowFromPlans,\n fillBboxParam,\n finalizeActionName,\n findAllMidsceneLocatorField,\n} from '../common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory: ConversationHistory;\n includeBbox: boolean;\n imagesIncludeCount?: number;\n deepThink?: DeepThinkOption;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { size } = context;\n const screenshotBase64 = context.screenshot.getData();\n\n const { vlMode } = modelConfig;\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode,\n includeBbox: opts.includeBbox,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n\n // Process image based on VL mode requirements\n if (vlMode === 'qwen2.5-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const actionContext = opts.actionContext\n ? `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>\\n`\n : '';\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${actionContext}<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n let latestFeedbackMessage: ChatCompletionMessageParam;\n\n if (conversationHistory.pendingFeedbackMessage) {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `${conversationHistory.pendingFeedbackMessage}. The last screenshot is attached. Please going on according to the instruction.`,\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n\n conversationHistory.resetPendingFeedbackMessageIfExists();\n } else {\n latestFeedbackMessage = {\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'this is the latest screenshot',\n },\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n };\n }\n conversationHistory.append(latestFeedbackMessage);\n const historyLog = conversationHistory.snapshot(opts.imagesIncludeCount);\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...instruction,\n ...historyLog,\n ];\n\n const {\n content: planFromAI,\n contentString: rawResponse,\n usage,\n reasoning_content,\n } = await callAIWithObjectResponse<RawResponsePlanningAIResponse>(\n msgs,\n modelConfig,\n {\n deepThink: opts.deepThink === 'unset' ? undefined : opts.deepThink,\n },\n );\n\n const actions = planFromAI.action ? [planFromAI.action] : [];\n let shouldContinuePlanning = true;\n if (!actions.length) {\n debug('no actions planned and no sleep instruction, stop planning');\n shouldContinuePlanning = false;\n }\n if (actions[0]?.type === finalizeActionName) {\n debug('finalize action planned, stop planning');\n shouldContinuePlanning = false;\n }\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n reasoning_content,\n yamlFlow: buildYamlFlowFromPlans(actions, opts.actionSpace),\n shouldContinuePlanning,\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult && vlMode !== undefined) {\n // Always use VL mode to fill bbox parameters\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n });\n });\n\n conversationHistory.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["debug","getDebug","plan","userInstruction","opts","context","modelConfig","conversationHistory","size","screenshotBase64","vlMode","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","actionContext","instruction","latestFeedbackMessage","historyLog","msgs","planFromAI","rawResponse","usage","reasoning_content","callAIWithObjectResponse","undefined","actions","shouldContinuePlanning","finalizeActionName","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam"],"mappings":";;;;;;AAuBA,MAAMA,QAAQC,SAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAUC;IAED,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGH;IACtD,MAAM,EAAEI,IAAI,EAAE,GAAGH;IACjB,MAAMI,mBAAmBJ,QAAQ,UAAU,CAAC,OAAO;IAEnD,MAAM,EAAEK,MAAM,EAAE,GAAGJ;IAEnB,MAAMK,eAAe,MAAMC,2BAA2B;QACpD,aAAaR,KAAK,WAAW;QAC7BM;QACA,aAAaN,KAAK,WAAW;IAC/B;IAEA,IAAIS,eAAeJ;IACnB,IAAIK,aAAaN,KAAK,KAAK;IAC3B,IAAIO,cAAcP,KAAK,MAAM;IAC7B,MAAMQ,aAAaF;IACnB,MAAMG,cAAcF;IAGpB,IAAIL,AAAW,iBAAXA,QAAyB;QAC3B,MAAMQ,eAAe,MAAMC,4BAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC;IAEA,MAAME,gBAAgBhB,KAAK,aAAa,GACpC,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,4BAA4B,CAAC,GAC5E;IAEJ,MAAMiB,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGD,cAAc,kBAAkB,EAAEjB,gBAAgB,mBAAmB,CAAC;gBACjF;aACD;QACH;KACD;IAED,IAAImB;IAEJ,IAAIf,oBAAoB,sBAAsB,EAAE;QAC9Ce,wBAAwB;YACtB,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,GAAGf,oBAAoB,sBAAsB,CAAC,gFAAgF,CAAC;gBACvI;gBACA;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;aACD;QACH;QAEAN,oBAAoB,mCAAmC;IACzD,OACEe,wBAAwB;QACtB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;YACA;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKT;oBACL,QAAQ;gBACV;YACF;SACD;IACH;IAEFN,oBAAoB,MAAM,CAACe;IAC3B,MAAMC,aAAahB,oBAAoB,QAAQ,CAACH,KAAK,kBAAkB;IAEvE,MAAMoB,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASb;QAAa;WACrCU;WACAE;KACJ;IAED,MAAM,EACJ,SAASE,UAAU,EACnB,eAAeC,WAAW,EAC1BC,KAAK,EACLC,iBAAiB,EAClB,GAAG,MAAMC,yBACRL,MACAlB,aACA;QACE,WAAWF,AAAmB,YAAnBA,KAAK,SAAS,GAAe0B,SAAY1B,KAAK,SAAS;IACpE;IAGF,MAAM2B,UAAUN,WAAW,MAAM,GAAG;QAACA,WAAW,MAAM;KAAC,GAAG,EAAE;IAC5D,IAAIO,yBAAyB;IAC7B,IAAI,CAACD,QAAQ,MAAM,EAAE;QACnB/B,MAAM;QACNgC,yBAAyB;IAC3B;IACA,IAAID,OAAO,CAAC,EAAE,EAAE,SAASE,oBAAoB;QAC3CjC,MAAM;QACNgC,yBAAyB;IAC3B;IACA,MAAME,cAAkC;QACtC,GAAGT,UAAU;QACbM;QACAL;QACAC;QACAC;QACA,UAAUO,uBAAuBJ,SAAS3B,KAAK,WAAW;QAC1D4B;IACF;IAEAI,OAAOX,YAAY;IAEnBM,QAAQ,OAAO,CAAC,CAACM;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBnC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACiC,SAAWA,OAAO,IAAI,KAAKC;QAG9BtC,MAAM,+BAA+BuC;QACrC,MAAMC,eAAeD,sBACjBE,4BAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENvC,MAAM,gBAAgBwC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,gBAAgBjC,AAAWoB,WAAXpB,QAElB2B,OAAO,KAAK,CAACK,MAAM,GAAGE,cACpBD,cACA7B,YACAC,aACAC,YACAC,aACAP;QAGN;IACF;IAEAH,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAMmB;YACR;SACD;IACH;IAEA,OAAOQ;AACT"}
|
|
@@ -1,11 +1,25 @@
|
|
|
1
1
|
import { getZodDescription, getZodTypeName } from "@midscene/shared/zod-schema-utils";
|
|
2
2
|
import { bboxDescription } from "./common.mjs";
|
|
3
|
-
const commonOutputFields = `"
|
|
4
|
-
"
|
|
3
|
+
const commonOutputFields = `"thought": string, // your thought process about the next action
|
|
4
|
+
"note"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action.
|
|
5
|
+
"log": string, // a brief preamble to the user explaining what you’re about to do
|
|
6
|
+
"error"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.`;
|
|
5
7
|
const vlLocateParam = (vlMode)=>{
|
|
6
8
|
if (vlMode) return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;
|
|
7
9
|
return "{ prompt: string /* description of the target element */ }";
|
|
8
10
|
};
|
|
11
|
+
const findDefaultValue = (field)=>{
|
|
12
|
+
let current = field;
|
|
13
|
+
const visited = new Set();
|
|
14
|
+
while(current && !visited.has(current)){
|
|
15
|
+
visited.add(current);
|
|
16
|
+
const currentWithDef = current;
|
|
17
|
+
if (!currentWithDef._def?.typeName) break;
|
|
18
|
+
if ('ZodDefault' === currentWithDef._def.typeName) return currentWithDef._def.defaultValue?.();
|
|
19
|
+
if ('ZodOptional' === currentWithDef._def.typeName || 'ZodNullable' === currentWithDef._def.typeName) current = currentWithDef._def.innerType;
|
|
20
|
+
else break;
|
|
21
|
+
}
|
|
22
|
+
};
|
|
9
23
|
const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
|
|
10
24
|
const tab = ' ';
|
|
11
25
|
const fields = [];
|
|
@@ -21,8 +35,16 @@ const descriptionForAction = (action, locatorSchemaTypeDescription)=>{
|
|
|
21
35
|
const keyWithOptional = isOptional ? `${key}?` : key;
|
|
22
36
|
const typeName = getZodTypeName(field, locatorSchemaTypeDescription);
|
|
23
37
|
const description = getZodDescription(field);
|
|
38
|
+
const defaultValue = findDefaultValue(field);
|
|
39
|
+
const hasDefault = void 0 !== defaultValue;
|
|
24
40
|
let paramLine = `${keyWithOptional}: ${typeName}`;
|
|
25
|
-
|
|
41
|
+
const comments = [];
|
|
42
|
+
if (description) comments.push(description);
|
|
43
|
+
if (hasDefault) {
|
|
44
|
+
const defaultStr = 'string' == typeof defaultValue ? `"${defaultValue}"` : JSON.stringify(defaultValue);
|
|
45
|
+
comments.push(`default: ${defaultStr}`);
|
|
46
|
+
}
|
|
47
|
+
if (comments.length > 0) paramLine += ` // ${comments.join(', ')}`;
|
|
26
48
|
paramLines.push(paramLine);
|
|
27
49
|
}
|
|
28
50
|
if (paramLines.length > 0) {
|
|
@@ -76,7 +98,6 @@ Please tell what the next one action is (or null if no action should be done) to
|
|
|
76
98
|
- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.
|
|
77
99
|
- Make sure the previous actions are completed successfully before performing the next step
|
|
78
100
|
- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the "error" field to the error message.
|
|
79
|
-
- If there is nothing to do but waiting, set the "sleep" field to the positive waiting time in milliseconds and null for the "action" field.
|
|
80
101
|
- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the "Print_Assert_Result" action.
|
|
81
102
|
|
|
82
103
|
## Supporting actions
|
|
@@ -88,7 +109,6 @@ ${logFieldInstruction}
|
|
|
88
109
|
|
|
89
110
|
Return in JSON format:
|
|
90
111
|
{
|
|
91
|
-
"log": string, // a brief preamble to the user explaining what you’re about to do
|
|
92
112
|
${commonOutputFields}
|
|
93
113
|
"action":
|
|
94
114
|
{
|
|
@@ -96,16 +116,14 @@ Return in JSON format:
|
|
|
96
116
|
"param"?: { // The parameter of the action, if any
|
|
97
117
|
// k-v style parameter fields
|
|
98
118
|
},
|
|
99
|
-
} | null
|
|
100
|
-
,
|
|
101
|
-
"sleep"?: number, // The sleep time after the action, in milliseconds.
|
|
119
|
+
} | null
|
|
102
120
|
}
|
|
103
121
|
|
|
104
122
|
For example, if the instruction is to login and the form has already been filled, this is a valid return value:
|
|
105
123
|
|
|
106
124
|
{
|
|
125
|
+
"thought": "The form has already been filled, I need to click the login button to login",
|
|
107
126
|
"log": "Click the login button",
|
|
108
|
-
"more_actions_needed_by_instruction": false,
|
|
109
127
|
"action": {
|
|
110
128
|
"type": "Tap",
|
|
111
129
|
"param": {
|
|
@@ -115,6 +133,23 @@ For example, if the instruction is to login and the form has already been filled
|
|
|
115
133
|
}
|
|
116
134
|
}
|
|
117
135
|
}
|
|
136
|
+
|
|
137
|
+
For example, if the instruction is to find out every title in the screenshot, the return value should be:
|
|
138
|
+
|
|
139
|
+
{
|
|
140
|
+
"thought": "I need to note the titles in the current screenshot for further processing and scroll to find more titles",
|
|
141
|
+
"note": "The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'",
|
|
142
|
+
"log": "Scroll to find more titles",
|
|
143
|
+
"action": {
|
|
144
|
+
"type": "Scroll",
|
|
145
|
+
"param": {
|
|
146
|
+
"locate": {
|
|
147
|
+
"prompt": "The page content area"
|
|
148
|
+
},
|
|
149
|
+
"direction": "down"
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
118
153
|
`;
|
|
119
154
|
}
|
|
120
155
|
export { descriptionForAction, systemPromptToTaskPlanning };
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n \"log\": \"Click the login button\",\n \"more_actions_needed_by_instruction\": false,\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${vlMode && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n}\n`;\n}\n"],"names":["commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","key","field","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","paramLine","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction"],"mappings":";;AAYA,MAAMA,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACH,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAME,aACJ,AACE,cADF,OAAQF,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMG,kBAAkBD,aAAa,GAAGH,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMK,WAAWC,eAAeL,OAAOR;gBAGvC,MAAMc,cAAcC,kBAAkBP;gBAGtC,IAAIQ,YAAY,GAAGL,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,IAAIE,aACFE,aAAa,CAAC,IAAI,EAAEF,aAAa;gBAGnCX,WAAW,IAAI,CAACa;YAClB;YAIF,IAAIb,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACc;oBAClBf,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEe,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAML,WAAWC,eAAeT;YAChC,MAAMU,cAAcC,kBAAkBX;YAGtC,IAAIc,mBAAmB,CAAC,SAAS,EAAEN,UAAU;YAC7C,IAAIE,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpBhB,OAAO,IAAI,CAACgB;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEnB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAekB,2BAA2B,EAC/CC,WAAW,EACXxB,MAAM,EACNyB,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACzB,QAClB,MAAM,IAAI0B,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAACrB,SACtCD,qBACLC,QACAJ,cAAc0B,cAAczB,SAAS4B;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAEhC,mBAAmB;;;;;;;;;;;;;;;;;;;;;oCAqBa,EAAEE,UAAUyB,cAAc,mCAAmC,GAAG;;;;;AAKpG,CAAC;AACD"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport {\n getZodDescription,\n getZodTypeName,\n} from '@midscene/shared/zod-schema-utils';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"thought\": string, // your thought process about the next action\n \"note\"?: string, // some important notes to finish the follow-up action should be written here, and the agent executing the subsequent steps will focus on this information. For example, the data extracted from the current screenshot which will be used in the follow-up action.\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n \"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\n/**\n * Find ZodDefault in the wrapper chain and return its default value\n */\nconst findDefaultValue = (field: unknown): any | undefined => {\n let current = field;\n const visited = new Set<unknown>();\n\n while (current && !visited.has(current)) {\n visited.add(current);\n const currentWithDef = current as {\n _def?: {\n typeName?: string;\n defaultValue?: () => any;\n innerType?: unknown;\n };\n };\n\n if (!currentWithDef._def?.typeName) break;\n\n if (currentWithDef._def.typeName === 'ZodDefault') {\n return currentWithDef._def.defaultValue?.();\n }\n\n // Continue unwrapping if it's a wrapper type\n if (\n currentWithDef._def.typeName === 'ZodOptional' ||\n currentWithDef._def.typeName === 'ZodNullable'\n ) {\n current = currentWithDef._def.innerType;\n } else {\n break;\n }\n }\n\n return undefined;\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as {\n _def?: { typeName?: string };\n shape?: Record<string, unknown>;\n };\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as { isOptional?: () => boolean }).isOptional ===\n 'function' &&\n (field as { isOptional: () => boolean }).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name using extracted helper\n const typeName = getZodTypeName(field, locatorSchemaTypeDescription);\n\n // Get description using extracted helper\n const description = getZodDescription(field as z.ZodTypeAny);\n\n // Check if field has a default value by searching the wrapper chain\n const defaultValue = findDefaultValue(field);\n const hasDefault = defaultValue !== undefined;\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n const comments: string[] = [];\n if (description) {\n comments.push(description);\n }\n if (hasDefault) {\n const defaultStr =\n typeof defaultValue === 'string'\n ? `\"${defaultValue}\"`\n : JSON.stringify(defaultValue);\n comments.push(`default: ${defaultStr}`);\n }\n if (comments.length > 0) {\n paramLine += ` // ${comments.join(', ')}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n const typeName = getZodTypeName(schema);\n const description = getZodDescription(schema as z.ZodTypeAny);\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- Assertions are also important steps. When getting the assertion instruction, a solid conclusion is required. You should explicitly state your conclusion by calling the \"Print_Assert_Result\" action.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n // k-v style parameter fields\n }, \n } | null\n}\n\nFor example, if the instruction is to login and the form has already been filled, this is a valid return value:\n\n{\n \"thought\": \"The form has already been filled, I need to click the login button to login\",\n \"log\": \"Click the login button\",\n \"action\": {\n \"type\": \"Tap\",\n \"param\": {\n \"locate\": { \n \"prompt\": \"The login button\"${vlMode && includeBbox ? `, \"bbox\": [100, 200, 300, 400]` : ''}\n }\n }\n }\n}\n\nFor example, if the instruction is to find out every title in the screenshot, the return value should be:\n\n{\n \"thought\": \"I need to note the titles in the current screenshot for further processing and scroll to find more titles\",\n \"note\": \"The titles in the current screenshot are: 'Hello, world!', 'Midscene 101', 'Model strategy'\",\n \"log\": \"Scroll to find more titles\",\n \"action\": {\n \"type\": \"Scroll\",\n \"param\": {\n \"locate\": {\n \"prompt\": \"The page content area\"\n },\n \"direction\": \"down\"\n }\n }\n}\n`;\n}\n"],"names":["commonOutputFields","vlLocateParam","vlMode","bboxDescription","findDefaultValue","field","current","visited","Set","currentWithDef","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","key","Object","isOptional","keyWithOptional","typeName","getZodTypeName","description","getZodDescription","defaultValue","hasDefault","undefined","paramLine","comments","defaultStr","JSON","line","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","actionList","logFieldInstruction"],"mappings":";;AAYA,MAAMA,qBAAqB,CAAC;;;yNAG6L,CAAC;AAE1N,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAKA,MAAME,mBAAmB,CAACC;IACxB,IAAIC,UAAUD;IACd,MAAME,UAAU,IAAIC;IAEpB,MAAOF,WAAW,CAACC,QAAQ,GAAG,CAACD,SAAU;QACvCC,QAAQ,GAAG,CAACD;QACZ,MAAMG,iBAAiBH;QAQvB,IAAI,CAACG,eAAe,IAAI,EAAE,UAAU;QAEpC,IAAIA,AAAiC,iBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAC9B,OAAOA,eAAe,IAAI,CAAC,YAAY;QAIzC,IACEA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,IAC5BA,AAAiC,kBAAjCA,eAAe,IAAI,CAAC,QAAQ,EAE5BH,UAAUG,eAAe,IAAI,CAAC,SAAS;aAEvC;IAEJ;AAGF;AAEO,MAAMC,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QAIjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAE1B,KAAK,MAAM,CAACG,KAAKd,MAAM,IAAIe,OAAO,OAAO,CAACF,OACxC,IAAIb,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMgB,aACJ,AACE,cADF,OAAQhB,MAAyC,UAAU,IAE1DA,MAAwC,UAAU;gBACrD,MAAMiB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMI,WAAWC,eAAenB,OAAOO;gBAGvC,MAAMa,cAAcC,kBAAkBrB;gBAGtC,MAAMsB,eAAevB,iBAAiBC;gBACtC,MAAMuB,aAAaD,AAAiBE,WAAjBF;gBAGnB,IAAIG,YAAY,GAAGR,gBAAgB,EAAE,EAAEC,UAAU;gBACjD,MAAMQ,WAAqB,EAAE;gBAC7B,IAAIN,aACFM,SAAS,IAAI,CAACN;gBAEhB,IAAIG,YAAY;oBACd,MAAMI,aACJ,AAAwB,YAAxB,OAAOL,eACH,CAAC,CAAC,EAAEA,aAAa,CAAC,CAAC,GACnBM,KAAK,SAAS,CAACN;oBACrBI,SAAS,IAAI,CAAC,CAAC,SAAS,EAAEC,YAAY;gBACxC;gBACA,IAAID,SAAS,MAAM,GAAG,GACpBD,aAAa,CAAC,IAAI,EAAEC,SAAS,IAAI,CAAC,OAAO;gBAG3ChB,WAAW,IAAI,CAACe;YAClB;YAIF,IAAIf,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAACmB;oBAClBpB,OAAO,IAAI,CAAC,CAAC,IAAI,EAAEoB,MAAM;gBAC3B;YACF;QACF,OAAO;YAEL,MAAMX,WAAWC,eAAeR;YAChC,MAAMS,cAAcC,kBAAkBV;YAGtC,IAAImB,mBAAmB,CAAC,SAAS,EAAEZ,UAAU;YAC7C,IAAIE,aACFU,oBAAoB,CAAC,IAAI,EAAEV,aAAa;YAE1CU,oBAAoB;YAEpBrB,OAAO,IAAI,CAACqB;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAExB,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAeuB,2BAA2B,EAC/CC,WAAW,EACXnC,MAAM,EACNoC,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACpC,QAClB,MAAM,IAAIqC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAAC1B,SACtCD,qBACLC,QACAV,cAAcqC,cAAcpC,SAAS2B;IAGzC,MAAMY,aAAaD,sBAAsB,IAAI,CAAC;IAE9C,MAAME,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;AAeV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;EAMpB,EAAE1C,mBAAmB;;;;;;;;;;;;;;;;;;;oCAmBa,EAAEE,UAAUoC,cAAc,mCAAmC,GAAG;;;;;;;;;;;;;;;;;;;;;;AAsBpG,CAAC;AACD"}
|
|
@@ -4,7 +4,7 @@ import { assert, ifInBrowser } from "@midscene/shared/utils";
|
|
|
4
4
|
import { jsonrepair } from "jsonrepair";
|
|
5
5
|
import openai_0 from "openai";
|
|
6
6
|
async function createChatClient({ modelConfig }) {
|
|
7
|
-
const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, createOpenAIClient, timeout } = modelConfig;
|
|
7
|
+
const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, modelFamily, createOpenAIClient, timeout } = modelConfig;
|
|
8
8
|
let proxyAgent;
|
|
9
9
|
const debugProxy = getDebug('ai:call:proxy');
|
|
10
10
|
const sanitizeProxyUrl = (url)=>{
|
|
@@ -99,11 +99,12 @@ async function createChatClient({ modelConfig }) {
|
|
|
99
99
|
modelName,
|
|
100
100
|
modelDescription,
|
|
101
101
|
uiTarsVersion,
|
|
102
|
-
vlMode
|
|
102
|
+
vlMode,
|
|
103
|
+
modelFamily
|
|
103
104
|
};
|
|
104
105
|
}
|
|
105
106
|
async function callAI(messages, modelConfig, options) {
|
|
106
|
-
const { completion, modelName, modelDescription, uiTarsVersion, vlMode } = await createChatClient({
|
|
107
|
+
const { completion, modelName, modelDescription, uiTarsVersion, vlMode, modelFamily } = await createChatClient({
|
|
107
108
|
modelConfig
|
|
108
109
|
});
|
|
109
110
|
const maxTokens = globalConfigManager.getEnvConfigValueAsNumber(MIDSCENE_MODEL_MAX_TOKENS) ?? globalConfigManager.getEnvConfigValueAsNumber(OPENAI_MAX_TOKENS);
|
|
@@ -142,7 +143,7 @@ async function callAI(messages, modelConfig, options) {
|
|
|
142
143
|
};
|
|
143
144
|
const { config: deepThinkConfig, debugMessage, warningMessage } = resolveDeepThinkConfig({
|
|
144
145
|
deepThink: options?.deepThink,
|
|
145
|
-
|
|
146
|
+
modelFamily
|
|
146
147
|
});
|
|
147
148
|
if (debugMessage) debugCall(debugMessage);
|
|
148
149
|
if (warningMessage) {
|
|
@@ -290,19 +291,19 @@ function preprocessDoubaoBboxJson(input) {
|
|
|
290
291
|
if (input.includes('bbox')) while(/\d+\s+\d+/.test(input))input = input.replace(/(\d+)\s+(\d+)/g, '$1,$2');
|
|
291
292
|
return input;
|
|
292
293
|
}
|
|
293
|
-
function resolveDeepThinkConfig({ deepThink,
|
|
294
|
+
function resolveDeepThinkConfig({ deepThink, modelFamily }) {
|
|
294
295
|
const normalizedDeepThink = 'unset' === deepThink ? void 0 : deepThink;
|
|
295
296
|
if (void 0 === normalizedDeepThink) return {
|
|
296
297
|
config: {},
|
|
297
298
|
debugMessage: void 0
|
|
298
299
|
};
|
|
299
|
-
if ('qwen3-vl' ===
|
|
300
|
+
if ('qwen3-vl' === modelFamily) return {
|
|
300
301
|
config: {
|
|
301
302
|
enable_thinking: normalizedDeepThink
|
|
302
303
|
},
|
|
303
304
|
debugMessage: `deepThink mapped to enable_thinking=${normalizedDeepThink} for qwen3-vl`
|
|
304
305
|
};
|
|
305
|
-
if ('doubao-vision' ===
|
|
306
|
+
if ('doubao-vision' === modelFamily) return {
|
|
306
307
|
config: {
|
|
307
308
|
thinking: {
|
|
308
309
|
type: normalizedDeepThink ? 'enabled' : 'disabled'
|
|
@@ -310,7 +311,7 @@ function resolveDeepThinkConfig({ deepThink, vlMode }) {
|
|
|
310
311
|
},
|
|
311
312
|
debugMessage: `deepThink mapped to thinking.type=${normalizedDeepThink ? 'enabled' : 'disabled'} for doubao-vision`
|
|
312
313
|
};
|
|
313
|
-
if ('glm-v' ===
|
|
314
|
+
if ('glm-v' === modelFamily) return {
|
|
314
315
|
config: {
|
|
315
316
|
thinking: {
|
|
316
317
|
type: normalizedDeepThink ? 'enabled' : 'disabled'
|
|
@@ -320,8 +321,8 @@ function resolveDeepThinkConfig({ deepThink, vlMode }) {
|
|
|
320
321
|
};
|
|
321
322
|
return {
|
|
322
323
|
config: {},
|
|
323
|
-
debugMessage: `deepThink ignored: unsupported model_family "${
|
|
324
|
-
warningMessage: `The "deepThink" option is not supported for model_family "${
|
|
324
|
+
debugMessage: `deepThink ignored: unsupported model_family "${modelFamily ?? 'default'}"`,
|
|
325
|
+
warningMessage: `The "deepThink" option is not supported for model_family "${modelFamily ?? 'default'}".`
|
|
325
326
|
};
|
|
326
327
|
}
|
|
327
328
|
function normalizeJsonObject(obj) {
|