@midscene/core 1.0.1-beta-20251204032807.0 → 1.0.1-beta-20251204075416.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/tasks.mjs +5 -2
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +7 -94
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +1 -32
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/tasks.js +5 -2
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +7 -97
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +0 -34
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/prompt/llm-planning.d.ts +0 -2
- package/dist/types/ai-model/service-caller/index.d.ts +1 -3
- package/package.json +2 -2
package/dist/es/agent/tasks.mjs
CHANGED
|
@@ -140,7 +140,7 @@ class TaskExecutor {
|
|
|
140
140
|
const timeRemaining = sleep - (timeNow - startTime);
|
|
141
141
|
if (timeRemaining > 0) finalActions.push(this.sleepPlan(timeRemaining));
|
|
142
142
|
}
|
|
143
|
-
|
|
143
|
+
assert(!error, `Failed to continue: ${error}\n${log || ''}`);
|
|
144
144
|
return {
|
|
145
145
|
cache: {
|
|
146
146
|
hit: false
|
|
@@ -163,15 +163,18 @@ class TaskExecutor {
|
|
|
163
163
|
return session.appendErrorPlan(`Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(plans)}`);
|
|
164
164
|
}
|
|
165
165
|
if (this.conversationHistory.pendingFeedbackMessage) console.warn('unconsumed pending feedback message detected, this may lead to unexpected planning result:', this.conversationHistory.pendingFeedbackMessage);
|
|
166
|
+
let errorFlag = false;
|
|
166
167
|
try {
|
|
167
168
|
await session.appendAndRun(executables.tasks);
|
|
168
169
|
} catch (error) {
|
|
170
|
+
errorFlag = true;
|
|
169
171
|
errorCountInOnePlanningLoop++;
|
|
170
172
|
this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;
|
|
171
173
|
debug('error when executing running tasks, but continue to run if it is not too many errors:', error instanceof Error ? error.message : String(error), 'current error count in one planning loop:', errorCountInOnePlanningLoop);
|
|
172
174
|
}
|
|
173
175
|
if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) return session.appendErrorPlan('Too many errors in one planning loop');
|
|
174
|
-
if (!planResult?.more_actions_needed_by_instruction)
|
|
176
|
+
if (!planResult?.more_actions_needed_by_instruction) if (errorFlag) debug('more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run');
|
|
177
|
+
else break;
|
|
175
178
|
++replanCount;
|
|
176
179
|
if (replanCount > replanningCycleLimit) {
|
|
177
180
|
const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n backgroundKnowledge?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActionContext: backgroundKnowledge,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfigForPlanning;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars'\n ? modelConfigForPlanning.uiTarsModelVersion\n : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion\n ? uiTarsPlanning\n : plan)(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.output = {\n actions: actions || [],\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n };\n executorContext.uiContext = uiContext;\n\n const finalActions = [...(actions || [])];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n if ((actions || []).length === 0) {\n assert(\n sleep,\n error\n ? `Failed to continue: ${error}\\n${log || ''}`\n : 'No plan found',\n );\n }\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check if task is complete\n if (!planResult?.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage =\n 'I have finished the action previously planned.';\n }\n }\n\n const finalResult = {\n output: {\n yamlFlow,\n },\n runner,\n };\n return finalResult;\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n undefined,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","backgroundKnowledge","cacheable","replanningCycleLimitOverride","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","startTime","Date","vlMode","uiTarsModelVersion","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","executables","String","Error","errorMsg","finalResult","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","lastCheckStart","errorThought","currentCheckStart","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA8CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAkBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA0BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,mBAA4B,EAC5BC,SAAmB,EACnBC,4BAAqC,EAQrC;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMf,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIgB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJH,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEY,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAGlC,MAAO,KAAM;YACX,MAAMX,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjB,iBAAiBE;gBACnB;gBACA,UAAU,OAAOV,OAAOC;oBACtB,MAAMiB,YAAYC,KAAK,GAAG;oBAC1B,MAAM,EAAEjB,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEkB,MAAM,EAAE,GAAG3B;oBACnB,MAAM4B,qBACJD,AAAW,kBAAXA,SACI3B,uBAAuB,kBAAkB,GACzCuB;oBAENb,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;oBAEF,MAAMmB,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;oBACpDtC,MACE,sCACAsC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDpB,OAAOqB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,aAAa,MAAOL,AAAAA,CAAAA,qBACtBM,iBACAC,IAAG,EAAG5B,MAAM,eAAe,EAAE;wBAC/B,SAASE;wBACT,eAAeF,MAAM,eAAe;wBACpC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CsB;wBACA,aAAa7B;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;oBACf;oBACAzB,MAAM,cAAc6C,KAAK,SAAS,CAACH,YAAY,MAAM;oBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;oBAEJzB,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCkC;oBACF;oBACAlC,gBAAgB,IAAI,CAAC,KAAK,GAAGiC;oBAC7BjC,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS6B,WAAW,EAAE;wBACtBE;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACAzB,gBAAgB,SAAS,GAAGC;oBAE5B,MAAMmC,eAAe;2BAAKP,WAAW,EAAE;qBAAE;oBAEzC,IAAIM,OAAO;wBACT,MAAME,UAAUnB,KAAK,GAAG;wBACxB,MAAMoB,gBAAgBH,QAASE,CAAAA,UAAUpB,SAAQ;wBACjD,IAAIqB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;oBAErC;oBAEA,IAAKT,AAA0B,MAA1BA,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,EACxB3B,OACEiC,OACAH,QACI,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI,GAC5C;oBAIR,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAML,aAAapB,QAAQ;YAG3B,MAAMd,QAAQkC,YAAY,WAAW,EAAE;YACvCZ,SAAS,IAAI,IAAKY,YAAY,YAAY,EAAE;YAE5C,IAAIc;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9ChD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAOsB,OAAO;gBACd,OAAOpC,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEoC,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ErC,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDiC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAGnD,IAAI;gBACF,MAAM5B,QAAQ,YAAY,CAAC2C,YAAY,KAAK;YAC9C,EAAE,OAAOP,OAAY;gBACnBhB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,+BAA+B,EAAEgB,OAAO,WAAWQ,OAAOR,QAAQ;gBACrHjD,MACE,yFACAiD,iBAAiBS,QAAQT,MAAM,OAAO,GAAGQ,OAAOR,QAChD,6CACAhB;YAEJ;YAEA,IAAIA,8BAA8B/B,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAAC6B,YAAY,oCACf;YAIF,EAAEb;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM4B,WAAW,CAAC,UAAU,EAAE5B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOlB,QAAQ,eAAe,CAAC8C;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAC7C;QAEN;QAEA,MAAMC,cAAc;YAClB,QAAQ;gBACN9B;YACF;YACAV;QACF;QACA,OAAOwC;IACT;IAEQ,oBACNC,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAO9C,OAAOmD;gBACtB,MAAM,EAAEpD,IAAI,EAAE,GAAGoD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZvD,KAAK,GAAG,GAAG;wBACTuD;oBACF;gBACF;gBAGA,MAAMpD,YAAYiD,YAAY,SAAS;gBACvChD,OAAOD,WAAW;gBAElB,MAAMqD,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DhE,MAAM;oBACN,MAAM6E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOhB,OAAO;oBACd,IAAIA,iBAAiB8B,cACnBV,UAAUpB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAE+B,IAAI,EAAE9B,KAAK,EAAE+B,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAInB,AAAS,cAATA,MAEPqB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACL/D,OACE6D,MAAM,CAACP,YAAY,KAAKzC,QACxB;oBAEFkD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACqB,cAAc;oBACtCnE,KAAK,KAAK,GAAGmC;oBACbnC,KAAK,OAAO,GAAGkE;oBACf,MAAM,IAAIvB,MAAM,CAAC,kBAAkB,EAAEuB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACLlB;oBACA+B;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMpD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACE+C,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASjB,KAAK,SAAS,CAACiB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAM7C,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACqD;QAE1C,IAAI,CAAC5C,QACH,MAAM,IAAIoC,MACR;QAIJ,MAAM,EAAEnC,MAAM,EAAE0D,OAAO,EAAE,GAAG3D;QAE5B,OAAO;YACLC;YACA0D;YACA7D;QACF;IACF;IAEQ,UAAU+D,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;QACF;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBrB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEuB,UAAU,EAAErB,gBAAgB,EAAE,GAAGsB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMzE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW0E;QAE1B,MAAMpE,SAASP,QAAQ,SAAS;QAChC,MAAM,EAAE4E,SAAS,EAAEC,eAAe,EAAE,GAAG1B;QAEvC7C,OAAOkE,WAAW;QAClBlE,OAAOsE,WAAW;QAClBtE,OAAOuE,iBAAiB;QAExBvE,OACEuE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBxD,KAAK,GAAG;QACjC,IAAIyD,iBAAiBD;QACrB,IAAIE,eAAe;QAEnB,MAAOD,iBAAiBD,oBAAoBF,UAAW;YACrD,MAAMK,oBAAoB3D,KAAK,GAAG;YAClCyD,iBAAiBE;YACjB,MAAM5B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAoB,YACAvB,aACA/B,QACAiC;YAGF,MAAM3C,SAAU,MAAMT,QAAQ,YAAY,CAACqD;YAO3C,IAAI5C,QAAQ,QACV,OAAO;gBACL,QAAQU;gBACRZ;YACF;YAGFyE,eACEvE,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEgE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMS,MAAM5D,KAAK,GAAG;YACpB,IAAI4D,MAAMD,oBAAoBJ,iBAAiB;gBAC7C,MAAMnC,gBAAgBmC,kBAAmBK,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAME,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQzC;gBACV;gBACA,MAAM1C,QAAQ,MAAM,CAACmF;YACvB;QACF;QAEA,OAAOnF,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEgF,cAAc;IACnE;IA7jBA,YACEI,iBAAoC,EACpCC,OAAgB,EAChBC,IAKC,CACD;QA9BF;QAEA;QAEA;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAiBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;QAC3B;IACF;AAwiBF"}
|
|
1
|
+
{"version":3,"file":"agent/tasks.mjs","sources":["../../../src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport { TaskExecutionError } from '@/task-runner';\nimport type {\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\ninterface TaskExecutorHooks {\n onTaskUpdate?: (\n runner: TaskRunner,\n error?: TaskExecutionError,\n ) => Promise<void> | void;\n}\n\nconst debug = getDebug('device-task-executor');\nconst maxErrorCountAllowedInOnePlanningLoop = 5;\n\nexport { TaskExecutionError };\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n private readonly hooks?: TaskExecutorHooks;\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n hooks?: TaskExecutorHooks;\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.hooks = opts.hooks;\n this.conversationHistory = new ConversationHistory();\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n onTaskUpdate: this.hooks?.onTaskUpdate,\n },\n );\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n options,\n );\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n const runner = session.getRunner();\n await session.appendAndRun(task);\n\n return {\n runner,\n };\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n );\n const runner = session.getRunner();\n const result = await session.appendAndRun(tasks);\n const { output } = result ?? {};\n return {\n output,\n runner,\n };\n }\n\n async action(\n userPrompt: string,\n modelConfigForPlanning: IModelConfig,\n modelConfigForDefaultIntent: IModelConfig,\n includeBboxInPlanning: boolean,\n backgroundKnowledge?: string,\n cacheable?: boolean,\n replanningCycleLimitOverride?: number,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit =\n replanningCycleLimitOverride ?? this.replanningCycleLimit;\n assert(\n replanningCycleLimit !== undefined,\n 'replanningCycleLimit is required for TaskExecutor.action',\n );\n\n let errorCountInOnePlanningLoop = 0; // count the number of errors in one planning loop\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n const result = await session.appendAndRun(\n {\n type: 'Planning',\n subType: 'Plan',\n param: {\n userInstruction: userPrompt,\n aiActionContext: backgroundKnowledge,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfigForPlanning;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars'\n ? modelConfigForPlanning.uiTarsModelVersion\n : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion\n ? uiTarsPlanning\n : plan)(param.userInstruction, {\n context: uiContext,\n actionContext: param.aiActionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig: modelConfigForPlanning,\n conversationHistory: this.conversationHistory,\n includeBbox: includeBboxInPlanning,\n });\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n executorContext.task.output = {\n actions: actions || [],\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n };\n executorContext.uiContext = uiContext;\n\n const finalActions = [...(actions || [])];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n assert(!error, `Failed to continue: ${error}\\n${log || ''}`);\n\n return {\n cache: {\n hit: false,\n },\n } as any;\n },\n },\n {\n allowWhenError: true,\n },\n );\n\n const planResult = result?.output as PlanningAIResponse | undefined;\n\n // Execute planned actions\n const plans = planResult?.actions || [];\n yamlFlow.push(...(planResult?.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(\n plans,\n modelConfigForPlanning,\n modelConfigForDefaultIntent,\n {\n cacheable,\n subTask: true,\n },\n );\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (this.conversationHistory.pendingFeedbackMessage) {\n console.warn(\n 'unconsumed pending feedback message detected, this may lead to unexpected planning result:',\n this.conversationHistory.pendingFeedbackMessage,\n );\n }\n let errorFlag = false;\n try {\n await session.appendAndRun(executables.tasks);\n } catch (error: any) {\n errorFlag = true;\n errorCountInOnePlanningLoop++;\n this.conversationHistory.pendingFeedbackMessage = `Error executing running tasks: ${error?.message || String(error)}`;\n debug(\n 'error when executing running tasks, but continue to run if it is not too many errors:',\n error instanceof Error ? error.message : String(error),\n 'current error count in one planning loop:',\n errorCountInOnePlanningLoop,\n );\n }\n\n if (errorCountInOnePlanningLoop > maxErrorCountAllowedInOnePlanningLoop) {\n return session.appendErrorPlan('Too many errors in one planning loop');\n }\n\n // Check if task is complete\n if (!planResult?.more_actions_needed_by_instruction) {\n if (errorFlag) {\n debug(\n 'more_actions_needed_by_instruction is false, but there are errors in one planning loop, continue to run',\n );\n } else {\n break;\n }\n }\n\n // Increment replan count for next iteration\n ++replanCount;\n\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanned ${replanningCycleLimit} times, exceeding the limit. Please configure a larger value for replanningCycleLimit (or use MIDSCENE_REPLANNING_CYCLE_LIMIT) to handle more complex tasks.`;\n return session.appendErrorPlan(errorMsg);\n }\n\n if (!this.conversationHistory.pendingFeedbackMessage) {\n this.conversationHistory.pendingFeedbackMessage =\n 'I have finished the action previously planned.';\n }\n }\n\n const finalResult = {\n output: {\n yamlFlow,\n },\n runner,\n };\n return finalResult;\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const runner = session.getRunner();\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner,\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let lastCheckStart = overallStartTime;\n let errorThought = '';\n // Continue checking as long as the previous iteration began within the timeout window.\n while (lastCheckStart - overallStartTime <= timeoutMs) {\n const currentCheckStart = Date.now();\n lastCheckStart = currentCheckStart;\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n undefined,\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - currentCheckStart < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - currentCheckStart);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n"],"names":["debug","getDebug","maxErrorCountAllowedInOnePlanningLoop","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfigForPlanning","modelConfigForDefaultIntent","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","runner","tasks","result","output","userPrompt","includeBboxInPlanning","backgroundKnowledge","cacheable","replanningCycleLimitOverride","replanCount","yamlFlow","replanningCycleLimit","undefined","errorCountInOnePlanningLoop","startTime","Date","vlMode","uiTarsModelVersion","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","executables","errorFlag","String","Error","errorMsg","finalResult","type","demand","modelConfig","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","lastCheckStart","errorThought","currentCheckStart","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA8CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,wCAAwC;AAIvC,MAAMC;IAkBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IA0BQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,SAAS;YAChB,cAAc,IAAI,CAAC,KAAK,EAAE;QAC5B;IAEJ;IAEA,MAAa,wBACXG,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACzCL,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAC3BG,OACAC,wBACAC,6BACAL;IAEJ;IAEA,MAAM,uBAAuBM,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMQ,SAASP,QAAQ,SAAS;QAChC,MAAMA,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACLK;QACF;IACF;IAEA,MAAM,SACJhB,KAAa,EACbI,KAAuB,EACvBC,sBAAoC,EACpCC,2BAAyC,EACf;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACT;QAC5C,MAAM,EAAEiB,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAClDb,OACAC,wBACAC;QAEF,MAAMU,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACQ;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD,UAAU,CAAC;QAC9B,OAAO;YACLC;YACAH;QACF;IACF;IAEA,MAAM,OACJI,UAAkB,EAClBf,sBAAoC,EACpCC,2BAAyC,EACzCe,qBAA8B,EAC9BC,mBAA4B,EAC5BC,SAAmB,EACnBC,4BAAqC,EAQrC;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMf,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUU;QAEzB,MAAMJ,SAASP,QAAQ,SAAS;QAEhC,IAAIgB,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBACJH,gCAAgC,IAAI,CAAC,oBAAoB;QAC3DT,OACEY,AAAyBC,WAAzBD,sBACA;QAGF,IAAIE,8BAA8B;QAGlC,MAAO,KAAM;YACX,MAAMX,SAAS,MAAMT,QAAQ,YAAY,CACvC;gBACE,MAAM;gBACN,SAAS;gBACT,OAAO;oBACL,iBAAiBW;oBACjB,iBAAiBE;gBACnB;gBACA,UAAU,OAAOV,OAAOC;oBACtB,MAAMiB,YAAYC,KAAK,GAAG;oBAC1B,MAAM,EAAEjB,SAAS,EAAE,GAAGD;oBACtBE,OAAOD,WAAW;oBAClB,MAAM,EAAEkB,MAAM,EAAE,GAAG3B;oBACnB,MAAM4B,qBACJD,AAAW,kBAAXA,SACI3B,uBAAuB,kBAAkB,GACzCuB;oBAENb,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;oBAEF,MAAMmB,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;oBACpDtC,MACE,sCACAsC,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;oBAEhDpB,OAAOqB,MAAM,OAAO,CAACF,cAAc;oBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;oBAIrG,MAAMC,aAAa,MAAOL,AAAAA,CAAAA,qBACtBM,iBACAC,IAAG,EAAG5B,MAAM,eAAe,EAAE;wBAC/B,SAASE;wBACT,eAAeF,MAAM,eAAe;wBACpC,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;wBAC3CsB;wBACA,aAAa7B;wBACb,qBAAqB,IAAI,CAAC,mBAAmB;wBAC7C,aAAagB;oBACf;oBACAzB,MAAM,cAAc6C,KAAK,SAAS,CAACH,YAAY,MAAM;oBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;oBAEJzB,gBAAgB,IAAI,CAAC,GAAG,GAAG;wBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;wBAClCkC;oBACF;oBACAlC,gBAAgB,IAAI,CAAC,KAAK,GAAGiC;oBAC7BjC,gBAAgB,IAAI,CAAC,MAAM,GAAG;wBAC5B,SAAS6B,WAAW,EAAE;wBACtBE;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACAzB,gBAAgB,SAAS,GAAGC;oBAE5B,MAAMmC,eAAe;2BAAKP,WAAW,EAAE;qBAAE;oBAEzC,IAAIM,OAAO;wBACT,MAAME,UAAUnB,KAAK,GAAG;wBACxB,MAAMoB,gBAAgBH,QAASE,CAAAA,UAAUpB,SAAQ;wBACjD,IAAIqB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;oBAErC;oBAEApC,OAAO,CAAC8B,OAAO,CAAC,oBAAoB,EAAEA,MAAM,EAAE,EAAEF,OAAO,IAAI;oBAE3D,OAAO;wBACL,OAAO;4BACL,KAAK;wBACP;oBACF;gBACF;YACF,GACA;gBACE,gBAAgB;YAClB;YAGF,MAAML,aAAapB,QAAQ;YAG3B,MAAMd,QAAQkC,YAAY,WAAW,EAAE;YACvCZ,SAAS,IAAI,IAAKY,YAAY,YAAY,EAAE;YAE5C,IAAIc;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAC9ChD,OACAC,wBACAC,6BACA;oBACEiB;oBACA,SAAS;gBACX;YAEJ,EAAE,OAAOsB,OAAO;gBACd,OAAOpC,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEoC,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ErC,QACC;YAEP;YACA,IAAI,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EACjDiC,QAAQ,IAAI,CACV,8FACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB;YAGnD,IAAIgB,YAAY;YAChB,IAAI;gBACF,MAAM5C,QAAQ,YAAY,CAAC2C,YAAY,KAAK;YAC9C,EAAE,OAAOP,OAAY;gBACnBQ,YAAY;gBACZxB;gBACA,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAAG,CAAC,+BAA+B,EAAEgB,OAAO,WAAWS,OAAOT,QAAQ;gBACrHjD,MACE,yFACAiD,iBAAiBU,QAAQV,MAAM,OAAO,GAAGS,OAAOT,QAChD,6CACAhB;YAEJ;YAEA,IAAIA,8BAA8B/B,uCAChC,OAAOW,QAAQ,eAAe,CAAC;YAIjC,IAAI,CAAC6B,YAAY,oCACf,IAAIe,WACFzD,MACE;iBAGF;YAKJ,EAAE6B;YAEF,IAAIA,cAAcE,sBAAsB;gBACtC,MAAM6B,WAAW,CAAC,UAAU,EAAE7B,qBAAqB,4JAA4J,CAAC;gBAChN,OAAOlB,QAAQ,eAAe,CAAC+C;YACjC;YAEA,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,EAClD,IAAI,CAAC,mBAAmB,CAAC,sBAAsB,GAC7C;QAEN;QAEA,MAAMC,cAAc;YAClB,QAAQ;gBACN/B;YACF;YACAV;QACF;QACA,OAAOyC;IACT;IAEQ,oBACNC,IAAsE,EACtEC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASL;YACT,OAAO;gBACL,YAAYI,mBACP;oBACCH;oBACAG;gBACF,IACAH;YACN;YACA,UAAU,OAAO/C,OAAOoD;gBACtB,MAAM,EAAErD,IAAI,EAAE,GAAGqD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZxD,KAAK,GAAG,GAAG;wBACTwD;oBACF;gBACF;gBAGA,MAAMrD,YAAYkD,YAAY,SAAS;gBACvCjD,OAAOD,WAAW;gBAElB,MAAMsD,mBAAmBV,AAAS,YAATA;gBACzB,IAAIW,cAAcV;gBAClB,IAAIW,cAAc;gBAClB,IAAIF,oBAAqBV,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEY,cAAc;oBACd,MAAMC,gBACJb,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIU,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGZ,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIa;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,KAAK,eAAe,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1DjE,MAAM;oBACN,MAAM8E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,KAAK,gBAAgB;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACAT,aACAC,KACAY,sBACAX;gBAEJ,EAAE,OAAOjB,OAAO;oBACd,IAAIA,iBAAiB+B,cACnBV,UAAUrB,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEgC,IAAI,EAAE/B,KAAK,EAAEgC,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAInB,AAAS,cAATA,MAEPqB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACLhE,OACE8D,MAAM,CAACP,YAAY,KAAK1C,QACxB;oBAEFmD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIZ,AAAS,aAATA,QAAqB,CAACqB,cAAc;oBACtCpE,KAAK,KAAK,GAAGmC;oBACbnC,KAAK,OAAO,GAAGmE;oBACf,MAAM,IAAIvB,MAAM,CAAC,kBAAkB,EAAEuB,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACLnB;oBACAgC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJL,IAA0D,EAC1DC,MAA2B,EAC3BC,WAAyB,EACzBC,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMrD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACEgD,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAASlB,KAAK,SAAS,CAACkB;QAIzD,MAAMI,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CL,MACAC,QACAC,aACAC,KACAC;QAGF,MAAM9C,SAASP,QAAQ,SAAS;QAChC,MAAMS,SAAS,MAAMT,QAAQ,YAAY,CAACsD;QAE1C,IAAI,CAAC7C,QACH,MAAM,IAAIqC,MACR;QAIJ,MAAM,EAAEpC,MAAM,EAAE2D,OAAO,EAAE,GAAG5D;QAE5B,OAAO;YACLC;YACA2D;YACA9D;QACF;IACF;IAEQ,UAAUgE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;QACF;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBrB,GAA+B,EAC/BD,WAAyB,EACO;QAChC,MAAM,EAAEuB,UAAU,EAAErB,gBAAgB,EAAE,GAAGsB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAM1E,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAW2E;QAE1B,MAAMrE,SAASP,QAAQ,SAAS;QAChC,MAAM,EAAE6E,SAAS,EAAEC,eAAe,EAAE,GAAG1B;QAEvC9C,OAAOmE,WAAW;QAClBnE,OAAOuE,WAAW;QAClBvE,OAAOwE,iBAAiB;QAExBxE,OACEwE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBzD,KAAK,GAAG;QACjC,IAAI0D,iBAAiBD;QACrB,IAAIE,eAAe;QAEnB,MAAOD,iBAAiBD,oBAAoBF,UAAW;YACrD,MAAMK,oBAAoB5D,KAAK,GAAG;YAClC0D,iBAAiBE;YACjB,MAAM5B,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAoB,YACAvB,aACAhC,QACAkC;YAGF,MAAM5C,SAAU,MAAMT,QAAQ,YAAY,CAACsD;YAO3C,IAAI7C,QAAQ,QACV,OAAO;gBACL,QAAQU;gBACRZ;YACF;YAGF0E,eACExE,QAAQ,WACP,CAACA,UAAU,CAAC,0BAA0B,EAAEiE,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMS,MAAM7D,KAAK,GAAG;YACpB,IAAI6D,MAAMD,oBAAoBJ,iBAAiB;gBAC7C,MAAMpC,gBAAgBoC,kBAAmBK,CAAAA,MAAMD,iBAAgB;gBAC/D,MAAME,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQ1C;gBACV;gBACA,MAAM1C,QAAQ,MAAM,CAACoF;YACvB;QACF;QAEA,OAAOpF,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAEiF,cAAc;IACnE;IA9jBA,YACEI,iBAAoC,EACpCC,OAAgB,EAChBC,IAKC,CACD;QA9BF;QAEA;QAEA;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA,uBAAiB,SAAjB;QAEA;QAiBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,MAAM;QACjC,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,KAAK,GAAGA,KAAK,KAAK;QACvB,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;QAC3B;IACF;AAyiBF"}
|
package/dist/es/agent/utils.mjs
CHANGED
|
@@ -100,7 +100,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
100
100
|
return;
|
|
101
101
|
}
|
|
102
102
|
}
|
|
103
|
-
const getMidsceneVersion = ()=>"1.0.1-beta-
|
|
103
|
+
const getMidsceneVersion = ()=>"1.0.1-beta-20251204075416.0";
|
|
104
104
|
const parsePrompt = (prompt)=>{
|
|
105
105
|
if ('string' == typeof prompt) return {
|
|
106
106
|
textPrompt: prompt,
|
|
@@ -149,105 +149,18 @@ Return in JSON format:
|
|
|
149
149
|
${commonOutputFields}
|
|
150
150
|
"action":
|
|
151
151
|
{
|
|
152
|
-
//
|
|
152
|
+
"type": string, // the type of the action
|
|
153
|
+
"param"?: { // The parameter of the action, if any
|
|
154
|
+
"locate": { // for example, if the action is "Tap", the "locate" field is required
|
|
155
|
+
"prompt": string,
|
|
156
|
+
},
|
|
157
|
+
},
|
|
153
158
|
} | null,
|
|
154
159
|
,
|
|
155
160
|
"sleep"?: number, // The sleep time after the action, in milliseconds.
|
|
156
161
|
}
|
|
157
162
|
`;
|
|
158
163
|
}
|
|
159
|
-
|
|
160
|
-
type: 'json_schema',
|
|
161
|
-
json_schema: {
|
|
162
|
-
name: 'action_items',
|
|
163
|
-
strict: false,
|
|
164
|
-
schema: {
|
|
165
|
-
type: 'object',
|
|
166
|
-
strict: false,
|
|
167
|
-
properties: {
|
|
168
|
-
actions: {
|
|
169
|
-
type: 'array',
|
|
170
|
-
items: {
|
|
171
|
-
type: 'object',
|
|
172
|
-
strict: false,
|
|
173
|
-
properties: {
|
|
174
|
-
thought: {
|
|
175
|
-
type: 'string',
|
|
176
|
-
description: 'Reasons for generating this task, and why this task is feasible on this page'
|
|
177
|
-
},
|
|
178
|
-
type: {
|
|
179
|
-
type: 'string',
|
|
180
|
-
description: 'Type of action'
|
|
181
|
-
},
|
|
182
|
-
param: {
|
|
183
|
-
anyOf: [
|
|
184
|
-
{
|
|
185
|
-
type: 'null'
|
|
186
|
-
},
|
|
187
|
-
{
|
|
188
|
-
type: 'object',
|
|
189
|
-
additionalProperties: true
|
|
190
|
-
}
|
|
191
|
-
],
|
|
192
|
-
description: 'Parameter of the action'
|
|
193
|
-
},
|
|
194
|
-
locate: {
|
|
195
|
-
type: [
|
|
196
|
-
'object',
|
|
197
|
-
'null'
|
|
198
|
-
],
|
|
199
|
-
properties: {
|
|
200
|
-
id: {
|
|
201
|
-
type: 'string'
|
|
202
|
-
},
|
|
203
|
-
prompt: {
|
|
204
|
-
type: 'string'
|
|
205
|
-
}
|
|
206
|
-
},
|
|
207
|
-
required: [
|
|
208
|
-
'id',
|
|
209
|
-
'prompt'
|
|
210
|
-
],
|
|
211
|
-
additionalProperties: false,
|
|
212
|
-
description: 'Location information for the target element'
|
|
213
|
-
}
|
|
214
|
-
},
|
|
215
|
-
required: [
|
|
216
|
-
'thought',
|
|
217
|
-
'type',
|
|
218
|
-
'param',
|
|
219
|
-
'locate'
|
|
220
|
-
],
|
|
221
|
-
additionalProperties: false
|
|
222
|
-
},
|
|
223
|
-
description: 'List of actions to be performed'
|
|
224
|
-
},
|
|
225
|
-
more_actions_needed_by_instruction: {
|
|
226
|
-
type: 'boolean',
|
|
227
|
-
description: 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.'
|
|
228
|
-
},
|
|
229
|
-
log: {
|
|
230
|
-
type: 'string',
|
|
231
|
-
description: 'Log what these planned actions do. Do not include further actions that have not been planned.'
|
|
232
|
-
},
|
|
233
|
-
error: {
|
|
234
|
-
type: [
|
|
235
|
-
'string',
|
|
236
|
-
'null'
|
|
237
|
-
],
|
|
238
|
-
description: 'Error messages about unexpected situations'
|
|
239
|
-
}
|
|
240
|
-
},
|
|
241
|
-
required: [
|
|
242
|
-
'actions',
|
|
243
|
-
'more_actions_needed_by_instruction',
|
|
244
|
-
'log',
|
|
245
|
-
'error'
|
|
246
|
-
],
|
|
247
|
-
additionalProperties: false
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
};
|
|
251
|
-
export { descriptionForAction, planSchema, systemPromptToTaskPlanning };
|
|
164
|
+
export { descriptionForAction, systemPromptToTaskPlanning };
|
|
252
165
|
|
|
253
166
|
//# sourceMappingURL=llm-planning.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] | undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' | ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- When the next step is to assert something, this is a very important step, you should think about it carefully and give a solid result. Write your result in the \"log\" field like this: \"Assert: <condition>. I think <...>, so the result is <true / false>\". You don't need to give the next one action when you are asserting something. If the assertion result is false, think this an fatal error and set the reason into the \"error\" field. If the assertion result is true, you can continue to the next step.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n // one of the supporting actions\n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n`;\n}\n\nexport const planSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'action_items',\n strict: false,\n schema: {\n type: 'object',\n strict: false,\n properties: {\n actions: {\n type: 'array',\n items: {\n type: 'object',\n strict: false,\n properties: {\n thought: {\n type: 'string',\n description:\n 'Reasons for generating this task, and why this task is feasible on this page',\n },\n type: {\n type: 'string',\n description: 'Type of action',\n },\n param: {\n anyOf: [\n { type: 'null' },\n {\n type: 'object',\n additionalProperties: true,\n },\n ],\n description: 'Parameter of the action',\n },\n locate: {\n type: ['object', 'null'],\n properties: {\n id: { type: 'string' },\n prompt: { type: 'string' },\n },\n required: ['id', 'prompt'],\n additionalProperties: false,\n description: 'Location information for the target element',\n },\n },\n required: ['thought', 'type', 'param', 'locate'],\n additionalProperties: false,\n },\n description: 'List of actions to be performed',\n },\n more_actions_needed_by_instruction: {\n type: 'boolean',\n description:\n 'If all the actions described in the instruction have been covered by this action and logs, set this field to false.',\n },\n log: {\n type: 'string',\n description:\n 'Log what these planned actions do. Do not include further actions that have not been planned.',\n },\n error: {\n type: ['string', 'null'],\n description: 'Error messages about unexpected situations',\n },\n },\n required: [\n 'actions',\n 'more_actions_needed_by_instruction',\n 'log',\n 'error',\n ],\n additionalProperties: false,\n },\n },\n};\n"],"names":["commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","getTypeName","field","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","values","option","String","options","types","opt","console","getDescription","key","Object","isOptional","keyWithOptional","description","paramLine","line","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction","planSchema"],"mappings":";;AASA,MAAMA,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QACjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;gBAEnB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYD;gBAChC,MAAMK,gBAAgBD,YAAY,IAAI,EAAE;gBAExC,IAAIC,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,uBAAuBF,cACzB,OAAOZ;oBAET,OAAO;gBACT;gBACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAC/B,MAAME,SACHH,YAAY,IAAI,EAAE,QACf,IAAI,CAACI,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,KAAK,SAAS;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIF,AAAkB,eAAlBA,eAA8B;oBAChC,MAAMK,UAAUN,YAAY,IAAI,EAAE;oBAClC,IAAIM,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAab,YAAYa;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAT,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMU,iBAAiB,CAACd;gBAEtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBF,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMI,cAAcH,YAAYD;gBAGhC,IAAI,iBAAiBI,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIA,YAAY,IAAI,EAAE,aAAa,aACjC;oBAAA,IAAI,kCAAkCA,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACW,KAAKf,MAAM,IAAIgB,OAAO,OAAO,CAAClB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMiB,aACJ,AAAqC,cAArC,OAAQjB,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMkB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMZ,WAAWJ,YAAYC;gBAG7B,MAAMmB,cAAcL,eAAed;gBAGnC,IAAIoB,YAAY,GAAGF,gBAAgB,EAAE,EAAEf,UAAU;gBACjD,IAAIgB,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnCxB,WAAW,IAAI,CAACyB;YAClB;YAIF,IAAIzB,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAAC0B;oBAClB3B,OAAO,IAAI,CAAC,CAAC,IAAI,EAAE2B,MAAM;gBAC3B;YACF;QACF,OAAO;YAGL,MAAMC,iBAAiB1B,OAAO,IAAI,EAAE;YACpC,IAAIO,WAAW;YAEf,IAAImB,AAAmB,gBAAnBA,gBAAgCnB,WAAW;iBAC1C,IAAImB,AAAmB,gBAAnBA,gBAAgCnB,WAAW;iBAC/C,IAAImB,AAAmB,iBAAnBA,gBAAiCnB,WAAW;YAGrD,MAAMgB,cAAc,iBAAiBvB,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAI2B,mBAAmB,CAAC,SAAS,EAAEpB,UAAU;YAC7C,IAAIgB,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpB7B,OAAO,IAAI,CAAC6B;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEhC,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe+B,2BAA2B,EAC/CC,WAAW,EACXrC,MAAM,EACNsC,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACtC,QAClB,MAAM,IAAIuC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAAClC,SACtCD,qBACLC,QACAJ,cAAcuC,cAActC,SAASyC;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAE7C,mBAAmB;;;;;;;;AAQvB,CAAC;AACD;AAEO,MAAM8C,aAAuC;IAClD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,QAAQ;YACR,YAAY;gBACV,SAAS;oBACP,MAAM;oBACN,OAAO;wBACL,MAAM;wBACN,QAAQ;wBACR,YAAY;4BACV,SAAS;gCACP,MAAM;gCACN,aACE;4BACJ;4BACA,MAAM;gCACJ,MAAM;gCACN,aAAa;4BACf;4BACA,OAAO;gCACL,OAAO;oCACL;wCAAE,MAAM;oCAAO;oCACf;wCACE,MAAM;wCACN,sBAAsB;oCACxB;iCACD;gCACD,aAAa;4BACf;4BACA,QAAQ;gCACN,MAAM;oCAAC;oCAAU;iCAAO;gCACxB,YAAY;oCACV,IAAI;wCAAE,MAAM;oCAAS;oCACrB,QAAQ;wCAAE,MAAM;oCAAS;gCAC3B;gCACA,UAAU;oCAAC;oCAAM;iCAAS;gCAC1B,sBAAsB;gCACtB,aAAa;4BACf;wBACF;wBACA,UAAU;4BAAC;4BAAW;4BAAQ;4BAAS;yBAAS;wBAChD,sBAAsB;oBACxB;oBACA,aAAa;gBACf;gBACA,oCAAoC;oBAClC,MAAM;oBACN,aACE;gBACJ;gBACA,KAAK;oBACH,MAAM;oBACN,aACE;gBACJ;gBACA,OAAO;oBACL,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBACR;gBACA;gBACA;gBACA;aACD;YACD,sBAAsB;QACxB;IACF;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-planning.mjs","sources":["../../../../src/ai-model/prompt/llm-planning.ts"],"sourcesContent":["import type { DeviceAction } from '@/types';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport type { ResponseFormatJSONSchema } from 'openai/resources/index';\nimport type { z } from 'zod';\nimport { ifMidsceneLocatorField } from '../../common';\nimport { bboxDescription } from './common';\n\n// Note: put the log field first to trigger the CoT\n\nconst commonOutputFields = `\"error\"?: string, // Error messages about unexpected situations, if any. Only think it is an error when the situation is not foreseeable according to the instruction. Use the same language as the user's instruction.\n \"more_actions_needed_by_instruction\": boolean, // Consider if there is still more action(s) to do after the action in \"Log\" is done, according to the instruction. If so, set this field to true. Otherwise, set it to false.`;\n\nconst vlLocateParam = (vlMode: TVlModeTypes | undefined) => {\n if (vlMode) {\n return `{bbox: [number, number, number, number], prompt: string } // ${bboxDescription(vlMode)}`;\n }\n return '{ prompt: string /* description of the target element */ }';\n};\n\nexport const descriptionForAction = (\n action: DeviceAction<any>,\n locatorSchemaTypeDescription: string,\n) => {\n const tab = ' ';\n const fields: string[] = [];\n\n // Add the action type field\n fields.push(`- type: \"${action.name}\"`);\n\n // Handle paramSchema if it exists\n if (action.paramSchema) {\n const paramLines: string[] = [];\n\n // Check if paramSchema is a ZodObject with shape\n const schema = action.paramSchema as any;\n const isZodObject = schema._def?.typeName === 'ZodObject';\n\n if (isZodObject && schema.shape) {\n // Original logic for ZodObject schemas\n const shape = schema.shape;\n\n // Helper function to get type name from zod schema\n const getTypeName = (field: any): string => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n const actualField = unwrapField(field);\n const fieldTypeName = actualField._def?.typeName;\n\n if (fieldTypeName === 'ZodString') return 'string';\n if (fieldTypeName === 'ZodNumber') return 'number';\n if (fieldTypeName === 'ZodBoolean') return 'boolean';\n if (fieldTypeName === 'ZodArray') return 'array';\n if (fieldTypeName === 'ZodObject') {\n // Check if this is a passthrough object (like MidsceneLocation)\n if (ifMidsceneLocatorField(actualField)) {\n return locatorSchemaTypeDescription;\n }\n return 'object';\n }\n if (fieldTypeName === 'ZodEnum') {\n const values =\n (actualField._def?.values as unknown[] | undefined)\n ?.map((option: unknown) => String(`'${option}'`))\n .join(', ') ?? 'enum';\n\n return `enum(${values})`;\n }\n // Handle ZodUnion by taking the first option (for display purposes)\n if (fieldTypeName === 'ZodUnion') {\n const options = actualField._def?.options as any[] | undefined;\n if (options && options.length > 0) {\n // For unions, list all types\n const types = options.map((opt: any) => getTypeName(opt));\n return types.join(' | ');\n }\n return 'union';\n }\n\n console.warn(\n 'failed to parse Zod type. This may lead to wrong params from the LLM.\\n',\n actualField._def,\n );\n return actualField.toString();\n };\n\n // Helper function to get description from zod schema\n const getDescription = (field: z.ZodTypeAny): string | null => {\n // Recursively unwrap optional, nullable, and other wrapper types to get the actual inner type\n const unwrapField = (f: any): any => {\n if (!f._def) return f;\n\n const typeName = f._def.typeName;\n\n // Handle wrapper types that have innerType\n if (\n typeName === 'ZodOptional' ||\n typeName === 'ZodNullable' ||\n typeName === 'ZodDefault'\n ) {\n return unwrapField(f._def.innerType);\n }\n\n // Handle ZodEffects (transformations, refinements, preprocessors)\n if (typeName === 'ZodEffects') {\n // For ZodEffects, unwrap the schema field which contains the underlying type\n if (f._def.schema) {\n return unwrapField(f._def.schema);\n }\n }\n\n return f;\n };\n\n // Check for direct description on the original field (wrapper may have description)\n if ('description' in field) {\n return field.description || null;\n }\n\n const actualField = unwrapField(field);\n\n // Check for description on the unwrapped field\n if ('description' in actualField) {\n return actualField.description || null;\n }\n\n // Check for MidsceneLocation fields and add description\n if (actualField._def?.typeName === 'ZodObject') {\n if ('midscene_location_field_flag' in actualField._def.shape()) {\n return 'Location information for the target element';\n }\n }\n\n return null;\n };\n\n for (const [key, field] of Object.entries(shape)) {\n if (field && typeof field === 'object') {\n // Check if field is optional\n const isOptional =\n typeof (field as any).isOptional === 'function' &&\n (field as any).isOptional();\n const keyWithOptional = isOptional ? `${key}?` : key;\n\n // Get the type name\n const typeName = getTypeName(field);\n\n // Get description\n const description = getDescription(field as z.ZodTypeAny);\n\n // Build param line for this field\n let paramLine = `${keyWithOptional}: ${typeName}`;\n if (description) {\n paramLine += ` // ${description}`;\n }\n\n paramLines.push(paramLine);\n }\n }\n\n // Add the param section to fields if there are paramLines\n if (paramLines.length > 0) {\n fields.push('- param:');\n paramLines.forEach((line) => {\n fields.push(` - ${line}`);\n });\n }\n } else {\n // Handle non-object schemas (string, number, etc.)\n // For simple primitive types, the param should be passed directly as the value\n const schemaTypeName = schema._def?.typeName;\n let typeName = 'unknown';\n\n if (schemaTypeName === 'ZodString') typeName = 'string';\n else if (schemaTypeName === 'ZodNumber') typeName = 'number';\n else if (schemaTypeName === 'ZodBoolean') typeName = 'boolean';\n\n // Get description if available\n const description = 'description' in schema ? schema.description : null;\n\n // For simple types, indicate that param should be the direct value, not an object\n let paramDescription = `- param: ${typeName}`;\n if (description) {\n paramDescription += ` // ${description}`;\n }\n paramDescription += ' (pass the value directly, not as an object)';\n\n fields.push(paramDescription);\n }\n }\n\n return `- ${action.name}, ${action.description || 'No description provided'}\n${tab}${fields.join(`\\n${tab}`)}\n`.trim();\n};\n\nexport async function systemPromptToTaskPlanning({\n actionSpace,\n vlMode,\n includeBbox,\n}: {\n actionSpace: DeviceAction<any>[];\n vlMode: TVlModeTypes | undefined;\n includeBbox: boolean;\n}) {\n // Validate parameters: if includeBbox is true, vlMode must be defined\n if (includeBbox && !vlMode) {\n throw new Error(\n 'vlMode cannot be undefined when includeBbox is true. A valid vlMode is required for bbox-based location.',\n );\n }\n\n const actionDescriptionList = actionSpace.map((action) => {\n return descriptionForAction(\n action,\n vlLocateParam(includeBbox ? vlMode : undefined),\n );\n });\n const actionList = actionDescriptionList.join('\\n');\n\n const logFieldInstruction = `\n## About the \\`log\\` field (preamble message)\n\nThe \\`log\\` field is a brief preamble message to the user explaining what you’re about to do. It should follow these principles and examples:\n\n- **Use the same language as the user's instruction**\n- **Keep it concise**: be no more than 1-2 sentences, focused on immediate, tangible next steps. (8–12 words or Chinese characters for quick updates).\n- **Build on prior context**: if this is not the first action to be done, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.\n- **Keep your tone light, friendly and curious**: add small touches of personality in preambles feel collaborative and engaging.\n\n**Examples:**\n- \"Click the login button\"\n- \"Scroll to find the 'Yes' button in popup\"\n- \"Previous actions failed to find the 'Yes' button, i will try again\"\n- \"Go back to find the login button\"\n`;\n\n return `\nTarget: User will give you an instruction, some screenshots and previous logs indicating what have been done. Your task is to plan the next one action according to current situation to accomplish the instruction.\n\nPlease tell what the next one action is (or null if no action should be done) to do the tasks the instruction requires. \n\n## Rules\n\n- Don't give extra actions or plans beyond the instruction. For example, don't try to submit the form if the instruction is only to fill something.\n- Give just the next ONE action you should do\n- Consider the current screenshot and give the action that is most likely to accomplish the instruction. For example, if the next step is to click a button but it's not visible in the screenshot, you should try to find it first instead of give a click action.\n- Make sure the previous actions are completed successfully before performing the next step\n- If there are some error messages reported by the previous actions, don't give up, try parse a new action to recover. If the error persists for more than 5 times, you should think this is an error and set the \"error\" field to the error message.\n- If there is nothing to do but waiting, set the \"sleep\" field to the positive waiting time in milliseconds and null for the \"action\" field.\n- When the next step is to assert something, this is a very important step, you should think about it carefully and give a solid result. Write your result in the \"log\" field like this: \"Assert: <condition>. I think <...>, so the result is <true / false>\". You don't need to give the next one action when you are asserting something. If the assertion result is false, think this an fatal error and set the reason into the \"error\" field. If the assertion result is true, you can continue to the next step.\n\n## Supporting actions\n${actionList}\n\n${logFieldInstruction}\n\n## Return format\n\nReturn in JSON format:\n{\n \"log\": string, // a brief preamble to the user explaining what you’re about to do\n ${commonOutputFields}\n \"action\": \n {\n \"type\": string, // the type of the action\n \"param\"?: { // The parameter of the action, if any\n \"locate\": { // for example, if the action is \"Tap\", the \"locate\" field is required\n \"prompt\": string,\n },\n }, \n } | null,\n ,\n \"sleep\"?: number, // The sleep time after the action, in milliseconds.\n}\n`;\n}\n"],"names":["commonOutputFields","vlLocateParam","vlMode","bboxDescription","descriptionForAction","action","locatorSchemaTypeDescription","tab","fields","paramLines","schema","isZodObject","shape","getTypeName","field","unwrapField","f","typeName","actualField","fieldTypeName","ifMidsceneLocatorField","values","option","String","options","types","opt","console","getDescription","key","Object","isOptional","keyWithOptional","description","paramLine","line","schemaTypeName","paramDescription","systemPromptToTaskPlanning","actionSpace","includeBbox","Error","actionDescriptionList","undefined","actionList","logFieldInstruction"],"mappings":";;AASA,MAAMA,qBAAqB,CAAC;+NACmM,CAAC;AAEhO,MAAMC,gBAAgB,CAACC;IACrB,IAAIA,QACF,OAAO,CAAC,6DAA6D,EAAEC,gBAAgBD,SAAS;IAElG,OAAO;AACT;AAEO,MAAME,uBAAuB,CAClCC,QACAC;IAEA,MAAMC,MAAM;IACZ,MAAMC,SAAmB,EAAE;IAG3BA,OAAO,IAAI,CAAC,CAAC,SAAS,EAAEH,OAAO,IAAI,CAAC,CAAC,CAAC;IAGtC,IAAIA,OAAO,WAAW,EAAE;QACtB,MAAMI,aAAuB,EAAE;QAG/B,MAAMC,SAASL,OAAO,WAAW;QACjC,MAAMM,cAAcD,OAAO,IAAI,EAAE,aAAa;QAE9C,IAAIC,eAAeD,OAAO,KAAK,EAAE;YAE/B,MAAME,QAAQF,OAAO,KAAK;YAG1B,MAAMG,cAAc,CAACC;gBAEnB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAEA,MAAME,cAAcH,YAAYD;gBAChC,MAAMK,gBAAgBD,YAAY,IAAI,EAAE;gBAExC,IAAIC,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,gBAAlBA,eAA+B,OAAO;gBAC1C,IAAIA,AAAkB,iBAAlBA,eAAgC,OAAO;gBAC3C,IAAIA,AAAkB,eAAlBA,eAA8B,OAAO;gBACzC,IAAIA,AAAkB,gBAAlBA,eAA+B;oBAEjC,IAAIC,uBAAuBF,cACzB,OAAOZ;oBAET,OAAO;gBACT;gBACA,IAAIa,AAAkB,cAAlBA,eAA6B;oBAC/B,MAAME,SACHH,YAAY,IAAI,EAAE,QACf,IAAI,CAACI,SAAoBC,OAAO,CAAC,CAAC,EAAED,OAAO,CAAC,CAAC,GAC9C,KAAK,SAAS;oBAEnB,OAAO,CAAC,KAAK,EAAED,OAAO,CAAC,CAAC;gBAC1B;gBAEA,IAAIF,AAAkB,eAAlBA,eAA8B;oBAChC,MAAMK,UAAUN,YAAY,IAAI,EAAE;oBAClC,IAAIM,WAAWA,QAAQ,MAAM,GAAG,GAAG;wBAEjC,MAAMC,QAAQD,QAAQ,GAAG,CAAC,CAACE,MAAab,YAAYa;wBACpD,OAAOD,MAAM,IAAI,CAAC;oBACpB;oBACA,OAAO;gBACT;gBAEAE,QAAQ,IAAI,CACV,2EACAT,YAAY,IAAI;gBAElB,OAAOA,YAAY,QAAQ;YAC7B;YAGA,MAAMU,iBAAiB,CAACd;gBAEtB,MAAMC,cAAc,CAACC;oBACnB,IAAI,CAACA,EAAE,IAAI,EAAE,OAAOA;oBAEpB,MAAMC,WAAWD,EAAE,IAAI,CAAC,QAAQ;oBAGhC,IACEC,AAAa,kBAAbA,YACAA,AAAa,kBAAbA,YACAA,AAAa,iBAAbA,UAEA,OAAOF,YAAYC,EAAE,IAAI,CAAC,SAAS;oBAIrC,IAAIC,AAAa,iBAAbA,UAEF;wBAAA,IAAID,EAAE,IAAI,CAAC,MAAM,EACf,OAAOD,YAAYC,EAAE,IAAI,CAAC,MAAM;oBAClC;oBAGF,OAAOA;gBACT;gBAGA,IAAI,iBAAiBF,OACnB,OAAOA,MAAM,WAAW,IAAI;gBAG9B,MAAMI,cAAcH,YAAYD;gBAGhC,IAAI,iBAAiBI,aACnB,OAAOA,YAAY,WAAW,IAAI;gBAIpC,IAAIA,YAAY,IAAI,EAAE,aAAa,aACjC;oBAAA,IAAI,kCAAkCA,YAAY,IAAI,CAAC,KAAK,IAC1D,OAAO;gBACT;gBAGF,OAAO;YACT;YAEA,KAAK,MAAM,CAACW,KAAKf,MAAM,IAAIgB,OAAO,OAAO,CAAClB,OACxC,IAAIE,SAAS,AAAiB,YAAjB,OAAOA,OAAoB;gBAEtC,MAAMiB,aACJ,AAAqC,cAArC,OAAQjB,MAAc,UAAU,IAC/BA,MAAc,UAAU;gBAC3B,MAAMkB,kBAAkBD,aAAa,GAAGF,IAAI,CAAC,CAAC,GAAGA;gBAGjD,MAAMZ,WAAWJ,YAAYC;gBAG7B,MAAMmB,cAAcL,eAAed;gBAGnC,IAAIoB,YAAY,GAAGF,gBAAgB,EAAE,EAAEf,UAAU;gBACjD,IAAIgB,aACFC,aAAa,CAAC,IAAI,EAAED,aAAa;gBAGnCxB,WAAW,IAAI,CAACyB;YAClB;YAIF,IAAIzB,WAAW,MAAM,GAAG,GAAG;gBACzBD,OAAO,IAAI,CAAC;gBACZC,WAAW,OAAO,CAAC,CAAC0B;oBAClB3B,OAAO,IAAI,CAAC,CAAC,IAAI,EAAE2B,MAAM;gBAC3B;YACF;QACF,OAAO;YAGL,MAAMC,iBAAiB1B,OAAO,IAAI,EAAE;YACpC,IAAIO,WAAW;YAEf,IAAImB,AAAmB,gBAAnBA,gBAAgCnB,WAAW;iBAC1C,IAAImB,AAAmB,gBAAnBA,gBAAgCnB,WAAW;iBAC/C,IAAImB,AAAmB,iBAAnBA,gBAAiCnB,WAAW;YAGrD,MAAMgB,cAAc,iBAAiBvB,SAASA,OAAO,WAAW,GAAG;YAGnE,IAAI2B,mBAAmB,CAAC,SAAS,EAAEpB,UAAU;YAC7C,IAAIgB,aACFI,oBAAoB,CAAC,IAAI,EAAEJ,aAAa;YAE1CI,oBAAoB;YAEpB7B,OAAO,IAAI,CAAC6B;QACd;IACF;IAEA,OAAO,CAAC,EAAE,EAAEhC,OAAO,IAAI,CAAC,EAAE,EAAEA,OAAO,WAAW,IAAI,0BAA0B;AAC9E,EAAEE,MAAMC,OAAO,IAAI,CAAC,CAAC,EAAE,EAAED,KAAK,EAAE;AAChC,CAAC,CAAC,IAAI;AACN;AAEO,eAAe+B,2BAA2B,EAC/CC,WAAW,EACXrC,MAAM,EACNsC,WAAW,EAKZ;IAEC,IAAIA,eAAe,CAACtC,QAClB,MAAM,IAAIuC,MACR;IAIJ,MAAMC,wBAAwBH,YAAY,GAAG,CAAC,CAAClC,SACtCD,qBACLC,QACAJ,cAAcuC,cAActC,SAASyC;IAGzC,MAAMC,aAAaF,sBAAsB,IAAI,CAAC;IAE9C,MAAMG,sBAAsB,CAAC;;;;;;;;;;;;;;;AAe/B,CAAC;IAEC,OAAO,CAAC;;;;;;;;;;;;;;;;AAgBV,EAAED,WAAW;;AAEb,EAAEC,oBAAoB;;;;;;;EAOpB,EAAE7C,mBAAmB;;;;;;;;;;;;;AAavB,CAAC;AACD"}
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import { AIResponseFormat } from "../../types.mjs";
|
|
2
1
|
import { MIDSCENE_LANGFUSE_DEBUG, MIDSCENE_LANGSMITH_DEBUG, MIDSCENE_MODEL_MAX_TOKENS, OPENAI_MAX_TOKENS, globalConfigManager } from "@midscene/shared/env";
|
|
3
2
|
import { getDebug } from "@midscene/shared/logger";
|
|
4
3
|
import { assert, ifInBrowser } from "@midscene/shared/utils";
|
|
@@ -6,9 +5,6 @@ import { HttpsProxyAgent } from "https-proxy-agent";
|
|
|
6
5
|
import { jsonrepair } from "jsonrepair";
|
|
7
6
|
import openai_0 from "openai";
|
|
8
7
|
import { SocksProxyAgent } from "socks-proxy-agent";
|
|
9
|
-
import { AIActionType } from "../../common.mjs";
|
|
10
|
-
import { assertSchema } from "../prompt/assertion.mjs";
|
|
11
|
-
import { planSchema } from "../prompt/llm-planning.mjs";
|
|
12
8
|
async function createChatClient({ AIActionTypeValue, modelConfig }) {
|
|
13
9
|
const { socksProxy, httpProxy, modelName, openaiBaseURL, openaiApiKey, openaiExtraConfig, modelDescription, uiTarsModelVersion: uiTarsVersion, vlMode, createOpenAIClient, timeout } = modelConfig;
|
|
14
10
|
let proxyAgent;
|
|
@@ -65,7 +61,6 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
|
65
61
|
AIActionTypeValue,
|
|
66
62
|
modelConfig
|
|
67
63
|
});
|
|
68
|
-
const responseFormat = getResponseFormat(modelName, AIActionTypeValue);
|
|
69
64
|
const maxTokens = globalConfigManager.getEnvConfigValue(MIDSCENE_MODEL_MAX_TOKENS) ?? globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);
|
|
70
65
|
const debugCall = getDebug('ai:call');
|
|
71
66
|
const debugProfileStats = getDebug('ai:profile:stats');
|
|
@@ -104,7 +99,6 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
|
104
99
|
const stream = await completion.create({
|
|
105
100
|
model: modelName,
|
|
106
101
|
messages,
|
|
107
|
-
response_format: responseFormat,
|
|
108
102
|
...commonConfig
|
|
109
103
|
}, {
|
|
110
104
|
stream: true
|
|
@@ -151,7 +145,6 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
|
151
145
|
const result = await completion.create({
|
|
152
146
|
model: modelName,
|
|
153
147
|
messages,
|
|
154
|
-
response_format: responseFormat,
|
|
155
148
|
...commonConfig
|
|
156
149
|
});
|
|
157
150
|
timeCost = Date.now() - startTime;
|
|
@@ -184,30 +177,6 @@ async function callAI(messages, AIActionTypeValue, modelConfig, options) {
|
|
|
184
177
|
throw newError;
|
|
185
178
|
}
|
|
186
179
|
}
|
|
187
|
-
const getResponseFormat = (modelName, AIActionTypeValue)=>{
|
|
188
|
-
let responseFormat;
|
|
189
|
-
if (modelName.includes('gpt-4')) switch(AIActionTypeValue){
|
|
190
|
-
case AIActionType.ASSERT:
|
|
191
|
-
responseFormat = assertSchema;
|
|
192
|
-
break;
|
|
193
|
-
case AIActionType.PLAN:
|
|
194
|
-
responseFormat = planSchema;
|
|
195
|
-
break;
|
|
196
|
-
case AIActionType.EXTRACT_DATA:
|
|
197
|
-
case AIActionType.DESCRIBE_ELEMENT:
|
|
198
|
-
responseFormat = {
|
|
199
|
-
type: AIResponseFormat.JSON
|
|
200
|
-
};
|
|
201
|
-
break;
|
|
202
|
-
case AIActionType.TEXT:
|
|
203
|
-
responseFormat = void 0;
|
|
204
|
-
break;
|
|
205
|
-
}
|
|
206
|
-
if ('gpt-4o-2024-05-13' === modelName && AIActionTypeValue !== AIActionType.TEXT) responseFormat = {
|
|
207
|
-
type: AIResponseFormat.JSON
|
|
208
|
-
};
|
|
209
|
-
return responseFormat;
|
|
210
|
-
};
|
|
211
180
|
async function callAIWithObjectResponse(messages, AIActionTypeValue, modelConfig) {
|
|
212
181
|
const response = await callAI(messages, AIActionTypeValue, modelConfig);
|
|
213
182
|
assert(response, 'empty response');
|
|
@@ -276,6 +245,6 @@ function safeParseJson(input, vlMode) {
|
|
|
276
245
|
}
|
|
277
246
|
throw Error(`failed to parse json response: ${input}`);
|
|
278
247
|
}
|
|
279
|
-
export { callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock,
|
|
248
|
+
export { callAI, callAIWithObjectResponse, callAIWithStringResponse, extractJSONFromCodeBlock, preprocessDoubaoBboxJson, safeParseJson };
|
|
280
249
|
|
|
281
250
|
//# sourceMappingURL=index.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport { AIActionType, type AIArgs } from '../../common';\nimport { assertSchema } from '../prompt/assertion';\nimport { planSchema } from '../prompt/llm-planning';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelConfig,\n}: {\n AIActionTypeValue: AIActionType;\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n ...(proxyAgent ? { httpAgent: proxyAgent as any } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const { completion, modelName, modelDescription, uiTarsVersion, vlMode } =\n await createChatClient({\n AIActionTypeValue,\n modelConfig,\n });\n\n const responseFormat = getResponseFormat(modelName, AIActionTypeValue);\n\n const maxTokens =\n globalConfigManager.getEnvConfigValue(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature: vlMode === 'vlm-ui-tars' ? 0.0 : undefined,\n stream: !!isStreaming,\n max_tokens: typeof maxTokens === 'number' ? maxTokens : undefined,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n response_format: responseFormat,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport const getResponseFormat = (\n modelName: string,\n AIActionTypeValue: AIActionType,\n):\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject => {\n let responseFormat:\n | OpenAI.ChatCompletionCreateParams['response_format']\n | OpenAI.ResponseFormatJSONObject\n | undefined;\n\n if (modelName.includes('gpt-4')) {\n switch (AIActionTypeValue) {\n case AIActionType.ASSERT:\n responseFormat = assertSchema;\n break;\n case AIActionType.PLAN:\n responseFormat = planSchema;\n break;\n case AIActionType.EXTRACT_DATA:\n case AIActionType.DESCRIBE_ELEMENT:\n responseFormat = { type: AIResponseFormat.JSON };\n break;\n case AIActionType.TEXT:\n // No response format for plain text - return as-is\n responseFormat = undefined;\n break;\n }\n }\n\n // gpt-4o-2024-05-13 only supports json_object response format\n // Skip for plain text to allow string output\n if (\n modelName === 'gpt-4o-2024-05-13' &&\n AIActionTypeValue !== AIActionType.TEXT\n ) {\n responseFormat = { type: AIResponseFormat.JSON };\n }\n\n return responseFormat;\n};\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: T; contentString: string; usage?: AIUsageInfo }> {\n const response = await callAI(messages, AIActionTypeValue, modelConfig);\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch {}\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (e) {}\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["createChatClient","AIActionTypeValue","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","ifInBrowser","Error","console","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","responseFormat","getResponseFormat","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","undefined","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","result","JSON","assert","e","newError","AIActionType","assertSchema","planSchema","AIResponseFormat","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","normalizeJsonObject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","Number","parsed","jsonrepair","jsonString"],"mappings":";;;;;;;;;;;AAyBA,eAAeA,iBAAiB,EAC9BC,iBAAiB,EACjBC,WAAW,EAIZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAIZ,WAAW;QACbW,WAAW,oBAAoBX;QAC/BU,aAAa,IAAIG,gBAAgBb;IACnC,OAAO,IAAID,YAAY;QACrBY,WAAW,qBAAqBZ;QAChCW,aAAa,IAAII,gBAAgBf;IACnC;IAEA,MAAMgB,gBAAgB;QACpB,SAASb;QACT,QAAQC;QACR,GAAIO,aAAa;YAAE,WAAWA;QAAkB,IAAI,CAAC,CAAC;QACtD,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMO,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIC,aACF,MAAM,IAAIC,MAAM;QAElBC,QAAQ,GAAG,CAAC;QAEZ,MAAMC,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCN,SAASO,WAAWP;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACO,0BAC1C;QACA,IAAIL,aACF,MAAM,IAAIC,MAAM;QAElBC,QAAQ,GAAG,CAAC;QAEZ,MAAMI,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCT,SAASU,cAAcV;IACzB;IAEA,IAAIV,oBAAoB;QACtB,MAAMqB,gBAAgB,MAAMrB,mBAAmBQ,YAAYD;QAE3D,IAAIc,eACFX,SAASW;IAEb;IAEA,OAAO;QACL,YAAYX,OAAO,IAAI,CAAC,WAAW;QACnCjB;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAeuB,OACpBC,QAAsC,EACtClC,iBAA+B,EAC/BC,WAAyB,EACzBkC,OAGC;IAED,MAAM,EAAEC,UAAU,EAAEhC,SAAS,EAAEI,gBAAgB,EAAEC,aAAa,EAAEC,MAAM,EAAE,GACtE,MAAMX,iBAAiB;QACrBC;QACAC;IACF;IAEF,MAAMoC,iBAAiBC,kBAAkBlC,WAAWJ;IAEpD,MAAMuC,YACJjB,oBAAoB,iBAAiB,CAACkB,8BACtClB,oBAAoB,iBAAiB,CAACmB;IACxC,MAAMC,YAAY3B,SAAS;IAC3B,MAAM4B,oBAAoB5B,SAAS;IACnC,MAAM6B,qBAAqB7B,SAAS;IAEpC,MAAM8B,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAcZ,SAAS,UAAUA,SAAS;IAChD,IAAIa;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY/C;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMsD,eAAe;QACnB,aAAa7C,AAAW,kBAAXA,SAA2B,MAAM8C;QAC9C,QAAQ,CAAC,CAACT;QACV,YAAY,AAAqB,YAArB,OAAOR,YAAyBA,YAAYiB;QACxD,GAAI9C,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACFgC,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAE3C,WAAW;QAGrE,IAAI2C,aAAa;YACf,MAAMU,SAAU,MAAMrB,WAAW,MAAM,CACrC;gBACE,OAAOhC;gBACP8B;gBACA,iBAAiBG;gBACjB,GAAGkB,YAAY;YACjB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMG,SAASD,OAAQ;gBAChC,MAAMT,UAAUU,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbR,QAAQQ,MAAM,KAAK;gBAGrB,IAAIV,WAAWW,mBAAmB;oBAChCV,eAAeD;oBACf,MAAMY,YAAiC;wBACrCZ;wBACAW;wBACAV;wBACA,YAAY;wBACZ,OAAOO;oBACT;oBACArB,QAAQ,OAAO,CAAEyB;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCP,WAAWL,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACK,OAAO;wBAEV,MAAMW,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACb,YAAY,MAAM,GAAG;wBAElCC,QAAQ;4BACN,eAAeW;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTd;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOG,eAAeF;oBACxB;oBACAf,QAAQ,OAAO,CAAE4B;oBACjB;gBACF;YACF;YACAf,UAAUC;YACVN,kBACE,CAAC,iBAAiB,EAAEvC,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEyC,UAAU;QAEvF,OAAO;YACL,MAAMa,SAAS,MAAM5B,WAAW,MAAM,CAAC;gBACrC,OAAOhC;gBACP8B;gBACA,iBAAiBG;gBACjB,GAAGkB,YAAY;YACjB;YACAJ,WAAWL,KAAK,GAAG,KAAKD;YAExBF,kBACE,CAAC,OAAO,EAAEvC,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEuD,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEb,SAAS,aAAa,EAAEa,OAAO,WAAW,IAAI,IAAI;YAG3TpB,mBAAmB,CAAC,oBAAoB,EAAEqB,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;YAExEE,OACEF,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;YAEhEhB,UAAUgB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;YAC3Cd,QAAQc,OAAO,KAAK;QACtB;QAEAtB,UAAU,CAAC,UAAU,EAAEM,SAAS;QAChCkB,OAAOlB,SAAS;QAGhB,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMW,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEd,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAeW;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASb,WAAW;YACpB,OAAOI,eAAeF;YACtB,YAAY,CAAC,CAACH;QAChB;IACF,EAAE,OAAOoB,GAAQ;QACfzC,QAAQ,KAAK,CAAC,kBAAkByC;QAChC,MAAMC,WAAW,IAAI3C,MACnB,CAAC,eAAe,EAAEsB,cAAc,eAAe,GAAG,kBAAkB,EAAEoB,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,MAAM9B,oBAAoB,CAC/BlC,WACAJ;IAIA,IAAIqC;IAKJ,IAAIjC,UAAU,QAAQ,CAAC,UACrB,OAAQJ;QACN,KAAKqE,aAAa,MAAM;YACtBhC,iBAAiBiC;YACjB;QACF,KAAKD,aAAa,IAAI;YACpBhC,iBAAiBkC;YACjB;QACF,KAAKF,aAAa,YAAY;QAC9B,KAAKA,aAAa,gBAAgB;YAChChC,iBAAiB;gBAAE,MAAMmC,iBAAiB,IAAI;YAAC;YAC/C;QACF,KAAKH,aAAa,IAAI;YAEpBhC,iBAAiBmB;YACjB;IACJ;IAKF,IACEpD,AAAc,wBAAdA,aACAJ,sBAAsBqE,aAAa,IAAI,EAEvChC,iBAAiB;QAAE,MAAMmC,iBAAiB,IAAI;IAAC;IAGjD,OAAOnC;AACT;AAEO,eAAeoC,yBACpBvC,QAAsC,EACtClC,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAMyE,WAAW,MAAMzC,OAAOC,UAAUlC,mBAAmBC;IAC3DiE,OAAOQ,UAAU;IACjB,MAAMhE,SAAST,YAAY,MAAM;IACjC,MAAM0E,cAAcC,cAAcF,SAAS,OAAO,EAAEhE;IACpD,OAAO;QACL,SAASiE;QACT,eAAeD,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;IACvB;AACF;AAEO,eAAeG,yBACpBC,IAAY,EACZ9E,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAM,EAAE+C,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMjB,OAAO6C,MAAM9E,mBAAmBC;IACjE,OAAO;QAAE+C;QAASE;IAAM;AAC1B;AAEO,SAAS6B,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAQA,SAASC,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IAAII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASV,cAAcQ,KAAa,EAAE1E,MAAgC;IAC3E,MAAMqF,kBAAkBhB,yBAAyBK;IAEjD,IAAIW,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAIC;IAGT,IAAIC;IACJ,IAAI;QACFA,SAAShC,KAAK,KAAK,CAAC8B;QACpB,OAAOV,oBAAoBY;IAC7B,EAAE,OAAM,CAAC;IACT,IAAI;QACFA,SAAShC,KAAK,KAAK,CAACiC,WAAWH;QAC/B,OAAOV,oBAAoBY;IAC7B,EAAE,OAAO9B,GAAG,CAAC;IAEb,IAAIzD,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAMyF,aAAahB,yBAAyBY;QAC5CE,SAAShC,KAAK,KAAK,CAACiC,WAAWC;QAC/B,OAAOd,oBAAoBY;IAC7B;IACA,MAAMxE,MAAM,CAAC,+BAA+B,EAAE2D,OAAO;AACvD"}
|
|
1
|
+
{"version":3,"file":"ai-model/service-caller/index.mjs","sources":["../../../../src/ai-model/service-caller/index.ts"],"sourcesContent":["import { AIResponseFormat, type AIUsageInfo } from '@/types';\nimport type { CodeGenerationChunk, StreamingCallback } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_LANGFUSE_DEBUG,\n MIDSCENE_LANGSMITH_DEBUG,\n MIDSCENE_MODEL_MAX_TOKENS,\n OPENAI_MAX_TOKENS,\n type TVlModeTypes,\n type UITarsModelVersion,\n globalConfigManager,\n} from '@midscene/shared/env';\n\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert, ifInBrowser } from '@midscene/shared/utils';\nimport { HttpsProxyAgent } from 'https-proxy-agent';\nimport { jsonrepair } from 'jsonrepair';\nimport OpenAI from 'openai';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport type { Stream } from 'openai/streaming';\nimport { SocksProxyAgent } from 'socks-proxy-agent';\nimport type { AIActionType, AIArgs } from '../../common';\n\nasync function createChatClient({\n AIActionTypeValue,\n modelConfig,\n}: {\n AIActionTypeValue: AIActionType;\n modelConfig: IModelConfig;\n}): Promise<{\n completion: OpenAI.Chat.Completions;\n modelName: string;\n modelDescription: string;\n uiTarsVersion?: UITarsModelVersion;\n vlMode: TVlModeTypes | undefined;\n}> {\n const {\n socksProxy,\n httpProxy,\n modelName,\n openaiBaseURL,\n openaiApiKey,\n openaiExtraConfig,\n modelDescription,\n uiTarsModelVersion: uiTarsVersion,\n vlMode,\n createOpenAIClient,\n timeout,\n } = modelConfig;\n\n let proxyAgent = undefined;\n const debugProxy = getDebug('ai:call:proxy');\n if (httpProxy) {\n debugProxy('using http proxy', httpProxy);\n proxyAgent = new HttpsProxyAgent(httpProxy);\n } else if (socksProxy) {\n debugProxy('using socks proxy', socksProxy);\n proxyAgent = new SocksProxyAgent(socksProxy);\n }\n\n const openAIOptions = {\n baseURL: openaiBaseURL,\n apiKey: openaiApiKey,\n ...(proxyAgent ? { httpAgent: proxyAgent as any } : {}),\n ...openaiExtraConfig,\n ...(typeof timeout === 'number' ? { timeout } : {}),\n dangerouslyAllowBrowser: true,\n };\n\n const baseOpenAI = new OpenAI(openAIOptions);\n\n let openai: OpenAI = baseOpenAI;\n\n // LangSmith wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGSMITH_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langsmith is not supported in browser');\n }\n console.log('DEBUGGING MODE: langsmith wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langsmithModule = 'langsmith/wrappers';\n const { wrapOpenAI } = await import(langsmithModule);\n openai = wrapOpenAI(openai);\n }\n\n // Langfuse wrapper\n if (\n openai &&\n globalConfigManager.getEnvConfigInBoolean(MIDSCENE_LANGFUSE_DEBUG)\n ) {\n if (ifInBrowser) {\n throw new Error('langfuse is not supported in browser');\n }\n console.log('DEBUGGING MODE: langfuse wrapper enabled');\n // Use variable to prevent static analysis by bundlers\n const langfuseModule = 'langfuse';\n const { observeOpenAI } = await import(langfuseModule);\n openai = observeOpenAI(openai);\n }\n\n if (createOpenAIClient) {\n const wrappedClient = await createOpenAIClient(baseOpenAI, openAIOptions);\n\n if (wrappedClient) {\n openai = wrappedClient as OpenAI;\n }\n }\n\n return {\n completion: openai.chat.completions,\n modelName,\n modelDescription,\n uiTarsVersion,\n vlMode,\n };\n}\n\nexport async function callAI(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n options?: {\n stream?: boolean;\n onChunk?: StreamingCallback;\n },\n): Promise<{ content: string; usage?: AIUsageInfo; isStreamed: boolean }> {\n const { completion, modelName, modelDescription, uiTarsVersion, vlMode } =\n await createChatClient({\n AIActionTypeValue,\n modelConfig,\n });\n\n const maxTokens =\n globalConfigManager.getEnvConfigValue(MIDSCENE_MODEL_MAX_TOKENS) ??\n globalConfigManager.getEnvConfigValue(OPENAI_MAX_TOKENS);\n const debugCall = getDebug('ai:call');\n const debugProfileStats = getDebug('ai:profile:stats');\n const debugProfileDetail = getDebug('ai:profile:detail');\n\n const startTime = Date.now();\n\n const isStreaming = options?.stream && options?.onChunk;\n let content: string | undefined;\n let accumulated = '';\n let usage: OpenAI.CompletionUsage | undefined;\n let timeCost: number | undefined;\n\n const buildUsageInfo = (usageData?: OpenAI.CompletionUsage) => {\n if (!usageData) return undefined;\n\n const cachedInputTokens = (\n usageData as { prompt_tokens_details?: { cached_tokens?: number } }\n )?.prompt_tokens_details?.cached_tokens;\n\n return {\n prompt_tokens: usageData.prompt_tokens ?? 0,\n completion_tokens: usageData.completion_tokens ?? 0,\n total_tokens: usageData.total_tokens ?? 0,\n cached_input: cachedInputTokens ?? 0,\n time_cost: timeCost ?? 0,\n model_name: modelName,\n model_description: modelDescription,\n intent: modelConfig.intent,\n } satisfies AIUsageInfo;\n };\n\n const commonConfig = {\n temperature: vlMode === 'vlm-ui-tars' ? 0.0 : undefined,\n stream: !!isStreaming,\n max_tokens: typeof maxTokens === 'number' ? maxTokens : undefined,\n ...(vlMode === 'qwen2.5-vl' // qwen vl v2 specific config\n ? {\n vl_high_resolution_images: true,\n }\n : {}),\n };\n\n try {\n debugCall(\n `sending ${isStreaming ? 'streaming ' : ''}request to ${modelName}`,\n );\n\n if (isStreaming) {\n const stream = (await completion.create(\n {\n model: modelName,\n messages,\n ...commonConfig,\n },\n {\n stream: true,\n },\n )) as Stream<OpenAI.Chat.Completions.ChatCompletionChunk> & {\n _request_id?: string | null;\n };\n\n for await (const chunk of stream) {\n const content = chunk.choices?.[0]?.delta?.content || '';\n const reasoning_content =\n (chunk.choices?.[0]?.delta as any)?.reasoning_content || '';\n\n // Check for usage info in any chunk (OpenAI provides usage in separate chunks)\n if (chunk.usage) {\n usage = chunk.usage;\n }\n\n if (content || reasoning_content) {\n accumulated += content;\n const chunkData: CodeGenerationChunk = {\n content,\n reasoning_content,\n accumulated,\n isComplete: false,\n usage: undefined,\n };\n options.onChunk!(chunkData);\n }\n\n // Check if stream is complete\n if (chunk.choices?.[0]?.finish_reason) {\n timeCost = Date.now() - startTime;\n\n // If usage is not available from the stream, provide a basic usage info\n if (!usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor(accumulated.length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n };\n }\n\n // Send final chunk\n const finalChunk: CodeGenerationChunk = {\n content: '',\n accumulated,\n reasoning_content: '',\n isComplete: true,\n usage: buildUsageInfo(usage),\n };\n options.onChunk!(finalChunk);\n break;\n }\n }\n content = accumulated;\n debugProfileStats(\n `streaming model, ${modelName}, mode, ${vlMode || 'default'}, cost-ms, ${timeCost}`,\n );\n } else {\n const result = await completion.create({\n model: modelName,\n messages,\n ...commonConfig,\n } as any);\n timeCost = Date.now() - startTime;\n\n debugProfileStats(\n `model, ${modelName}, mode, ${vlMode || 'default'}, ui-tars-version, ${uiTarsVersion}, prompt-tokens, ${result.usage?.prompt_tokens || ''}, completion-tokens, ${result.usage?.completion_tokens || ''}, total-tokens, ${result.usage?.total_tokens || ''}, cost-ms, ${timeCost}, requestId, ${result._request_id || ''}`,\n );\n\n debugProfileDetail(`model usage detail: ${JSON.stringify(result.usage)}`);\n\n assert(\n result.choices,\n `invalid response from LLM service: ${JSON.stringify(result)}`,\n );\n content = result.choices[0].message.content!;\n usage = result.usage;\n }\n\n debugCall(`response: ${content}`);\n assert(content, 'empty content');\n\n // Ensure we always have usage info for streaming responses\n if (isStreaming && !usage) {\n // Estimate token counts based on content length (rough approximation)\n const estimatedTokens = Math.max(\n 1,\n Math.floor((content || '').length / 4),\n );\n usage = {\n prompt_tokens: estimatedTokens,\n completion_tokens: estimatedTokens,\n total_tokens: estimatedTokens * 2,\n } as OpenAI.CompletionUsage;\n }\n\n return {\n content: content || '',\n usage: buildUsageInfo(usage),\n isStreamed: !!isStreaming,\n };\n } catch (e: any) {\n console.error(' call AI error', e);\n const newError = new Error(\n `failed to call ${isStreaming ? 'streaming ' : ''}AI model service: ${e.message}. Trouble shooting: https://midscenejs.com/model-provider.html`,\n {\n cause: e,\n },\n );\n throw newError;\n }\n}\n\nexport async function callAIWithObjectResponse<T>(\n messages: ChatCompletionMessageParam[],\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: T; contentString: string; usage?: AIUsageInfo }> {\n const response = await callAI(messages, AIActionTypeValue, modelConfig);\n assert(response, 'empty response');\n const vlMode = modelConfig.vlMode;\n const jsonContent = safeParseJson(response.content, vlMode);\n return {\n content: jsonContent,\n contentString: response.content,\n usage: response.usage,\n };\n}\n\nexport async function callAIWithStringResponse(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelConfig: IModelConfig,\n): Promise<{ content: string; usage?: AIUsageInfo }> {\n const { content, usage } = await callAI(msgs, AIActionTypeValue, modelConfig);\n return { content, usage };\n}\n\nexport function extractJSONFromCodeBlock(response: string) {\n try {\n // First, try to match a JSON object directly in the response\n const jsonMatch = response.match(/^\\s*(\\{[\\s\\S]*\\})\\s*$/);\n if (jsonMatch) {\n return jsonMatch[1];\n }\n\n // If no direct JSON object is found, try to extract JSON from a code block\n const codeBlockMatch = response.match(\n /```(?:json)?\\s*(\\{[\\s\\S]*?\\})\\s*```/,\n );\n if (codeBlockMatch) {\n return codeBlockMatch[1];\n }\n\n // If no code block is found, try to find a JSON-like structure in the text\n const jsonLikeMatch = response.match(/\\{[\\s\\S]*\\}/);\n if (jsonLikeMatch) {\n return jsonLikeMatch[0];\n }\n } catch {}\n // If no JSON-like structure is found, return the original response\n return response;\n}\n\nexport function preprocessDoubaoBboxJson(input: string) {\n if (input.includes('bbox')) {\n // when its values like 940 445 969 490, replace all /\\d+\\s+\\d+/g with /$1,$2/g\n while (/\\d+\\s+\\d+/.test(input)) {\n input = input.replace(/(\\d+)\\s+(\\d+)/g, '$1,$2');\n }\n }\n return input;\n}\n\n/**\n * Normalize a parsed JSON object by trimming whitespace from:\n * 1. All object keys (e.g., \" prompt \" -> \"prompt\")\n * 2. All string values (e.g., \" Tap \" -> \"Tap\")\n * This handles LLM output that may include leading/trailing spaces.\n */\nfunction normalizeJsonObject(obj: any): any {\n // Handle null and undefined\n if (obj === null || obj === undefined) {\n return obj;\n }\n\n // Handle arrays - recursively normalize each element\n if (Array.isArray(obj)) {\n return obj.map((item) => normalizeJsonObject(item));\n }\n\n // Handle objects\n if (typeof obj === 'object') {\n const normalized: any = {};\n\n for (const [key, value] of Object.entries(obj)) {\n // Trim the key to remove leading/trailing spaces\n const trimmedKey = key.trim();\n\n // Recursively normalize the value\n let normalizedValue = normalizeJsonObject(value);\n\n // Trim all string values\n if (typeof normalizedValue === 'string') {\n normalizedValue = normalizedValue.trim();\n }\n\n normalized[trimmedKey] = normalizedValue;\n }\n\n return normalized;\n }\n\n // Handle primitive strings\n if (typeof obj === 'string') {\n return obj.trim();\n }\n\n // Return other primitives as-is\n return obj;\n}\n\nexport function safeParseJson(input: string, vlMode: TVlModeTypes | undefined) {\n const cleanJsonString = extractJSONFromCodeBlock(input);\n // match the point\n if (cleanJsonString?.match(/\\((\\d+),(\\d+)\\)/)) {\n return cleanJsonString\n .match(/\\((\\d+),(\\d+)\\)/)\n ?.slice(1)\n .map(Number);\n }\n\n let parsed: any;\n try {\n parsed = JSON.parse(cleanJsonString);\n return normalizeJsonObject(parsed);\n } catch {}\n try {\n parsed = JSON.parse(jsonrepair(cleanJsonString));\n return normalizeJsonObject(parsed);\n } catch (e) {}\n\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n const jsonString = preprocessDoubaoBboxJson(cleanJsonString);\n parsed = JSON.parse(jsonrepair(jsonString));\n return normalizeJsonObject(parsed);\n }\n throw Error(`failed to parse json response: ${input}`);\n}\n"],"names":["createChatClient","AIActionTypeValue","modelConfig","socksProxy","httpProxy","modelName","openaiBaseURL","openaiApiKey","openaiExtraConfig","modelDescription","uiTarsVersion","vlMode","createOpenAIClient","timeout","proxyAgent","debugProxy","getDebug","HttpsProxyAgent","SocksProxyAgent","openAIOptions","baseOpenAI","OpenAI","openai","globalConfigManager","MIDSCENE_LANGSMITH_DEBUG","ifInBrowser","Error","console","langsmithModule","wrapOpenAI","MIDSCENE_LANGFUSE_DEBUG","langfuseModule","observeOpenAI","wrappedClient","callAI","messages","options","completion","maxTokens","MIDSCENE_MODEL_MAX_TOKENS","OPENAI_MAX_TOKENS","debugCall","debugProfileStats","debugProfileDetail","startTime","Date","isStreaming","content","accumulated","usage","timeCost","buildUsageInfo","usageData","cachedInputTokens","commonConfig","undefined","stream","chunk","reasoning_content","chunkData","estimatedTokens","Math","finalChunk","result","JSON","assert","e","newError","callAIWithObjectResponse","response","jsonContent","safeParseJson","callAIWithStringResponse","msgs","extractJSONFromCodeBlock","jsonMatch","codeBlockMatch","jsonLikeMatch","preprocessDoubaoBboxJson","input","normalizeJsonObject","obj","Array","item","normalized","key","value","Object","trimmedKey","normalizedValue","cleanJsonString","Number","parsed","jsonrepair","jsonString"],"mappings":";;;;;;;AAuBA,eAAeA,iBAAiB,EAC9BC,iBAAiB,EACjBC,WAAW,EAIZ;IAOC,MAAM,EACJC,UAAU,EACVC,SAAS,EACTC,SAAS,EACTC,aAAa,EACbC,YAAY,EACZC,iBAAiB,EACjBC,gBAAgB,EAChB,oBAAoBC,aAAa,EACjCC,MAAM,EACNC,kBAAkB,EAClBC,OAAO,EACR,GAAGX;IAEJ,IAAIY;IACJ,MAAMC,aAAaC,SAAS;IAC5B,IAAIZ,WAAW;QACbW,WAAW,oBAAoBX;QAC/BU,aAAa,IAAIG,gBAAgBb;IACnC,OAAO,IAAID,YAAY;QACrBY,WAAW,qBAAqBZ;QAChCW,aAAa,IAAII,gBAAgBf;IACnC;IAEA,MAAMgB,gBAAgB;QACpB,SAASb;QACT,QAAQC;QACR,GAAIO,aAAa;YAAE,WAAWA;QAAkB,IAAI,CAAC,CAAC;QACtD,GAAGN,iBAAiB;QACpB,GAAI,AAAmB,YAAnB,OAAOK,UAAuB;YAAEA;QAAQ,IAAI,CAAC,CAAC;QAClD,yBAAyB;IAC3B;IAEA,MAAMO,aAAa,IAAIC,SAAOF;IAE9B,IAAIG,SAAiBF;IAGrB,IACEE,UACAC,oBAAoB,qBAAqB,CAACC,2BAC1C;QACA,IAAIC,aACF,MAAM,IAAIC,MAAM;QAElBC,QAAQ,GAAG,CAAC;QAEZ,MAAMC,kBAAkB;QACxB,MAAM,EAAEC,UAAU,EAAE,GAAG,MAAM,MAAM,CAACD;QACpCN,SAASO,WAAWP;IACtB;IAGA,IACEA,UACAC,oBAAoB,qBAAqB,CAACO,0BAC1C;QACA,IAAIL,aACF,MAAM,IAAIC,MAAM;QAElBC,QAAQ,GAAG,CAAC;QAEZ,MAAMI,iBAAiB;QACvB,MAAM,EAAEC,aAAa,EAAE,GAAG,MAAM,MAAM,CAACD;QACvCT,SAASU,cAAcV;IACzB;IAEA,IAAIV,oBAAoB;QACtB,MAAMqB,gBAAgB,MAAMrB,mBAAmBQ,YAAYD;QAE3D,IAAIc,eACFX,SAASW;IAEb;IAEA,OAAO;QACL,YAAYX,OAAO,IAAI,CAAC,WAAW;QACnCjB;QACAI;QACAC;QACAC;IACF;AACF;AAEO,eAAeuB,OACpBC,QAAsC,EACtClC,iBAA+B,EAC/BC,WAAyB,EACzBkC,OAGC;IAED,MAAM,EAAEC,UAAU,EAAEhC,SAAS,EAAEI,gBAAgB,EAAEC,aAAa,EAAEC,MAAM,EAAE,GACtE,MAAMX,iBAAiB;QACrBC;QACAC;IACF;IAEF,MAAMoC,YACJf,oBAAoB,iBAAiB,CAACgB,8BACtChB,oBAAoB,iBAAiB,CAACiB;IACxC,MAAMC,YAAYzB,SAAS;IAC3B,MAAM0B,oBAAoB1B,SAAS;IACnC,MAAM2B,qBAAqB3B,SAAS;IAEpC,MAAM4B,YAAYC,KAAK,GAAG;IAE1B,MAAMC,cAAcV,SAAS,UAAUA,SAAS;IAChD,IAAIW;IACJ,IAAIC,cAAc;IAClB,IAAIC;IACJ,IAAIC;IAEJ,MAAMC,iBAAiB,CAACC;QACtB,IAAI,CAACA,WAAW;QAEhB,MAAMC,oBACJD,WACC,uBAAuB;QAE1B,OAAO;YACL,eAAeA,UAAU,aAAa,IAAI;YAC1C,mBAAmBA,UAAU,iBAAiB,IAAI;YAClD,cAAcA,UAAU,YAAY,IAAI;YACxC,cAAcC,qBAAqB;YACnC,WAAWH,YAAY;YACvB,YAAY7C;YACZ,mBAAmBI;YACnB,QAAQP,YAAY,MAAM;QAC5B;IACF;IAEA,MAAMoD,eAAe;QACnB,aAAa3C,AAAW,kBAAXA,SAA2B,MAAM4C;QAC9C,QAAQ,CAAC,CAACT;QACV,YAAY,AAAqB,YAArB,OAAOR,YAAyBA,YAAYiB;QACxD,GAAI5C,AAAW,iBAAXA,SACA;YACE,2BAA2B;QAC7B,IACA,CAAC,CAAC;IACR;IAEA,IAAI;QACF8B,UACE,CAAC,QAAQ,EAAEK,cAAc,eAAe,GAAG,WAAW,EAAEzC,WAAW;QAGrE,IAAIyC,aAAa;YACf,MAAMU,SAAU,MAAMnB,WAAW,MAAM,CACrC;gBACE,OAAOhC;gBACP8B;gBACA,GAAGmB,YAAY;YACjB,GACA;gBACE,QAAQ;YACV;YAKF,WAAW,MAAMG,SAASD,OAAQ;gBAChC,MAAMT,UAAUU,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAO,WAAW;gBACtD,MAAMC,oBACHD,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,OAAe,qBAAqB;gBAG3D,IAAIA,MAAM,KAAK,EACbR,QAAQQ,MAAM,KAAK;gBAGrB,IAAIV,WAAWW,mBAAmB;oBAChCV,eAAeD;oBACf,MAAMY,YAAiC;wBACrCZ;wBACAW;wBACAV;wBACA,YAAY;wBACZ,OAAOO;oBACT;oBACAnB,QAAQ,OAAO,CAAEuB;gBACnB;gBAGA,IAAIF,MAAM,OAAO,EAAE,CAAC,EAAE,EAAE,eAAe;oBACrCP,WAAWL,KAAK,GAAG,KAAKD;oBAGxB,IAAI,CAACK,OAAO;wBAEV,MAAMW,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAACb,YAAY,MAAM,GAAG;wBAElCC,QAAQ;4BACN,eAAeW;4BACf,mBAAmBA;4BACnB,cAAcA,AAAkB,IAAlBA;wBAChB;oBACF;oBAGA,MAAME,aAAkC;wBACtC,SAAS;wBACTd;wBACA,mBAAmB;wBACnB,YAAY;wBACZ,OAAOG,eAAeF;oBACxB;oBACAb,QAAQ,OAAO,CAAE0B;oBACjB;gBACF;YACF;YACAf,UAAUC;YACVN,kBACE,CAAC,iBAAiB,EAAErC,UAAU,QAAQ,EAAEM,UAAU,UAAU,WAAW,EAAEuC,UAAU;QAEvF,OAAO;YACL,MAAMa,SAAS,MAAM1B,WAAW,MAAM,CAAC;gBACrC,OAAOhC;gBACP8B;gBACA,GAAGmB,YAAY;YACjB;YACAJ,WAAWL,KAAK,GAAG,KAAKD;YAExBF,kBACE,CAAC,OAAO,EAAErC,UAAU,QAAQ,EAAEM,UAAU,UAAU,mBAAmB,EAAED,cAAc,iBAAiB,EAAEqD,OAAO,KAAK,EAAE,iBAAiB,GAAG,qBAAqB,EAAEA,OAAO,KAAK,EAAE,qBAAqB,GAAG,gBAAgB,EAAEA,OAAO,KAAK,EAAE,gBAAgB,GAAG,WAAW,EAAEb,SAAS,aAAa,EAAEa,OAAO,WAAW,IAAI,IAAI;YAG3TpB,mBAAmB,CAAC,oBAAoB,EAAEqB,KAAK,SAAS,CAACD,OAAO,KAAK,GAAG;YAExEE,OACEF,OAAO,OAAO,EACd,CAAC,mCAAmC,EAAEC,KAAK,SAAS,CAACD,SAAS;YAEhEhB,UAAUgB,OAAO,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,OAAO;YAC3Cd,QAAQc,OAAO,KAAK;QACtB;QAEAtB,UAAU,CAAC,UAAU,EAAEM,SAAS;QAChCkB,OAAOlB,SAAS;QAGhB,IAAID,eAAe,CAACG,OAAO;YAEzB,MAAMW,kBAAkBC,KAAK,GAAG,CAC9B,GACAA,KAAK,KAAK,CAAEd,AAAAA,CAAAA,WAAW,EAAC,EAAG,MAAM,GAAG;YAEtCE,QAAQ;gBACN,eAAeW;gBACf,mBAAmBA;gBACnB,cAAcA,AAAkB,IAAlBA;YAChB;QACF;QAEA,OAAO;YACL,SAASb,WAAW;YACpB,OAAOI,eAAeF;YACtB,YAAY,CAAC,CAACH;QAChB;IACF,EAAE,OAAOoB,GAAQ;QACfvC,QAAQ,KAAK,CAAC,kBAAkBuC;QAChC,MAAMC,WAAW,IAAIzC,MACnB,CAAC,eAAe,EAAEoB,cAAc,eAAe,GAAG,kBAAkB,EAAEoB,EAAE,OAAO,CAAC,8DAA8D,CAAC,EAC/I;YACE,OAAOA;QACT;QAEF,MAAMC;IACR;AACF;AAEO,eAAeC,yBACpBjC,QAAsC,EACtClC,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAMmE,WAAW,MAAMnC,OAAOC,UAAUlC,mBAAmBC;IAC3D+D,OAAOI,UAAU;IACjB,MAAM1D,SAAST,YAAY,MAAM;IACjC,MAAMoE,cAAcC,cAAcF,SAAS,OAAO,EAAE1D;IACpD,OAAO;QACL,SAAS2D;QACT,eAAeD,SAAS,OAAO;QAC/B,OAAOA,SAAS,KAAK;IACvB;AACF;AAEO,eAAeG,yBACpBC,IAAY,EACZxE,iBAA+B,EAC/BC,WAAyB;IAEzB,MAAM,EAAE6C,OAAO,EAAEE,KAAK,EAAE,GAAG,MAAMf,OAAOuC,MAAMxE,mBAAmBC;IACjE,OAAO;QAAE6C;QAASE;IAAM;AAC1B;AAEO,SAASyB,yBAAyBL,QAAgB;IACvD,IAAI;QAEF,MAAMM,YAAYN,SAAS,KAAK,CAAC;QACjC,IAAIM,WACF,OAAOA,SAAS,CAAC,EAAE;QAIrB,MAAMC,iBAAiBP,SAAS,KAAK,CACnC;QAEF,IAAIO,gBACF,OAAOA,cAAc,CAAC,EAAE;QAI1B,MAAMC,gBAAgBR,SAAS,KAAK,CAAC;QACrC,IAAIQ,eACF,OAAOA,aAAa,CAAC,EAAE;IAE3B,EAAE,OAAM,CAAC;IAET,OAAOR;AACT;AAEO,SAASS,yBAAyBC,KAAa;IACpD,IAAIA,MAAM,QAAQ,CAAC,SAEjB,MAAO,YAAY,IAAI,CAACA,OACtBA,QAAQA,MAAM,OAAO,CAAC,kBAAkB;IAG5C,OAAOA;AACT;AAQA,SAASC,oBAAoBC,GAAQ;IAEnC,IAAIA,QAAAA,KACF,OAAOA;IAIT,IAAIC,MAAM,OAAO,CAACD,MAChB,OAAOA,IAAI,GAAG,CAAC,CAACE,OAASH,oBAAoBG;IAI/C,IAAI,AAAe,YAAf,OAAOF,KAAkB;QAC3B,MAAMG,aAAkB,CAAC;QAEzB,KAAK,MAAM,CAACC,KAAKC,MAAM,IAAIC,OAAO,OAAO,CAACN,KAAM;YAE9C,MAAMO,aAAaH,IAAI,IAAI;YAG3B,IAAII,kBAAkBT,oBAAoBM;YAG1C,IAAI,AAA2B,YAA3B,OAAOG,iBACTA,kBAAkBA,gBAAgB,IAAI;YAGxCL,UAAU,CAACI,WAAW,GAAGC;QAC3B;QAEA,OAAOL;IACT;IAGA,IAAI,AAAe,YAAf,OAAOH,KACT,OAAOA,IAAI,IAAI;IAIjB,OAAOA;AACT;AAEO,SAASV,cAAcQ,KAAa,EAAEpE,MAAgC;IAC3E,MAAM+E,kBAAkBhB,yBAAyBK;IAEjD,IAAIW,iBAAiB,MAAM,oBACzB,OAAOA,gBACJ,KAAK,CAAC,oBACL,MAAM,GACP,IAAIC;IAGT,IAAIC;IACJ,IAAI;QACFA,SAAS5B,KAAK,KAAK,CAAC0B;QACpB,OAAOV,oBAAoBY;IAC7B,EAAE,OAAM,CAAC;IACT,IAAI;QACFA,SAAS5B,KAAK,KAAK,CAAC6B,WAAWH;QAC/B,OAAOV,oBAAoBY;IAC7B,EAAE,OAAO1B,GAAG,CAAC;IAEb,IAAIvD,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,QAA0B;QAC1D,MAAMmF,aAAahB,yBAAyBY;QAC5CE,SAAS5B,KAAK,KAAK,CAAC6B,WAAWC;QAC/B,OAAOd,oBAAoBY;IAC7B;IACA,MAAMlE,MAAM,CAAC,+BAA+B,EAAEqD,OAAO;AACvD"}
|