@midscene/core 1.0.1-beta-20251028121806.0 → 1.0.1-beta-20251029093754.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/es/agent/task-builder.mjs +7 -3
  2. package/dist/es/agent/task-builder.mjs.map +1 -1
  3. package/dist/es/agent/tasks.mjs +5 -0
  4. package/dist/es/agent/tasks.mjs.map +1 -1
  5. package/dist/es/agent/utils.mjs +1 -1
  6. package/dist/es/ai-model/index.mjs +2 -2
  7. package/dist/es/ai-model/inspect.mjs +21 -4
  8. package/dist/es/ai-model/inspect.mjs.map +1 -1
  9. package/dist/es/ai-model/prompt/llm-locator.mjs +8 -32
  10. package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
  11. package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
  12. package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
  13. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
  14. package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
  15. package/dist/es/device/index.mjs.map +1 -1
  16. package/dist/es/task-runner.mjs +31 -3
  17. package/dist/es/task-runner.mjs.map +1 -1
  18. package/dist/es/types.mjs.map +1 -1
  19. package/dist/es/utils.mjs +2 -2
  20. package/dist/lib/agent/task-builder.js +7 -3
  21. package/dist/lib/agent/task-builder.js.map +1 -1
  22. package/dist/lib/agent/tasks.js +5 -0
  23. package/dist/lib/agent/tasks.js.map +1 -1
  24. package/dist/lib/agent/utils.js +1 -1
  25. package/dist/lib/ai-model/index.js +13 -10
  26. package/dist/lib/ai-model/inspect.js +25 -5
  27. package/dist/lib/ai-model/inspect.js.map +1 -1
  28. package/dist/lib/ai-model/prompt/llm-locator.js +8 -32
  29. package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
  30. package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -14
  31. package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
  32. package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
  33. package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
  34. package/dist/lib/device/index.js.map +1 -1
  35. package/dist/lib/task-runner.js +31 -3
  36. package/dist/lib/task-runner.js.map +1 -1
  37. package/dist/lib/types.js.map +1 -1
  38. package/dist/lib/utils.js +2 -2
  39. package/dist/types/ai-model/index.d.ts +1 -1
  40. package/dist/types/ai-model/inspect.d.ts +6 -0
  41. package/dist/types/ai-model/prompt/llm-locator.d.ts +0 -1
  42. package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
  43. package/dist/types/device/index.d.ts +4 -2
  44. package/dist/types/task-runner.d.ts +2 -0
  45. package/dist/types/types.d.ts +2 -1
  46. package/package.json +2 -3
@@ -161,6 +161,8 @@ class TaskBuilder {
161
161
  const actionFn = action.call.bind(this.interface);
162
162
  await actionFn(param, taskContext);
163
163
  debug('called action', action.name);
164
+ const delayAfterRunner = action.delayAfterRunner ?? 300;
165
+ if (delayAfterRunner > 0) await sleep(delayAfterRunner);
164
166
  try {
165
167
  if (this.interface.afterInvokeAction) {
166
168
  debug('will call "afterInvokeAction" for interface');
@@ -254,9 +256,11 @@ class TaskBuilder {
254
256
  const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
255
257
  let currentCacheEntry;
256
258
  if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
257
- const feature = await this.interface.cacheFeatureForRect(element.rect, void 0 !== element.isOrderSensitive ? {
258
- _orderSensitive: element.isOrderSensitive
259
- } : void 0);
259
+ var _param_prompt;
260
+ const feature = await this.interface.cacheFeatureForRect(element.rect, {
261
+ targetDescription: 'string' == typeof param.prompt ? param.prompt : null == (_param_prompt = param.prompt) ? void 0 : _param_prompt.prompt,
262
+ modelConfig
263
+ });
260
264
  if (feature && Object.keys(feature).length > 0) {
261
265
  debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
262
266
  currentCacheEntry = feature;
@@ -1 +1 @@
1
- {"version":3,"file":"agent/task-builder.mjs","sources":["webpack://@midscene/core/./src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type {\n DetailedLocateParam,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskInsightLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport { matchElementFromCache, matchElementFromPlan } from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n locate,\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n subTask?: boolean;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfig: IModelConfig;\n cacheable?: boolean;\n subTask: boolean;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n constructor({ interfaceInstance, service, taskCache }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfig,\n cacheable,\n subTask: !!options?.subTask,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n [\n 'Sleep',\n (plan) =>\n this.handleSleepPlan(\n plan as PlanningAction<PlanningActionParamSleep>,\n context,\n ),\n ],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n locate: plan.locate,\n subTask: context.subTask || undefined,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private handleSleepPlan(\n plan: PlanningAction<PlanningActionParamSleep>,\n context: PlanBuildContext,\n ): void {\n const sleepTask = this.createSleepTask(plan.param, {\n thought: plan.thought,\n locate: plan.locate,\n });\n if (context.subTask) {\n sleepTask.subTask = true;\n }\n context.tasks.push(sleepTask);\n }\n\n public createSleepTask(\n param: PlanningActionParamSleep,\n meta?: { thought?: string; locate?: PlanningAction['locate'] | null },\n ): ExecutionTaskActionApply<PlanningActionParamSleep> {\n return {\n type: 'Action',\n subType: 'Sleep',\n param,\n thought: meta?.thought,\n locate: meta?.locate ?? null,\n executor: async (taskParam) => {\n await sleep(taskParam?.timeMs || 3000);\n },\n };\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n if (!plan.locate || plan.locate === null) {\n debug('Locate action with id is null, will be ignored', plan);\n return;\n }\n\n const taskLocate = this.createLocateTask(plan, plan.locate, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = await this.interface.actionSpace();\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n subTask: context.subTask || undefined,\n executor: async (param, taskContext) => {\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`,\n );\n });\n\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug('will call \"beforeInvokeAction\" for interface');\n await this.interface.beforeInvokeAction(action.name, param);\n debug('called \"beforeInvokeAction\" for interface');\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema);\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n await actionFn(param, taskContext);\n debug('called action', action.name);\n\n try {\n if (this.interface.afterInvokeAction) {\n debug('will call \"afterInvokeAction\" for interface');\n await this.interface.afterInvokeAction(action.name, param);\n debug('called \"afterInvokeAction\" for interface');\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n return {\n output: {\n success: true,\n action: planType,\n param: param,\n },\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskInsightLocateApply {\n const { cacheable, modelConfig } = context;\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n const taskFind: ExecutionTaskInsightLocateApply = {\n type: 'Insight',\n subType: 'Locate',\n subTask: context.subTask || undefined,\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service task');\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n };\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (param.xpath && this.interface.rectMatchesCacheFeature) {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n }\n const elementFromXpath = rectFromXpath\n ? generateElementByPosition({\n x: rectFromXpath.left + rectFromXpath.width / 2,\n y: rectFromXpath.top + rectFromXpath.height / 2,\n })\n : undefined;\n const userExpectedPathHitFlag = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCache = userExpectedPathHitFlag\n ? null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n const cacheHitFlag = !!elementFromCache;\n\n const elementFromPlan =\n !userExpectedPathHitFlag && !cacheHitFlag\n ? matchElementFromPlan(param)\n : undefined;\n const planHitFlag = !!elementFromPlan;\n\n let elementFromAiLocate: LocateResultElement | null | undefined;\n if (!userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag) {\n try {\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfig,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n }\n\n const element =\n elementFromXpath ||\n elementFromCache ||\n elementFromPlan ||\n elementFromAiLocate;\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n if (\n element &&\n this.taskCache &&\n !cacheHitFlag &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForRect) {\n try {\n const feature = await this.interface.cacheFeatureForRect(\n element.rect,\n element.isOrderSensitive !== undefined\n ? { _orderSensitive: element.isOrderSensitive }\n : undefined,\n );\n if (feature && Object.keys(feature).length > 0) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForRect failed: %s', error);\n }\n } else {\n debug('cacheFeatureForRect is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (userExpectedPathHitFlag) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (cacheHitFlag) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n } else if (planHitFlag) {\n hitBy = {\n from: 'Planning',\n context: {\n rect: elementFromPlan?.rect,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element,\n },\n hitBy,\n };\n },\n };\n\n return taskFind;\n }\n}\n"],"names":["debug","getDebug","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfig","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","undefined","sleepTask","meta","taskParam","sleep","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","locateTask","result","assert","task","taskContext","_taskContext_element","uiContext","Promise","originalError","originalMessage","String","parseActionParam","error","actionFn","detailedLocateParam","onResult","locateParam","taskFind","_this_taskCache","_locateCacheRecord_cacheContent","locateDump","locateResult","applyDump","dump","_dump_taskInfo","_dump_taskInfo1","rectFromXpath","elementFromXpath","generateElementByPosition","userExpectedPathHitFlag","cachePrompt","locateCacheRecord","cacheEntry","elementFromCache","matchElementFromCache","cacheHitFlag","elementFromPlan","matchElementFromPlan","planHitFlag","elementFromAiLocate","ServiceError","element","currentCacheEntry","feature","Object","hitBy","interfaceInstance","service","taskCache"],"mappings":";;;;;;;;;;;;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AAEhB,SAASC,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACND;QACA,OAAOA;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAoBO,MAAMC;IAaX,MAAa,MACXC,KAAuB,EACvBC,WAAyB,EACzBC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,SAAS;QAEpC,MAAMG,UAA4B;YAChCF;YACAF;YACAG;YACA,SAAS,CAAC,CAACF,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;QAC5B;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;YAC9D;gBACE;gBACA,CAACG,OACC,IAAI,CAAC,eAAe,CAClBA,MACAH;aAEL;SACF;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQR,MAAO;YACxB,MAAMU,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;YACnB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,WAAa;QACzB;QACAP,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEQ,gBACNH,IAA8C,EAC9CH,OAAyB,EACnB;QACN,MAAMQ,YAAY,IAAI,CAAC,eAAe,CAACL,KAAK,KAAK,EAAE;YACjD,SAASA,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;QACrB;QACA,IAAIH,QAAQ,OAAO,EACjBQ,UAAU,OAAO,GAAG;QAEtBR,QAAQ,KAAK,CAAC,IAAI,CAACQ;IACrB;IAEO,gBACLjB,KAA+B,EAC/BkB,IAAqE,EACjB;QACpD,OAAO;YACL,MAAM;YACN,SAAS;YACTlB;YACA,SAASkB,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,OAAO;YACtB,QAAQA,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,MAAM,AAAD,KAAK;YACxB,UAAU,OAAOC;gBACf,MAAMC,MAAMD,AAAAA,CAAAA,QAAAA,YAAAA,KAAAA,IAAAA,UAAW,MAAM,AAAD,KAAK;YACnC;QACF;IACF;IAEA,MAAc,iBACZP,IAAyC,EACzCH,OAAyB,EACV;QACf,IAAI,CAACG,KAAK,MAAM,IAAIA,AAAgB,SAAhBA,KAAK,MAAM,EAAW,YACxCf,MAAM,kDAAkDe;QAI1D,MAAMS,aAAa,IAAI,CAAC,gBAAgB,CAACT,MAAMA,KAAK,MAAM,EAAEH;QAC5DA,QAAQ,KAAK,CAAC,IAAI,CAACY;IACrB;IAEA,MAAc,iBACZT,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMa,WAAWV,KAAK,IAAI;QAC1B,MAAMW,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;QACpD,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMtB,QAAQY,KAAK,KAAK;QAExB,IAAI,CAACY,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI9B,KAAK,CAAC8B,MAAM,EAAE;gBAChB,MAAM5B,aAAaH,oBAAoBC,KAAK,CAAC8B,MAAM;gBACnDjC,MACE,uCACA,CAAC,YAAY,EAAEyB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC/B,KAAK,CAAC8B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAAC7B,aAAa;gBAE5C,MAAM8B,aAAa,IAAI,CAAC,gBAAgB,CACtC9B,YACAF,KAAK,CAAC8B,MAAM,EACZrB,SACA,CAACwB;oBACCjC,KAAK,CAAC8B,MAAM,GAAGG;gBACjB;gBAEFxB,QAAQ,KAAK,CAAC,IAAI,CAACuB;YACrB,OAAO;gBACLE,OACE,CAACL,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3EzB,MAAM,CAAC,OAAO,EAAEiC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMa,OAKF;YACF,MAAM;YACN,SAASb;YACT,SAASV,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,OAAOhB,OAAOoC;oBAKWC;gBAJjCxC,MACE,oBACAyB,UACAtB,OACA,CAAC,4BAA4B,EAAE,QAAAqC,CAAAA,uBAAAA,YAAY,OAAO,AAAD,IAAlBA,KAAAA,IAAAA,qBAAqB,MAAM,EAAE;gBAG9D,MAAMC,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBT,qBAAqB,OAAO,CAAC,CAACC;oBAC5BI,OACElC,KAAK,CAAC8B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEA,IAAI;oBACF,MAAMiB,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC1C,MAAM;gCACN,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC2B,OAAO,IAAI,EAAExB;gCACrDH,MAAM;4BACR;wBACF;wBACAuB,MAAM;qBACP;gBACH,EAAE,OAAOoB,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,IAAIhB,OAAO,WAAW,EACpB,IAAI;oBACFxB,QAAQ2C,iBAAiB3C,OAAOwB,OAAO,WAAW;gBACpD,EAAE,OAAOoB,OAAY;oBACnB,MAAM,IAAIlB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,MAAM,OAAO,CAAC,cAAc,EAAEb,KAAK,SAAS,CAAC/B,QAAQ,EACtG;wBAAE,OAAO4C;oBAAM;gBAEnB;gBAGF/C,MAAM,kBAAkB2B,OAAO,IAAI;gBACnC,MAAMqB,WAAWrB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAMqB,SAAS7C,OAAOoC;gBACtBvC,MAAM,iBAAiB2B,OAAO,IAAI;gBAElC,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpC3B,MAAM;wBACN,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC2B,OAAO,IAAI,EAAExB;wBACpDH,MAAM;oBACR;gBACF,EAAE,OAAO2C,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,OAAO;oBACL,QAAQ;wBACN,SAAS;wBACT,QAAQlB;wBACR,OAAOtB;oBACT;gBACF;YACF;QACF;QAEAS,QAAQ,KAAK,CAAC,IAAI,CAAC0B;IACrB;IAEQ,iBACNvB,IAAyC,EACzCkC,mBAAiD,EACjDrC,OAAyB,EACzBsC,QAAgD,EACf;QACjC,MAAM,EAAEvC,SAAS,EAAEH,WAAW,EAAE,GAAGI;QACnC,IAAIuC,cAAcF;QAElB,IAAI,AAAuB,YAAvB,OAAOE,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAIxC,AAAcQ,WAAdR,WACFwC,cAAc;YACZ,GAAGA,WAAW;YACdxC;QACF;QAGF,MAAMyC,WAA4C;YAChD,MAAM;YACN,SAAS;YACT,SAASxC,QAAQ,OAAO,IAAIO;YAC5B,OAAOgC;YACP,SAASpC,KAAK,OAAO;YACrB,UAAU,OAAOZ,OAAOoC;oBAkDIc,iBACPC;gBAlDnB,MAAM,EAAEhB,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACElC,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,MAAM,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,IAAI,AAAD,GAC3B,CAAC,qDAAqD,EAAE+B,KAAK,SAAS,CACpE/B,QACC;gBAGL,IAAI,CAACsC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,IAAIc;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;wBAQJC,gBACTC;oBARJ,IAAI,CAACF,MACH;oBAEFH,aAAaG;oBACbpB,KAAK,GAAG,GAAG;wBACToB;oBACF;oBACApB,KAAK,KAAK,GAAG,QAAAqB,CAAAA,iBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,eAAe,KAAK;oBACjC,IAAI,QAAAC,CAAAA,kBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,gBAAe,eAAe,EAChCtB,KAAK,eAAe,GAAGoB,KAAK,QAAQ,CAAC,eAAe;gBAExD;gBAGA,IAAIG;gBACJ,IAAI1D,MAAM,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC,uBAAuB,EACvD0D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;oBAC3D,QAAQ;wBAAC1D,MAAM,KAAK;qBAAC;gBACvB;gBAEF,MAAM2D,mBAAmBD,gBACrBE,0BAA0B;oBACxB,GAAGF,cAAc,IAAI,GAAGA,cAAc,KAAK,GAAG;oBAC9C,GAAGA,cAAc,GAAG,GAAGA,cAAc,MAAM,GAAG;gBAChD,KACA1C;gBACJ,MAAM6C,0BAA0B,CAAC,CAACF;gBAElC,MAAMG,cAAc9D,MAAM,MAAM;gBAChC,MAAM+D,oBAAoB,QAAAb,CAAAA,kBAAAA,IAAI,CAAC,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,gBAAgB,CAACY;gBAC3D,MAAME,aAAab,QAAAA,oBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kCAAAA,kBAAmB,YAAY,AAAD,IAA9BA,KAAAA,IAAAA,gCAAiC,KAAK;gBAEzD,MAAMc,mBAAmBJ,0BACrB,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACA9D,MAAM,SAAS;gBAErB,MAAMmE,eAAe,CAAC,CAACF;gBAEvB,MAAMG,kBACJ,AAACP,2BAA4BM,eAEzBnD,SADAqD,qBAAqBrE;gBAE3B,MAAMsE,cAAc,CAAC,CAACF;gBAEtB,IAAIG;gBACJ,IAAI,CAACV,2BAA2B,CAACM,gBAAgB,CAACG,aAChD,IAAI;oBACFjB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCrD,OACA;wBACE,SAASsC;oBACX,GACAjC;oBAEFiD,UAAUD,aAAa,IAAI;oBAC3BkB,sBAAsBlB,aAAa,OAAO;gBAC5C,EAAE,OAAOT,OAAO;oBACd,IAAIA,iBAAiB4B,cACnBlB,UAAUV,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAGF,MAAM6B,UACJd,oBACAM,oBACAG,mBACAG;gBAEF,IAAIG;gBACJ,IACED,WACA,IAAI,CAAC,SAAS,IACd,CAACN,gBACDnE,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,SAAS,AAAD,MAAM,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,mBAAmB,EACpC,IAAI;oBACF,MAAM2E,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB,CACtDF,QAAQ,IAAI,EACZA,AAA6BzD,WAA7ByD,QAAQ,gBAAgB,GACpB;wBAAE,iBAAiBA,QAAQ,gBAAgB;oBAAC,IAC5CzD;oBAEN,IAAI2D,WAAWC,OAAO,IAAI,CAACD,SAAS,MAAM,GAAG,GAAG;wBAC9C9E,MACE,uCACAiE,aACAa;wBAEFD,oBAAoBC;wBACpB,IAAI,CAAC,SAAS,CAAC,yBAAyB,CACtC;4BACE,MAAM;4BACN,QAAQb;4BACR,OAAOa;wBACT,GACAZ;oBAEJ,OACElE,MACE,yDACAiE;gBAGN,EAAE,OAAOlB,OAAO;oBACd/C,MAAM,kCAAkC+C;gBAC1C;qBAEA/C,MAAM;gBAIV,IAAI,CAAC4E,SAAS;oBACZ,IAAIrB,YACF,MAAM,IAAIoB,aACR,CAAC,oBAAoB,EAAExE,MAAM,MAAM,EAAE,EACrCoD;oBAGJ,MAAM,IAAI1B,MAAM,CAAC,mBAAmB,EAAE1B,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAI6E;gBAEJ,IAAIhB,yBACFgB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAO7E,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAImE,cACTU,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACPb;wBACA,aAAaU;oBACf;gBACF;qBACK,IAAIJ,aACTO,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMT,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,IAAI;oBAC7B;gBACF;gBAGFrB,QAAAA,YAAAA,SAAW0B;gBAEX,OAAO;oBACL,QAAQ;wBACNA;oBACF;oBACAI;gBACF;YACF;QACF;QAEA,OAAO5B;IACT;IA1dA,YAAY,EAAE6B,iBAAiB,EAAEC,OAAO,EAAEC,SAAS,EAAmB,CAAE;QANxE,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAGE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;IACnB;AAudF"}
1
+ {"version":3,"file":"agent/task-builder.mjs","sources":["webpack://@midscene/core/./src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type {\n DetailedLocateParam,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskInsightLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport { matchElementFromCache, matchElementFromPlan } from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? { prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n locate,\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n subTask?: boolean;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfig: IModelConfig;\n cacheable?: boolean;\n subTask: boolean;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n constructor({ interfaceInstance, service, taskCache }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfig,\n cacheable,\n subTask: !!options?.subTask,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n [\n 'Sleep',\n (plan) =>\n this.handleSleepPlan(\n plan as PlanningAction<PlanningActionParamSleep>,\n context,\n ),\n ],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n locate: plan.locate,\n subTask: context.subTask || undefined,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private handleSleepPlan(\n plan: PlanningAction<PlanningActionParamSleep>,\n context: PlanBuildContext,\n ): void {\n const sleepTask = this.createSleepTask(plan.param, {\n thought: plan.thought,\n locate: plan.locate,\n });\n if (context.subTask) {\n sleepTask.subTask = true;\n }\n context.tasks.push(sleepTask);\n }\n\n public createSleepTask(\n param: PlanningActionParamSleep,\n meta?: { thought?: string; locate?: PlanningAction['locate'] | null },\n ): ExecutionTaskActionApply<PlanningActionParamSleep> {\n return {\n type: 'Action',\n subType: 'Sleep',\n param,\n thought: meta?.thought,\n locate: meta?.locate ?? null,\n executor: async (taskParam) => {\n await sleep(taskParam?.timeMs || 3000);\n },\n };\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n if (!plan.locate || plan.locate === null) {\n debug('Locate action with id is null, will be ignored', plan);\n return;\n }\n\n const taskLocate = this.createLocateTask(plan, plan.locate, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = await this.interface.actionSpace();\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n subTask: context.subTask || undefined,\n executor: async (param, taskContext) => {\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. Cannot execute action ${planType}.`,\n );\n });\n\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug('will call \"beforeInvokeAction\" for interface');\n await this.interface.beforeInvokeAction(action.name, param);\n debug('called \"beforeInvokeAction\" for interface');\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema);\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n await actionFn(param, taskContext);\n debug('called action', action.name);\n\n const delayAfterRunner = action.delayAfterRunner ?? 300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug('will call \"afterInvokeAction\" for interface');\n await this.interface.afterInvokeAction(action.name, param);\n debug('called \"afterInvokeAction\" for interface');\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n return {\n output: {\n success: true,\n action: planType,\n param: param,\n },\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskInsightLocateApply {\n const { cacheable, modelConfig } = context;\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n const taskFind: ExecutionTaskInsightLocateApply = {\n type: 'Insight',\n subType: 'Locate',\n subTask: context.subTask || undefined,\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service task');\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n };\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (param.xpath && this.interface.rectMatchesCacheFeature) {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n }\n const elementFromXpath = rectFromXpath\n ? generateElementByPosition({\n x: rectFromXpath.left + rectFromXpath.width / 2,\n y: rectFromXpath.top + rectFromXpath.height / 2,\n })\n : undefined;\n const userExpectedPathHitFlag = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCache = userExpectedPathHitFlag\n ? null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n const cacheHitFlag = !!elementFromCache;\n\n const elementFromPlan =\n !userExpectedPathHitFlag && !cacheHitFlag\n ? matchElementFromPlan(param)\n : undefined;\n const planHitFlag = !!elementFromPlan;\n\n let elementFromAiLocate: LocateResultElement | null | undefined;\n if (!userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag) {\n try {\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfig,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n }\n\n const element =\n elementFromXpath ||\n elementFromCache ||\n elementFromPlan ||\n elementFromAiLocate;\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n if (\n element &&\n this.taskCache &&\n !cacheHitFlag &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForRect) {\n try {\n const feature = await this.interface.cacheFeatureForRect(\n element.rect,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig,\n },\n );\n if (feature && Object.keys(feature).length > 0) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForRect failed: %s', error);\n }\n } else {\n debug('cacheFeatureForRect is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (userExpectedPathHitFlag) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (cacheHitFlag) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n } else if (planHitFlag) {\n hitBy = {\n from: 'Planning',\n context: {\n rect: elementFromPlan?.rect,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element,\n },\n hitBy,\n };\n },\n };\n\n return taskFind;\n }\n}\n"],"names":["debug","getDebug","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfig","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","undefined","sleepTask","meta","taskParam","sleep","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","locateTask","result","assert","task","taskContext","_taskContext_element","uiContext","Promise","originalError","originalMessage","String","parseActionParam","error","actionFn","delayAfterRunner","detailedLocateParam","onResult","locateParam","taskFind","_this_taskCache","_locateCacheRecord_cacheContent","locateDump","locateResult","applyDump","dump","_dump_taskInfo","_dump_taskInfo1","rectFromXpath","elementFromXpath","generateElementByPosition","userExpectedPathHitFlag","cachePrompt","locateCacheRecord","cacheEntry","elementFromCache","matchElementFromCache","cacheHitFlag","elementFromPlan","matchElementFromPlan","planHitFlag","elementFromAiLocate","ServiceError","element","currentCacheEntry","_param_prompt","feature","Object","hitBy","interfaceInstance","service","taskCache"],"mappings":";;;;;;;;;;;;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AAEhB,SAASC,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACND;QACA,OAAOA;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAoBO,MAAMC;IAaX,MAAa,MACXC,KAAuB,EACvBC,WAAyB,EACzBC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,SAAS;QAEpC,MAAMG,UAA4B;YAChCF;YACAF;YACAG;YACA,SAAS,CAAC,CAACF,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;QAC5B;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;YAC9D;gBACE;gBACA,CAACG,OACC,IAAI,CAAC,eAAe,CAClBA,MACAH;aAEL;SACF;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQR,MAAO;YACxB,MAAMU,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;YACnB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,WAAa;QACzB;QACAP,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEQ,gBACNH,IAA8C,EAC9CH,OAAyB,EACnB;QACN,MAAMQ,YAAY,IAAI,CAAC,eAAe,CAACL,KAAK,KAAK,EAAE;YACjD,SAASA,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;QACrB;QACA,IAAIH,QAAQ,OAAO,EACjBQ,UAAU,OAAO,GAAG;QAEtBR,QAAQ,KAAK,CAAC,IAAI,CAACQ;IACrB;IAEO,gBACLjB,KAA+B,EAC/BkB,IAAqE,EACjB;QACpD,OAAO;YACL,MAAM;YACN,SAAS;YACTlB;YACA,SAASkB,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,OAAO;YACtB,QAAQA,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,MAAM,AAAD,KAAK;YACxB,UAAU,OAAOC;gBACf,MAAMC,MAAMD,AAAAA,CAAAA,QAAAA,YAAAA,KAAAA,IAAAA,UAAW,MAAM,AAAD,KAAK;YACnC;QACF;IACF;IAEA,MAAc,iBACZP,IAAyC,EACzCH,OAAyB,EACV;QACf,IAAI,CAACG,KAAK,MAAM,IAAIA,AAAgB,SAAhBA,KAAK,MAAM,EAAW,YACxCf,MAAM,kDAAkDe;QAI1D,MAAMS,aAAa,IAAI,CAAC,gBAAgB,CAACT,MAAMA,KAAK,MAAM,EAAEH;QAC5DA,QAAQ,KAAK,CAAC,IAAI,CAACY;IACrB;IAEA,MAAc,iBACZT,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMa,WAAWV,KAAK,IAAI;QAC1B,MAAMW,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;QACpD,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMtB,QAAQY,KAAK,KAAK;QAExB,IAAI,CAACY,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI9B,KAAK,CAAC8B,MAAM,EAAE;gBAChB,MAAM5B,aAAaH,oBAAoBC,KAAK,CAAC8B,MAAM;gBACnDjC,MACE,uCACA,CAAC,YAAY,EAAEyB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC/B,KAAK,CAAC8B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAAC7B,aAAa;gBAE5C,MAAM8B,aAAa,IAAI,CAAC,gBAAgB,CACtC9B,YACAF,KAAK,CAAC8B,MAAM,EACZrB,SACA,CAACwB;oBACCjC,KAAK,CAAC8B,MAAM,GAAGG;gBACjB;gBAEFxB,QAAQ,KAAK,CAAC,IAAI,CAACuB;YACrB,OAAO;gBACLE,OACE,CAACL,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3EzB,MAAM,CAAC,OAAO,EAAEiC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMa,OAKF;YACF,MAAM;YACN,SAASb;YACT,SAASV,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,OAAOhB,OAAOoC;oBAKWC;gBAJjCxC,MACE,oBACAyB,UACAtB,OACA,CAAC,4BAA4B,EAAE,QAAAqC,CAAAA,uBAAAA,YAAY,OAAO,AAAD,IAAlBA,KAAAA,IAAAA,qBAAqB,MAAM,EAAE;gBAG9D,MAAMC,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBT,qBAAqB,OAAO,CAAC,CAACC;oBAC5BI,OACElC,KAAK,CAAC8B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEA,IAAI;oBACF,MAAMiB,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC1C,MAAM;gCACN,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC2B,OAAO,IAAI,EAAExB;gCACrDH,MAAM;4BACR;wBACF;wBACAuB,MAAM;qBACP;gBACH,EAAE,OAAOoB,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,IAAIhB,OAAO,WAAW,EACpB,IAAI;oBACFxB,QAAQ2C,iBAAiB3C,OAAOwB,OAAO,WAAW;gBACpD,EAAE,OAAOoB,OAAY;oBACnB,MAAM,IAAIlB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,MAAM,OAAO,CAAC,cAAc,EAAEb,KAAK,SAAS,CAAC/B,QAAQ,EACtG;wBAAE,OAAO4C;oBAAM;gBAEnB;gBAGF/C,MAAM,kBAAkB2B,OAAO,IAAI;gBACnC,MAAMqB,WAAWrB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAMqB,SAAS7C,OAAOoC;gBACtBvC,MAAM,iBAAiB2B,OAAO,IAAI;gBAElC,MAAMsB,mBAAmBtB,OAAO,gBAAgB,IAAI;gBACpD,IAAIsB,mBAAmB,GACrB,MAAM1B,MAAM0B;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCjD,MAAM;wBACN,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC2B,OAAO,IAAI,EAAExB;wBACpDH,MAAM;oBACR;gBACF,EAAE,OAAO2C,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,OAAO;oBACL,QAAQ;wBACN,SAAS;wBACT,QAAQlB;wBACR,OAAOtB;oBACT;gBACF;YACF;QACF;QAEAS,QAAQ,KAAK,CAAC,IAAI,CAAC0B;IACrB;IAEQ,iBACNvB,IAAyC,EACzCmC,mBAAiD,EACjDtC,OAAyB,EACzBuC,QAAgD,EACf;QACjC,MAAM,EAAExC,SAAS,EAAEH,WAAW,EAAE,GAAGI;QACnC,IAAIwC,cAAcF;QAElB,IAAI,AAAuB,YAAvB,OAAOE,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAIzC,AAAcQ,WAAdR,WACFyC,cAAc;YACZ,GAAGA,WAAW;YACdzC;QACF;QAGF,MAAM0C,WAA4C;YAChD,MAAM;YACN,SAAS;YACT,SAASzC,QAAQ,OAAO,IAAIO;YAC5B,OAAOiC;YACP,SAASrC,KAAK,OAAO;YACrB,UAAU,OAAOZ,OAAOoC;oBAkDIe,iBACPC;gBAlDnB,MAAM,EAAEjB,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACElC,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,MAAM,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,IAAI,AAAD,GAC3B,CAAC,qDAAqD,EAAE+B,KAAK,SAAS,CACpE/B,QACC;gBAGL,IAAI,CAACsC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,IAAIe;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;wBAQJC,gBACTC;oBARJ,IAAI,CAACF,MACH;oBAEFH,aAAaG;oBACbrB,KAAK,GAAG,GAAG;wBACTqB;oBACF;oBACArB,KAAK,KAAK,GAAG,QAAAsB,CAAAA,iBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,eAAe,KAAK;oBACjC,IAAI,QAAAC,CAAAA,kBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,gBAAe,eAAe,EAChCvB,KAAK,eAAe,GAAGqB,KAAK,QAAQ,CAAC,eAAe;gBAExD;gBAGA,IAAIG;gBACJ,IAAI3D,MAAM,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC,uBAAuB,EACvD2D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;oBAC3D,QAAQ;wBAAC3D,MAAM,KAAK;qBAAC;gBACvB;gBAEF,MAAM4D,mBAAmBD,gBACrBE,0BAA0B;oBACxB,GAAGF,cAAc,IAAI,GAAGA,cAAc,KAAK,GAAG;oBAC9C,GAAGA,cAAc,GAAG,GAAGA,cAAc,MAAM,GAAG;gBAChD,KACA3C;gBACJ,MAAM8C,0BAA0B,CAAC,CAACF;gBAElC,MAAMG,cAAc/D,MAAM,MAAM;gBAChC,MAAMgE,oBAAoB,QAAAb,CAAAA,kBAAAA,IAAI,CAAC,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,gBAAgB,CAACY;gBAC3D,MAAME,aAAab,QAAAA,oBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kCAAAA,kBAAmB,YAAY,AAAD,IAA9BA,KAAAA,IAAAA,gCAAiC,KAAK;gBAEzD,MAAMc,mBAAmBJ,0BACrB,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACA/D,MAAM,SAAS;gBAErB,MAAMoE,eAAe,CAAC,CAACF;gBAEvB,MAAMG,kBACJ,AAACP,2BAA4BM,eAEzBpD,SADAsD,qBAAqBtE;gBAE3B,MAAMuE,cAAc,CAAC,CAACF;gBAEtB,IAAIG;gBACJ,IAAI,CAACV,2BAA2B,CAACM,gBAAgB,CAACG,aAChD,IAAI;oBACFjB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCtD,OACA;wBACE,SAASsC;oBACX,GACAjC;oBAEFkD,UAAUD,aAAa,IAAI;oBAC3BkB,sBAAsBlB,aAAa,OAAO;gBAC5C,EAAE,OAAOV,OAAO;oBACd,IAAIA,iBAAiB6B,cACnBlB,UAAUX,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAGF,MAAM8B,UACJd,oBACAM,oBACAG,mBACAG;gBAEF,IAAIG;gBACJ,IACED,WACA,IAAI,CAAC,SAAS,IACd,CAACN,gBACDpE,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,SAAS,AAAD,MAAM,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,mBAAmB,EACpC,IAAI;wBAOQ4E;oBANV,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB,CACtDH,QAAQ,IAAI,EACZ;wBACE,mBACE,AAAwB,YAAxB,OAAO1E,MAAM,MAAM,GACfA,MAAM,MAAM,WACZ4E,CAAAA,gBAAAA,MAAM,MAAM,AAAD,IAAXA,KAAAA,IAAAA,cAAc,MAAM;wBAC1BvE;oBACF;oBAEF,IAAIwE,WAAWC,OAAO,IAAI,CAACD,SAAS,MAAM,GAAG,GAAG;wBAC9ChF,MACE,uCACAkE,aACAc;wBAEFF,oBAAoBE;wBACpB,IAAI,CAAC,SAAS,CAAC,yBAAyB,CACtC;4BACE,MAAM;4BACN,QAAQd;4BACR,OAAOc;wBACT,GACAb;oBAEJ,OACEnE,MACE,yDACAkE;gBAGN,EAAE,OAAOnB,OAAO;oBACd/C,MAAM,kCAAkC+C;gBAC1C;qBAEA/C,MAAM;gBAIV,IAAI,CAAC6E,SAAS;oBACZ,IAAIrB,YACF,MAAM,IAAIoB,aACR,CAAC,oBAAoB,EAAEzE,MAAM,MAAM,EAAE,EACrCqD;oBAGJ,MAAM,IAAI3B,MAAM,CAAC,mBAAmB,EAAE1B,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAI+E;gBAEJ,IAAIjB,yBACFiB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAO/E,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAIoE,cACTW,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACPd;wBACA,aAAaU;oBACf;gBACF;qBACK,IAAIJ,aACTQ,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMV,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,IAAI;oBAC7B;gBACF;gBAGFrB,QAAAA,YAAAA,SAAW0B;gBAEX,OAAO;oBACL,QAAQ;wBACNA;oBACF;oBACAK;gBACF;YACF;QACF;QAEA,OAAO7B;IACT;IAneA,YAAY,EAAE8B,iBAAiB,EAAEC,OAAO,EAAEC,SAAS,EAAmB,CAAE;QANxE,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAGE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;IACnB;AAgeF"}
@@ -242,6 +242,11 @@ class TaskExecutor {
242
242
  assert((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
243
243
  outputResult = data[keyOfResult];
244
244
  }
245
+ if ('Assert' === type && !outputResult) {
246
+ task.usage = usage;
247
+ task.thought = thought;
248
+ throw new Error(`Assertion failed: ${thought}`);
249
+ }
245
250
  return {
246
251
  output: outputResult,
247
252
  log: queryDump,
@@ -1 +1 @@
1
- {"version":3,"file":"agent/tasks.mjs","sources":["webpack://@midscene/core/./src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport type {\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\nconst debug = getDebug('device-task-executor');\nconst defaultReplanningCycleLimit = 10;\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.conversationHistory = new ConversationHistory();\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n },\n );\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(plans, modelConfig, options);\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n await session.appendAndRun(task);\n\n return {\n runner: session.getRunner(),\n };\n }\n\n private createPlanningTask(\n userInstruction: string,\n actionContext: string | undefined,\n modelConfig: IModelConfig,\n ): ExecutionTaskPlanningApply {\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'Plan',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfig;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars' ? modelConfig.uiTarsModelVersion : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(\n param.userInstruction,\n {\n context: uiContext,\n actionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig,\n conversationHistory: this.conversationHistory,\n },\n );\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n\n const finalActions = actions || [];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n if (finalActions.length === 0) {\n assert(\n !more_actions_needed_by_instruction || sleep,\n error ? `Failed to plan: ${error}` : 'No plan found',\n );\n }\n\n return {\n output: {\n actions: finalActions,\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n },\n cache: {\n hit: false,\n },\n uiContext,\n };\n },\n };\n\n return task;\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);\n const result = await session.appendAndRun(tasks);\n const { output } = result!;\n return {\n output,\n runner: session.getRunner(),\n };\n }\n\n private getReplanningCycleLimit(isVlmUiTars: boolean) {\n return (\n this.replanningCycleLimit ||\n globalConfigManager.getEnvConfigInNumber(\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n ) ||\n (isVlmUiTars\n ? defaultVlmUiTarsReplanningCycleLimit\n : defaultReplanningCycleLimit)\n );\n }\n\n async action(\n userPrompt: string,\n modelConfig: IModelConfig,\n actionContext?: string,\n cacheable?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit = this.getReplanningCycleLimit(\n modelConfig.vlMode === 'vlm-ui-tars',\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;\n\n return session.appendErrorPlan(errorMsg);\n }\n\n // Create planning task (automatically includes execution history if available)\n const planningTask = this.createPlanningTask(\n userPrompt,\n actionContext,\n modelConfig,\n );\n\n const result = await session.appendAndRun(planningTask);\n const planResult: PlanningAIResponse = result?.output;\n if (session.isInErrorState()) {\n return {\n output: planResult,\n runner,\n };\n }\n\n // Execute planned actions\n const plans = planResult.actions || [];\n yamlFlow.push(...(planResult.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(plans, modelConfig, {\n cacheable,\n subTask: true,\n });\n await session.appendAndRun(executables.tasks);\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (session.isInErrorState()) {\n return {\n output: undefined,\n runner,\n };\n }\n\n // Check if task is complete\n if (!planResult.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n replanCount++;\n }\n\n return {\n output: {\n yamlFlow,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n locate: null,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner: session.getRunner(),\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n locate: null,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let startTime = Date.now();\n let errorThought = '';\n while (Date.now() - overallStartTime < timeoutMs) {\n startTime = Date.now();\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n {\n doNotThrowError: true,\n },\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - startTime < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - startTime);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n"],"names":["debug","getDebug","defaultReplanningCycleLimit","defaultVlmUiTarsReplanningCycleLimit","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfig","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","actionContext","startTime","Date","vlMode","uiTarsModelVersion","undefined","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","tasks","result","output","isVlmUiTars","globalConfigManager","MIDSCENE_REPLANNING_CYCLE_LIMIT","userPrompt","cacheable","runner","replanCount","yamlFlow","replanningCycleLimit","errorMsg","planningTask","executables","type","demand","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","Error","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","errorThought","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA0CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,8BAA8B;AACpC,MAAMC,uCAAuC;AAEtC,MAAMC;IAgBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAwBQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK;QACvB;IAEJ;IAEA,MAAa,wBACXG,KAAuB,EACvBC,WAAyB,EACzBJ,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAACG,OAAOC,aAAaJ;IACpD;IAEA,MAAM,uBAAuBK,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMC,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACL,QAAQF,QAAQ,SAAS;QAC3B;IACF;IAEQ,mBACNF,eAAuB,EACvBS,aAAiC,EACjCV,WAAyB,EACG;QAC5B,MAAMK,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAMI,YAAYC,KAAK,GAAG;gBAC1B,MAAM,EAAEJ,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,MAAM,EAAEK,MAAM,EAAE,GAAGb;gBACnB,MAAMc,qBACJD,AAAW,kBAAXA,SAA2Bb,YAAY,kBAAkB,GAAGe;gBAE9DN,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;gBAEF,MAAMO,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;gBACpD1B,MACE,sCACA0B,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;gBAEhDR,OAAOS,MAAM,OAAO,CAACF,cAAc;gBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;gBAIrG,MAAMC,aAAa,MAAON,AAAAA,CAAAA,qBAAqBO,iBAAiBC,IAAG,EACjEhB,MAAM,eAAe,EACrB;oBACE,SAASE;oBACTE;oBACA,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;oBAC3CM;oBACAhB;oBACA,qBAAqB,IAAI,CAAC,mBAAmB;gBAC/C;gBAEFV,MAAM,cAAciC,KAAK,SAAS,CAACH,YAAY,MAAM;gBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;gBAEJb,gBAAgB,IAAI,CAAC,GAAG,GAAG;oBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBAClCsB;gBACF;gBACAtB,gBAAgB,IAAI,CAAC,KAAK,GAAGqB;gBAE7B,MAAMG,eAAeP,WAAW,EAAE;gBAElC,IAAIM,OAAO;oBACT,MAAME,UAAUpB,KAAK,GAAG;oBACxB,MAAMqB,gBAAgBH,QAASE,CAAAA,UAAUrB,SAAQ;oBACjD,IAAIsB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;gBAErC;gBAEA,IAAIF,AAAwB,MAAxBA,aAAa,MAAM,EACrBtB,OACE,CAACiB,sCAAsCI,OACvCH,QAAQ,CAAC,gBAAgB,EAAEA,OAAO,GAAG;gBAIzC,OAAO;oBACL,QAAQ;wBACN,SAASI;wBACTL;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA,OAAO;wBACL,KAAK;oBACP;oBACAZ;gBACF;YACF;QACF;QAEA,OAAOH;IACT;IAEA,MAAM,SACJV,KAAa,EACbI,KAAuB,EACvBC,WAAyB,EACC;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACR;QAC5C,MAAM,EAAEuC,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAACnC,OAAOC;QAC5D,MAAMmC,SAAS,MAAMhC,QAAQ,YAAY,CAAC+B;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD;QACnB,OAAO;YACLC;YACA,QAAQjC,QAAQ,SAAS;QAC3B;IACF;IAEQ,wBAAwBkC,WAAoB,EAAE;QACpD,OACE,IAAI,CAAC,oBAAoB,IACzBC,oBAAoB,oBAAoB,CACtCC,oCAEDF,CAAAA,cACG5C,uCACAD,2BAA0B;IAElC;IAEA,MAAM,OACJgD,UAAkB,EAClBxC,WAAyB,EACzBU,aAAsB,EACtB+B,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMtC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUoC;QAEzB,MAAME,SAASvC,QAAQ,SAAS;QAEhC,IAAIwC,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBAAuB,IAAI,CAAC,uBAAuB,CACvD7C,AAAuB,kBAAvBA,YAAY,MAAM;QAIpB,MAAO,KAAM;YACX,IAAI2C,cAAcE,sBAAsB;gBACtC,MAAMC,WAAW,CAAC,WAAW,EAAED,qBAAqB,+EAA+E,CAAC;gBAEpI,OAAO1C,QAAQ,eAAe,CAAC2C;YACjC;YAGA,MAAMC,eAAe,IAAI,CAAC,kBAAkB,CAC1CP,YACA9B,eACAV;YAGF,MAAMmC,SAAS,MAAMhC,QAAQ,YAAY,CAAC4C;YAC1C,MAAM3B,aAAiCe,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM;YACrD,IAAIhC,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQiB;gBACRsB;YACF;YAIF,MAAM3C,QAAQqB,WAAW,OAAO,IAAI,EAAE;YACtCwB,SAAS,IAAI,IAAKxB,WAAW,QAAQ,IAAI,EAAE;YAE3C,IAAI4B;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAACjD,OAAOC,aAAa;oBACnEyC;oBACA,SAAS;gBACX;gBACA,MAAMtC,QAAQ,YAAY,CAAC6C,YAAY,KAAK;YAC9C,EAAE,OAAOrB,OAAO;gBACd,OAAOxB,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEwB,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ExB,QACC;YAEP;YACA,IAAII,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQY;gBACR2B;YACF;YAIF,IAAI,CAACtB,WAAW,kCAAkC,EAChD;YAIFuB;QACF;QAEA,OAAO;YACL,QAAQ;gBACNC;YACF;YACAF;QACF;IACF;IAEQ,oBACNO,IAAsE,EACtEC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASJ;YACT,QAAQ;YACR,OAAO;gBACL,YAAYG,mBACP;oBACCF;oBACAE;gBACF,IACAF;YACN;YACA,UAAU,OAAO5C,OAAOgD;gBACtB,MAAM,EAAEjD,IAAI,EAAE,GAAGiD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZpD,KAAK,GAAG,GAAG;wBACToD;oBACF;gBACF;gBAGA,MAAMjD,YAAY8C,YAAY,SAAS;gBACvC7C,OAAOD,WAAW;gBAElB,MAAMkD,mBAAmBT,AAAS,YAATA;gBACzB,IAAIU,cAAcT;gBAClB,IAAIU,cAAc;gBAClB,IAAIF,oBAAqBT,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEW,cAAc;oBACd,MAAMC,gBACJZ,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIS,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGX,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIY;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,KAAK,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1D7D,MAAM;oBACN,MAAM0E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,MAAM;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACA3D,aACAmD,KACAY,sBACAX;gBAEJ,EAAE,OAAOzB,OAAO;oBACd,IAAIA,iBAAiBuC,cACnBV,UAAU7B,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEwC,IAAI,EAAEvC,KAAK,EAAEwC,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAIlB,AAAS,cAATA,MAEPoB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACL5D,OACE0D,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,IAAM,CAACP,YAAY,AAAD,MAAM7C,QACxB;oBAEFsD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,OAAO;oBACL,QAAQS;oBACR,KAAKd;oBACL3B;oBACAwC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJJ,IAA0D,EAC1DC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMjD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACE6C,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAAS3B,KAAK,SAAS,CAAC2B;QAIzD,MAAMG,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CJ,MACAC,QACAlD,aACAmD,KACAC;QAGF,MAAMjB,SAAS,MAAMhC,QAAQ,YAAY,CAACkD;QAE1C,IAAI,CAAClB,QACH,MAAM,IAAImC,MACR;QAIJ,MAAM,EAAElC,MAAM,EAAEgC,OAAO,EAAE,GAAGjC;QAE5B,OAAO;YACLC;YACAgC;YACA,QAAQjE,QAAQ,SAAS;QAC3B;IACF;IAEQ,UAAUoE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;YACA,QAAQ;QACV;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBtB,GAA+B,EAC/BnD,WAAyB,EACO;QAChC,MAAM,EAAE0E,UAAU,EAAEtB,gBAAgB,EAAE,GAAGuB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMvE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAWwE;QAE1B,MAAMlC,SAASvC,QAAQ,SAAS;QAChC,MAAM,EAAE0E,SAAS,EAAEC,eAAe,EAAE,GAAG3B;QAEvC1C,OAAOgE,WAAW;QAClBhE,OAAOoE,WAAW;QAClBpE,OAAOqE,iBAAiB;QAExBrE,OACEqE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBnE,KAAK,GAAG;QACjC,IAAID,YAAYC,KAAK,GAAG;QACxB,IAAIoE,eAAe;QACnB,MAAOpE,KAAK,GAAG,KAAKmE,mBAAmBF,UAAW;YAChDlE,YAAYC,KAAK,GAAG;YACpB,MAAMyC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAqB,YACA1E,aACA;gBACE,iBAAiB;YACnB,GACAoD;YAGF,MAAMjB,SAAU,MAAMhC,QAAQ,YAAY,CAACkD;YAO3C,IAAIlB,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,EAChB,OAAO;gBACL,QAAQpB;gBACR2B;YACF;YAGFsC,eACE7C,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,OAAO,AAAD,KACb,CAACA,UAAU,CAAC,0BAA0B,EAAEuC,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMO,MAAMrE,KAAK,GAAG;YACpB,IAAIqE,MAAMtE,YAAYmE,iBAAiB;gBACrC,MAAM7C,gBAAgB6C,kBAAmBG,CAAAA,MAAMtE,SAAQ;gBACvD,MAAMuE,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQjD;gBACV;gBACA,MAAM9B,QAAQ,MAAM,CAAC+E;YACvB;QACF;QAEA,OAAO/E,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAE6E,cAAc;IACnE;IAjiBA,YACEG,iBAAoC,EACpCC,OAAgB,EAChBC,IAIC,CACD;QA3BF;QAEA;QAEA;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA;QAgBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,WAAW;QAC5C,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;QAC3B;IACF;AA8gBF"}
1
+ {"version":3,"file":"agent/tasks.mjs","sources":["webpack://@midscene/core/./src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport type {\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\nconst debug = getDebug('device-task-executor');\nconst defaultReplanningCycleLimit = 10;\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.conversationHistory = new ConversationHistory();\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n },\n );\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(plans, modelConfig, options);\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n await session.appendAndRun(task);\n\n return {\n runner: session.getRunner(),\n };\n }\n\n private createPlanningTask(\n userInstruction: string,\n actionContext: string | undefined,\n modelConfig: IModelConfig,\n ): ExecutionTaskPlanningApply {\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'Plan',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfig;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars' ? modelConfig.uiTarsModelVersion : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(\n param.userInstruction,\n {\n context: uiContext,\n actionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig,\n conversationHistory: this.conversationHistory,\n },\n );\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n\n const finalActions = actions || [];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n if (finalActions.length === 0) {\n assert(\n !more_actions_needed_by_instruction || sleep,\n error ? `Failed to plan: ${error}` : 'No plan found',\n );\n }\n\n return {\n output: {\n actions: finalActions,\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n },\n cache: {\n hit: false,\n },\n uiContext,\n };\n },\n };\n\n return task;\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);\n const result = await session.appendAndRun(tasks);\n const { output } = result!;\n return {\n output,\n runner: session.getRunner(),\n };\n }\n\n private getReplanningCycleLimit(isVlmUiTars: boolean) {\n return (\n this.replanningCycleLimit ||\n globalConfigManager.getEnvConfigInNumber(\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n ) ||\n (isVlmUiTars\n ? defaultVlmUiTarsReplanningCycleLimit\n : defaultReplanningCycleLimit)\n );\n }\n\n async action(\n userPrompt: string,\n modelConfig: IModelConfig,\n actionContext?: string,\n cacheable?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit = this.getReplanningCycleLimit(\n modelConfig.vlMode === 'vlm-ui-tars',\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;\n\n return session.appendErrorPlan(errorMsg);\n }\n\n // Create planning task (automatically includes execution history if available)\n const planningTask = this.createPlanningTask(\n userPrompt,\n actionContext,\n modelConfig,\n );\n\n const result = await session.appendAndRun(planningTask);\n const planResult: PlanningAIResponse = result?.output;\n if (session.isInErrorState()) {\n return {\n output: planResult,\n runner,\n };\n }\n\n // Execute planned actions\n const plans = planResult.actions || [];\n yamlFlow.push(...(planResult.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(plans, modelConfig, {\n cacheable,\n subTask: true,\n });\n await session.appendAndRun(executables.tasks);\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (session.isInErrorState()) {\n return {\n output: undefined,\n runner,\n };\n }\n\n // Check if task is complete\n if (!planResult.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n replanCount++;\n }\n\n return {\n output: {\n yamlFlow,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n locate: null,\n param: {\n dataDemand: multimodalPrompt\n ? ({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data === null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner: session.getRunner(),\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n locate: null,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let startTime = Date.now();\n let errorThought = '';\n while (Date.now() - overallStartTime < timeoutMs) {\n startTime = Date.now();\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n {\n doNotThrowError: true,\n },\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - startTime < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - startTime);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n }\n}\n"],"names":["debug","getDebug","defaultReplanningCycleLimit","defaultVlmUiTarsReplanningCycleLimit","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfig","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","actionContext","startTime","Date","vlMode","uiTarsModelVersion","undefined","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","tasks","result","output","isVlmUiTars","globalConfigManager","MIDSCENE_REPLANNING_CYCLE_LIMIT","userPrompt","cacheable","runner","replanCount","yamlFlow","replanningCycleLimit","errorMsg","planningTask","executables","type","demand","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","Error","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","errorThought","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA0CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,8BAA8B;AACpC,MAAMC,uCAAuC;AAEtC,MAAMC;IAgBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAwBQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK;QACvB;IAEJ;IAEA,MAAa,wBACXG,KAAuB,EACvBC,WAAyB,EACzBJ,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAACG,OAAOC,aAAaJ;IACpD;IAEA,MAAM,uBAAuBK,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMC,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACL,QAAQF,QAAQ,SAAS;QAC3B;IACF;IAEQ,mBACNF,eAAuB,EACvBS,aAAiC,EACjCV,WAAyB,EACG;QAC5B,MAAMK,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAMI,YAAYC,KAAK,GAAG;gBAC1B,MAAM,EAAEJ,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,MAAM,EAAEK,MAAM,EAAE,GAAGb;gBACnB,MAAMc,qBACJD,AAAW,kBAAXA,SAA2Bb,YAAY,kBAAkB,GAAGe;gBAE9DN,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;gBAEF,MAAMO,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;gBACpD1B,MACE,sCACA0B,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;gBAEhDR,OAAOS,MAAM,OAAO,CAACF,cAAc;gBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;gBAIrG,MAAMC,aAAa,MAAON,AAAAA,CAAAA,qBAAqBO,iBAAiBC,IAAG,EACjEhB,MAAM,eAAe,EACrB;oBACE,SAASE;oBACTE;oBACA,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;oBAC3CM;oBACAhB;oBACA,qBAAqB,IAAI,CAAC,mBAAmB;gBAC/C;gBAEFV,MAAM,cAAciC,KAAK,SAAS,CAACH,YAAY,MAAM;gBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;gBAEJb,gBAAgB,IAAI,CAAC,GAAG,GAAG;oBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBAClCsB;gBACF;gBACAtB,gBAAgB,IAAI,CAAC,KAAK,GAAGqB;gBAE7B,MAAMG,eAAeP,WAAW,EAAE;gBAElC,IAAIM,OAAO;oBACT,MAAME,UAAUpB,KAAK,GAAG;oBACxB,MAAMqB,gBAAgBH,QAASE,CAAAA,UAAUrB,SAAQ;oBACjD,IAAIsB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;gBAErC;gBAEA,IAAIF,AAAwB,MAAxBA,aAAa,MAAM,EACrBtB,OACE,CAACiB,sCAAsCI,OACvCH,QAAQ,CAAC,gBAAgB,EAAEA,OAAO,GAAG;gBAIzC,OAAO;oBACL,QAAQ;wBACN,SAASI;wBACTL;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA,OAAO;wBACL,KAAK;oBACP;oBACAZ;gBACF;YACF;QACF;QAEA,OAAOH;IACT;IAEA,MAAM,SACJV,KAAa,EACbI,KAAuB,EACvBC,WAAyB,EACC;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACR;QAC5C,MAAM,EAAEuC,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAACnC,OAAOC;QAC5D,MAAMmC,SAAS,MAAMhC,QAAQ,YAAY,CAAC+B;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD;QACnB,OAAO;YACLC;YACA,QAAQjC,QAAQ,SAAS;QAC3B;IACF;IAEQ,wBAAwBkC,WAAoB,EAAE;QACpD,OACE,IAAI,CAAC,oBAAoB,IACzBC,oBAAoB,oBAAoB,CACtCC,oCAEDF,CAAAA,cACG5C,uCACAD,2BAA0B;IAElC;IAEA,MAAM,OACJgD,UAAkB,EAClBxC,WAAyB,EACzBU,aAAsB,EACtB+B,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMtC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUoC;QAEzB,MAAME,SAASvC,QAAQ,SAAS;QAEhC,IAAIwC,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBAAuB,IAAI,CAAC,uBAAuB,CACvD7C,AAAuB,kBAAvBA,YAAY,MAAM;QAIpB,MAAO,KAAM;YACX,IAAI2C,cAAcE,sBAAsB;gBACtC,MAAMC,WAAW,CAAC,WAAW,EAAED,qBAAqB,+EAA+E,CAAC;gBAEpI,OAAO1C,QAAQ,eAAe,CAAC2C;YACjC;YAGA,MAAMC,eAAe,IAAI,CAAC,kBAAkB,CAC1CP,YACA9B,eACAV;YAGF,MAAMmC,SAAS,MAAMhC,QAAQ,YAAY,CAAC4C;YAC1C,MAAM3B,aAAiCe,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM;YACrD,IAAIhC,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQiB;gBACRsB;YACF;YAIF,MAAM3C,QAAQqB,WAAW,OAAO,IAAI,EAAE;YACtCwB,SAAS,IAAI,IAAKxB,WAAW,QAAQ,IAAI,EAAE;YAE3C,IAAI4B;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAACjD,OAAOC,aAAa;oBACnEyC;oBACA,SAAS;gBACX;gBACA,MAAMtC,QAAQ,YAAY,CAAC6C,YAAY,KAAK;YAC9C,EAAE,OAAOrB,OAAO;gBACd,OAAOxB,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEwB,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ExB,QACC;YAEP;YACA,IAAII,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQY;gBACR2B;YACF;YAIF,IAAI,CAACtB,WAAW,kCAAkC,EAChD;YAIFuB;QACF;QAEA,OAAO;YACL,QAAQ;gBACNC;YACF;YACAF;QACF;IACF;IAEQ,oBACNO,IAAsE,EACtEC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASJ;YACT,QAAQ;YACR,OAAO;gBACL,YAAYG,mBACP;oBACCF;oBACAE;gBACF,IACAF;YACN;YACA,UAAU,OAAO5C,OAAOgD;gBACtB,MAAM,EAAEjD,IAAI,EAAE,GAAGiD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZpD,KAAK,GAAG,GAAG;wBACToD;oBACF;gBACF;gBAGA,MAAMjD,YAAY8C,YAAY,SAAS;gBACvC7C,OAAOD,WAAW;gBAElB,MAAMkD,mBAAmBT,AAAS,YAATA;gBACzB,IAAIU,cAAcT;gBAClB,IAAIU,cAAc;gBAClB,IAAIF,oBAAqBT,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEW,cAAc;oBACd,MAAMC,gBACJZ,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIS,cAAc;wBACZ,CAACC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGX,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIY;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,KAAK,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1D7D,MAAM;oBACN,MAAM0E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,MAAM;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACA3D,aACAmD,KACAY,sBACAX;gBAEJ,EAAE,OAAOzB,OAAO;oBACd,IAAIA,iBAAiBuC,cACnBV,UAAU7B,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEwC,IAAI,EAAEvC,KAAK,EAAEwC,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAIlB,AAAS,cAATA,MAEPoB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACL5D,OACE0D,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,IAAM,CAACP,YAAY,AAAD,MAAM7C,QACxB;oBAEFsD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIX,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtChE,KAAK,KAAK,GAAGuB;oBACbvB,KAAK,OAAO,GAAG+D;oBACf,MAAM,IAAIE,MAAM,CAAC,kBAAkB,EAAEF,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACL3B;oBACAwC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJJ,IAA0D,EAC1DC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMjD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACE6C,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAAS3B,KAAK,SAAS,CAAC2B;QAIzD,MAAMG,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CJ,MACAC,QACAlD,aACAmD,KACAC;QAGF,MAAMjB,SAAS,MAAMhC,QAAQ,YAAY,CAACkD;QAE1C,IAAI,CAAClB,QACH,MAAM,IAAImC,MACR;QAIJ,MAAM,EAAElC,MAAM,EAAEgC,OAAO,EAAE,GAAGjC;QAE5B,OAAO;YACLC;YACAgC;YACA,QAAQjE,QAAQ,SAAS;QAC3B;IACF;IAEQ,UAAUoE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;YACA,QAAQ;QACV;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBtB,GAA+B,EAC/BnD,WAAyB,EACO;QAChC,MAAM,EAAE0E,UAAU,EAAEtB,gBAAgB,EAAE,GAAGuB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMvE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAWwE;QAE1B,MAAMlC,SAASvC,QAAQ,SAAS;QAChC,MAAM,EAAE0E,SAAS,EAAEC,eAAe,EAAE,GAAG3B;QAEvC1C,OAAOgE,WAAW;QAClBhE,OAAOoE,WAAW;QAClBpE,OAAOqE,iBAAiB;QAExBrE,OACEqE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBnE,KAAK,GAAG;QACjC,IAAID,YAAYC,KAAK,GAAG;QACxB,IAAIoE,eAAe;QACnB,MAAOpE,KAAK,GAAG,KAAKmE,mBAAmBF,UAAW;YAChDlE,YAAYC,KAAK,GAAG;YACpB,MAAMyC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAqB,YACA1E,aACA;gBACE,iBAAiB;YACnB,GACAoD;YAGF,MAAMjB,SAAU,MAAMhC,QAAQ,YAAY,CAACkD;YAO3C,IAAIlB,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,EAChB,OAAO;gBACL,QAAQpB;gBACR2B;YACF;YAGFsC,eACE7C,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,OAAO,AAAD,KACb,CAACA,UAAU,CAAC,0BAA0B,EAAEuC,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMO,MAAMrE,KAAK,GAAG;YACpB,IAAIqE,MAAMtE,YAAYmE,iBAAiB;gBACrC,MAAM7C,gBAAgB6C,kBAAmBG,CAAAA,MAAMtE,SAAQ;gBACvD,MAAMuE,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQjD;gBACV;gBACA,MAAM9B,QAAQ,MAAM,CAAC+E;YACvB;QACF;QAEA,OAAO/E,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAE6E,cAAc;IACnE;IAviBA,YACEG,iBAAoC,EACpCC,OAAgB,EAChBC,IAIC,CACD;QA3BF;QAEA;QAEA;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA;QAgBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,WAAW;QAC5C,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;QAC3B;IACF;AAohBF"}
@@ -99,7 +99,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
99
99
  return;
100
100
  }
101
101
  }
102
- const getMidsceneVersion = ()=>"1.0.1-beta-20251028121806.0";
102
+ const getMidsceneVersion = ()=>"1.0.1-beta-20251029093754.0";
103
103
  const parsePrompt = (prompt)=>{
104
104
  if ('string' == typeof prompt) return {
105
105
  textPrompt: prompt,
@@ -3,9 +3,9 @@ import { systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
3
3
  import { describeUserPage } from "./prompt/util.mjs";
4
4
  import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/playwright-generator.mjs";
5
5
  import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
6
- import { AiExtractElementInfo, AiLocateElement, AiLocateSection } from "./inspect.mjs";
6
+ import { AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection } from "./inspect.mjs";
7
7
  import { plan } from "./llm-planning.mjs";
8
8
  import { AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, dumpActionParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, loadActionParam, parseActionParam } from "./common.mjs";
9
9
  import { resizeImageForUiTars, uiTarsPlanning } from "./ui-tars-planning.mjs";
10
10
  import { ConversationHistory } from "./conversation-history.mjs";
11
- export { AIActionType, AiExtractElementInfo, AiLocateElement, AiLocateSection, ConversationHistory, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAI, callAIWithObjectResponse, callAIWithStringResponse, describeUserPage, dumpActionParam, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, loadActionParam, parseActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, uiTarsPlanning };
11
+ export { AIActionType, AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection, ConversationHistory, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAI, callAIWithObjectResponse, callAIWithStringResponse, describeUserPage, dumpActionParam, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, loadActionParam, parseActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, uiTarsPlanning };
@@ -6,7 +6,7 @@ import { AIActionType, adaptBboxToRect, expandSearchArea, mergeRects } from "./c
6
6
  import { extractDataQueryPrompt, systemPromptToExtract } from "./prompt/extraction.mjs";
7
7
  import { findElementPrompt, systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
8
8
  import { sectionLocatorInstruction, systemPromptToLocateSection } from "./prompt/llm-section-locator.mjs";
9
- import { describeUserPage } from "./prompt/util.mjs";
9
+ import { orderSensitiveJudgePrompt, systemPromptToJudgeOrderSensitive } from "./prompt/order-sensitive-judge.mjs";
10
10
  import { callAIWithObjectResponse } from "./service-caller/index.mjs";
11
11
  const debugInspect = getDebug('ai:inspect');
12
12
  const debugSection = getDebug('ai:section');
@@ -60,7 +60,6 @@ async function AiLocateElement(options) {
60
60
  const { screenshotBase64 } = context;
61
61
  assert(targetElementDescription, "cannot find the target element description");
62
62
  const userInstructionPrompt = await findElementPrompt.format({
63
- pageDescription: await describeUserPage(context),
64
63
  targetElementDescription: extraTextFromUserPrompt(targetElementDescription)
65
64
  });
66
65
  const systemPrompt = systemPromptToLocateElement(vlMode);
@@ -129,7 +128,6 @@ async function AiLocateElement(options) {
129
128
  };
130
129
  const element = generateElementByPosition(rectCenter);
131
130
  errors = [];
132
- element.isOrderSensitive = 'object' == typeof res.content && null !== res.content && 'isOrderSensitive' in res.content ? res.content.isOrderSensitive : void 0;
133
131
  if (element) matchedElements = [
134
132
  element
135
133
  ];
@@ -261,6 +259,25 @@ async function AiExtractElementInfo(options) {
261
259
  usage: result.usage
262
260
  };
263
261
  }
264
- export { AiExtractElementInfo, AiLocateElement, AiLocateSection };
262
+ async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
263
+ const systemPrompt = systemPromptToJudgeOrderSensitive();
264
+ const userPrompt = orderSensitiveJudgePrompt(description);
265
+ const msgs = [
266
+ {
267
+ role: 'system',
268
+ content: systemPrompt
269
+ },
270
+ {
271
+ role: 'user',
272
+ content: userPrompt
273
+ }
274
+ ];
275
+ const result = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);
276
+ return {
277
+ isOrderSensitive: result.content.isOrderSensitive ?? false,
278
+ usage: result.usage
279
+ };
280
+ }
281
+ export { AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection };
265
282
 
266
283
  //# sourceMappingURL=inspect.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/inspect.mjs","sources":["webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport { describeUserPage } from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: await describeUserPage(context),\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement =\n generateElementByPosition(rectCenter);\n errors = [];\n\n element.isOrderSensitive =\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined;\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = await extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","userInstructionPrompt","findElementPrompt","describeUserPage","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","_options_searchConfig_rect","_options_searchConfig_rect1","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","generateElementByPosition","undefined","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;;;;;;;;AAmDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IASC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,OACEL,0BACA;IAEF,MAAMM,wBAAwB,MAAMC,kBAAkB,MAAM,CAAC;QAC3D,iBAAiB,MAAMC,iBAAiBT;QACxC,0BAA0BX,wBAAwBY;IACpD;IACA,MAAMS,eAAeC,4BAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;YAWXkB,4BACCC;QAXdZ,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAa,QAAAI,CAAAA,6BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,2BAA2B,KAAK;QAC7CH,cAAc,QAAAI,CAAAA,8BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,4BAA2B,MAAM;QAC/CH,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,cAAXA,QAAsB;QAC/B,MAAMe,eAAe,MAAMC,4BAA4BR;QACvDC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC;IAEA,MAAMzB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAML;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMoB,SAAS,MAAM9B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMC,MAAM,MAAMpB,SAASR,MAAM6B,aAAa,eAAe,EAAEpB;IAE/D,MAAMqB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAK1DQ,6BAAAA,uBACAC,6BAAAA;YALFL,UAAUM,gBACRV,IAAI,OAAO,CAAC,IAAI,EAChBT,YACAC,aAAAA,QACAgB,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/BhB,oBACAC,qBACAZ;YAGFlB,aAAa,WAAWwC;YAExB,MAAMO,aAAa;gBACjB,GAAGP,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMQ,UACJC,0BAA0BF;YAC5BL,SAAS,EAAE;YAEXM,QAAQ,gBAAgB,GACtB,AAAuB,YAAvB,OAAOZ,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCc;YAEN,IAAIF,SACFP,kBAAkB;gBAACO;aAAQ;QAE/B;IACF,EAAE,OAAOG,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACT,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEU,IAAI,CAAC,CAAC;aAFtBV,SAAS;YAACU;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMZ;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;IAClB;AACF;AAEO,eAAekB,gBAAgBzC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEyC,kBAAkB,EAAEtC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMU,eAAegC,4BAA4BtC;IACjD,MAAMuC,gCAAgC,MAAMC,0BAA0B,MAAM,CAAC;QAC3E,oBAAoBvD,wBAAwBoD;IAC9C;IACA,MAAM/C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMsC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMpB,SAAS,MAAM9B,mBAAmB;YACtC,QAAQkD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA/C,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMwB,SAAS,MAAMC,yBACnBpD,MACA6B,aAAa,YAAY,EACzBpB;IAGF,IAAI4C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAajB,gBACjBgB,aACAhD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0B6D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DzD,aAAa,wBAAwB8D;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASvB,MAAM,OAAO,CAACuB,OAC/B,GAAG,CAAC,CAACA,OACGpB,gBACLoB,MACApD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqB+D;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7D/D,aAAa,iBAAiBiE;QAG9BN,cAAcQ,iBAAiBF,YAAYrD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B2D;IAC1C;IAEA,IAAIS,cAAcnD;IAClB,IAAI0C,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BrD,kBACA0C,aACA3C,AAAW,cAAXA;QAEFoD,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAapB,KAAK,SAAS,CAACoB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwB5D,OAO7C;IACC,MAAM,EAAE6D,SAAS,EAAE5D,OAAO,EAAE6D,aAAa,EAAErE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAeoD;IACrB,MAAM,EAAEzD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM+D,wBAAwB,MAAMC,uBAClCjE,QAAQ,eAAe,IAAI,IAC3B6D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK5D;YACL,QAAQ;QACV;IACF;IAGF4D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMrE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASuD;QACX;KACD;IAED,IAAIzE,kBAAkB;QACpB,MAAM6B,SAAS,MAAM9B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMwB,SAAS,MAAMC,yBACnBpD,MACA6B,aAAa,YAAY,EACzBpB;IAEF,OAAO;QACL,aAAa0C,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;IACrB;AACF"}
1
+ {"version":3,"file":"ai-model/inspect.mjs","sources":["webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement =\n generateElementByPosition(rectCenter);\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = await extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(\n msgs,\n AIActionType.INSPECT_ELEMENT, // Reuse existing action type for now\n modelConfig,\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","_options_searchConfig_rect","_options_searchConfig_rect1","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;AAsDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IASC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,OACEL,0BACA;IAEF,MAAMM,wBAAwB,MAAMC,kBAAkB,MAAM,CAAC;QAC3D,0BAA0BnB,wBAAwBY;IACpD;IACA,MAAMQ,eAAeC,4BAA4BN;IAEjD,IAAIO,eAAeN;IACnB,IAAIO,aAAaZ,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIa,cAAcb,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIc,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAId,QAAQ,YAAY,EAAE;YAWXiB,4BACCC;QAXdX,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFY,eAAeZ,QAAQ,YAAY,CAAC,WAAW;QAC/Ca,aAAa,QAAAI,CAAAA,6BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,2BAA2B,KAAK;QAC7CH,cAAc,QAAAI,CAAAA,8BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,4BAA2B,MAAM;QAC/CH,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIT,AAAW,cAAXA,QAAsB;QAC/B,MAAMc,eAAe,MAAMC,4BAA4BR;QACvDC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC;IAEA,MAAMxB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMmB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAMC,MAAM,MAAMnB,SAASR,MAAM4B,aAAa,eAAe,EAAEnB;IAE/D,MAAMoB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAK1DQ,6BAAAA,uBACAC,6BAAAA;YALFL,UAAUM,gBACRV,IAAI,OAAO,CAAC,IAAI,EAChBT,YACAC,aAAAA,QACAgB,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/BhB,oBACAC,qBACAX;YAGFlB,aAAa,WAAWuC;YAExB,MAAMO,aAAa;gBACjB,GAAGP,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMQ,UACJC,0BAA0BF;YAC5BL,SAAS,EAAE;YAEX,IAAIM,SACFP,kBAAkB;gBAACO;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACR,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;IAClB;AACF;AAEO,eAAeiB,gBAAgBvC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEuC,kBAAkB,EAAEpC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMS,eAAe+B,4BAA4BpC;IACjD,MAAMqC,gCAAgC,MAAMC,0BAA0B,MAAM,CAAC;QAC3E,oBAAoBrD,wBAAwBkD;IAC9C;IACA,MAAM7C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKJ;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMnB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQgD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA7C,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAMuB,SAAS,MAAMC,yBACnBlD,MACA4B,aAAa,YAAY,EACzBnB;IAGF,IAAI0C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,gBACjBe,aACA9C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0B2D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DvD,aAAa,wBAAwB4D;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAStB,MAAM,OAAO,CAACsB,OAC/B,GAAG,CAAC,CAACA,OACGnB,gBACLmB,MACAlD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqB6D;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7D7D,aAAa,iBAAiB+D;QAG9BN,cAAcQ,iBAAiBF,YAAYnD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2ByD;IAC1C;IAEA,IAAIS,cAAcjD;IAClB,IAAIwC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BnD,kBACAwC,aACAzC,AAAW,cAAXA;QAEFkD,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAanB,KAAK,SAAS,CAACmB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwB1D,OAO7C;IACC,MAAM,EAAE2D,SAAS,EAAE1D,OAAO,EAAE2D,aAAa,EAAEnE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAMU,eAAemD;IACrB,MAAM,EAAEvD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM6D,wBAAwB,MAAMC,uBAClC/D,QAAQ,eAAe,IAAI,IAC3B2D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK1D;YACL,QAAQ;QACV;IACF;IAGF0D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMnE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,IAAIvE,kBAAkB;QACpB,MAAM4B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAMuB,SAAS,MAAMC,yBACnBlD,MACA4B,aAAa,YAAY,EACzBnB;IAEF,OAAO;QACL,aAAawC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB/D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMM,eAAeyD;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAMvE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS0D;QACX;KACD;IAED,MAAMxB,SAAS,MAAMzC,SACnBR,MACA4B,aAAa,eAAe,EAC5BnB;IAGF,OAAO;QACL,kBAAkBwC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
@@ -4,68 +4,44 @@ function systemPromptToLocateElement(vlMode) {
4
4
  const bboxComment = bboxDescription(vlMode);
5
5
  return `
6
6
  ## Role:
7
- You are an expert in software testing.
7
+ You are an AI assistant that helps identify UI elements.
8
8
 
9
9
  ## Objective:
10
- - Identify elements in screenshots and text that match the user's description.
11
- - Give the coordinates of the element that matches the user's description best in the screenshot.
12
- - Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
10
+ - Identify elements in screenshots that match the user's description.
11
+ - Provide the coordinates of the element that matches the user's description.
13
12
 
14
13
  ## Output Format:
15
14
  \`\`\`json
16
15
  {
17
16
  "bbox": [number, number, number, number], // ${bboxComment}
18
- "errors"?: string[],
19
- "isOrderSensitive": boolean // Whether the targetElementDescription is order-sensitive (true/false)
17
+ "errors"?: string[]
20
18
  }
21
19
  \`\`\`
22
20
 
23
21
  Fields:
24
- * \`bbox\` is the bounding box of the element that matches the user's description best in the screenshot
25
- * \`isOrderSensitive\` is a boolean indicating whether the user's description is order-sensitive (true/false)
22
+ * \`bbox\` is the bounding box of the element that matches the user's description
26
23
  * \`errors\` is an optional array of error messages (if any)
27
24
 
28
- Order-sensitive means the description contains phrases like:
29
- - "the third item in the list"
30
- - "the last button"
31
- - "the first input box"
32
- - "the second row"
33
-
34
- Not order-sensitive means the description is like:
35
- - "confirm button"
36
- - "search box"
37
- - "password input"
38
-
39
- For example, when an element is found and the description is order-sensitive:
25
+ For example, when an element is found:
40
26
  \`\`\`json
41
27
  {
42
28
  "bbox": [100, 100, 200, 200],
43
- "isOrderSensitive": true,
44
29
  "errors": []
45
30
  }
46
31
  \`\`\`
47
32
 
48
- When no element is found and the description is not order-sensitive:
33
+ When no element is found:
49
34
  \`\`\`json
50
35
  {
51
36
  "bbox": [],
52
- "isOrderSensitive": false,
53
37
  "errors": ["I can see ..., but {some element} is not found"]
54
38
  }
55
39
  \`\`\`
56
40
  `;
57
41
  }
58
42
  const findElementPrompt = new PromptTemplate({
59
- template: `
60
- Here is the item user want to find:
61
- =====================================
62
- {targetElementDescription}
63
- =====================================
64
-
65
- {pageDescription}
66
- `,
43
+ template: "Find: {targetElementDescription}",
67
44
  inputVariables: [
68
- "pageDescription",
69
45
  "targetElementDescription"
70
46
  ]
71
47
  });
@@ -1 +1 @@
1
- {"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import { PromptTemplate } from '@langchain/core/prompts';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an expert in software testing.\n\n## Objective:\n- Identify elements in screenshots and text that match the user's description.\n- Give the coordinates of the element that matches the user's description best in the screenshot.\n- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[],\n \"isOrderSensitive\": boolean // Whether the targetElementDescription is order-sensitive (true/false)\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description best in the screenshot\n* \\`isOrderSensitive\\` is a boolean indicating whether the user's description is order-sensitive (true/false)\n* \\`errors\\` is an optional array of error messages (if any)\n\nOrder-sensitive means the description contains phrases like:\n- \"the third item in the list\"\n- \"the last button\"\n- \"the first input box\"\n- \"the second row\"\n\nNot order-sensitive means the description is like:\n- \"confirm button\"\n- \"search box\"\n- \"password input\"\n\nFor example, when an element is found and the description is order-sensitive:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"isOrderSensitive\": true,\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found and the description is not order-sensitive:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"isOrderSensitive\": false,\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = new PromptTemplate({\n template: `\nHere is the item user want to find:\n=====================================\n{targetElementDescription}\n=====================================\n\n{pageDescription}\n `,\n inputVariables: ['pageDescription', 'targetElementDescription'],\n});\n"],"names":["systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","PromptTemplate"],"mappings":";;AAGO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,gBAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;;gDAYsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAuC9D,CAAC;AACD;AAEO,MAAME,oBAAoB,IAAIC,eAAe;IAClD,UAAU,CAAC;;;;;;;EAOX,CAAC;IACD,gBAAgB;QAAC;QAAmB;KAA2B;AACjE"}
1
+ {"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import { PromptTemplate } from '@langchain/core/prompts';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = new PromptTemplate({\n template: 'Find: {targetElementDescription}',\n inputVariables: ['targetElementDescription'],\n});\n"],"names":["systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","PromptTemplate"],"mappings":";;AAGO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,gBAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;AAwB9D,CAAC;AACD;AAEO,MAAME,oBAAoB,IAAIC,eAAe;IAClD,UAAU;IACV,gBAAgB;QAAC;KAA2B;AAC9C"}