@midscene/core 1.0.1-beta-20251028121806.0 → 1.0.1-beta-20251029093754.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/task-builder.mjs +7 -3
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +5 -0
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/index.mjs +2 -2
- package/dist/es/ai-model/inspect.mjs +21 -4
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-locator.mjs +8 -32
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +15 -14
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +35 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs.map +1 -0
- package/dist/es/device/index.mjs.map +1 -1
- package/dist/es/task-runner.mjs +31 -3
- package/dist/es/task-runner.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/task-builder.js +7 -3
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/tasks.js +5 -0
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/index.js +13 -10
- package/dist/lib/ai-model/inspect.js +25 -5
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-locator.js +8 -32
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-section-locator.js +15 -14
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -1
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +72 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js.map +1 -0
- package/dist/lib/device/index.js.map +1 -1
- package/dist/lib/task-runner.js +31 -3
- package/dist/lib/task-runner.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/index.d.ts +1 -1
- package/dist/types/ai-model/inspect.d.ts +6 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +0 -1
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/device/index.d.ts +4 -2
- package/dist/types/task-runner.d.ts +2 -0
- package/dist/types/types.d.ts +2 -1
- package/package.json +2 -3
|
@@ -161,6 +161,8 @@ class TaskBuilder {
|
|
|
161
161
|
const actionFn = action.call.bind(this.interface);
|
|
162
162
|
await actionFn(param, taskContext);
|
|
163
163
|
debug('called action', action.name);
|
|
164
|
+
const delayAfterRunner = action.delayAfterRunner ?? 300;
|
|
165
|
+
if (delayAfterRunner > 0) await sleep(delayAfterRunner);
|
|
164
166
|
try {
|
|
165
167
|
if (this.interface.afterInvokeAction) {
|
|
166
168
|
debug('will call "afterInvokeAction" for interface');
|
|
@@ -254,9 +256,11 @@ class TaskBuilder {
|
|
|
254
256
|
const element = elementFromXpath || elementFromCache || elementFromPlan || elementFromAiLocate;
|
|
255
257
|
let currentCacheEntry;
|
|
256
258
|
if (element && this.taskCache && !cacheHitFlag && (null == param ? void 0 : param.cacheable) !== false) if (this.interface.cacheFeatureForRect) try {
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
259
|
+
var _param_prompt;
|
|
260
|
+
const feature = await this.interface.cacheFeatureForRect(element.rect, {
|
|
261
|
+
targetDescription: 'string' == typeof param.prompt ? param.prompt : null == (_param_prompt = param.prompt) ? void 0 : _param_prompt.prompt,
|
|
262
|
+
modelConfig
|
|
263
|
+
});
|
|
260
264
|
if (feature && Object.keys(feature).length > 0) {
|
|
261
265
|
debug('update cache, prompt: %s, cache: %o', cachePrompt, feature);
|
|
262
266
|
currentCacheEntry = feature;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/task-builder.mjs","sources":["webpack://@midscene/core/./src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type {\n DetailedLocateParam,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskInsightLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport { matchElementFromCache, matchElementFromPlan } from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? 
{ prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n locate,\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n subTask?: boolean;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfig: IModelConfig;\n cacheable?: boolean;\n subTask: boolean;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n constructor({ interfaceInstance, service, taskCache }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfig,\n cacheable,\n subTask: !!options?.subTask,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n [\n 'Sleep',\n (plan) =>\n this.handleSleepPlan(\n plan as PlanningAction<PlanningActionParamSleep>,\n context,\n ),\n ],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? 
defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n locate: plan.locate,\n subTask: context.subTask || undefined,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private handleSleepPlan(\n plan: PlanningAction<PlanningActionParamSleep>,\n context: PlanBuildContext,\n ): void {\n const sleepTask = this.createSleepTask(plan.param, {\n thought: plan.thought,\n locate: plan.locate,\n });\n if (context.subTask) {\n sleepTask.subTask = true;\n }\n context.tasks.push(sleepTask);\n }\n\n public createSleepTask(\n param: PlanningActionParamSleep,\n meta?: { thought?: string; locate?: PlanningAction['locate'] | null },\n ): ExecutionTaskActionApply<PlanningActionParamSleep> {\n return {\n type: 'Action',\n subType: 'Sleep',\n param,\n thought: meta?.thought,\n locate: meta?.locate ?? null,\n executor: async (taskParam) => {\n await sleep(taskParam?.timeMs || 3000);\n },\n };\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n if (!plan.locate || plan.locate === null) {\n debug('Locate action with id is null, will be ignored', plan);\n return;\n }\n\n const taskLocate = this.createLocateTask(plan, plan.locate, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = await this.interface.actionSpace();\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? 
findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n subTask: context.subTask || undefined,\n executor: async (param, taskContext) => {\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. 
Cannot execute action ${planType}.`,\n );\n });\n\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug('will call \"beforeInvokeAction\" for interface');\n await this.interface.beforeInvokeAction(action.name, param);\n debug('called \"beforeInvokeAction\" for interface');\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema);\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n await actionFn(param, taskContext);\n debug('called action', action.name);\n\n try {\n if (this.interface.afterInvokeAction) {\n debug('will call \"afterInvokeAction\" for interface');\n await this.interface.afterInvokeAction(action.name, param);\n debug('called \"afterInvokeAction\" for interface');\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n return {\n output: {\n success: true,\n action: planType,\n param: param,\n },\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskInsightLocateApply {\n const { cacheable, modelConfig } = context;\n let locateParam = detailedLocateParam;\n\n if 
(typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n const taskFind: ExecutionTaskInsightLocateApply = {\n type: 'Insight',\n subType: 'Locate',\n subTask: context.subTask || undefined,\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service task');\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n dump,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n };\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (param.xpath && this.interface.rectMatchesCacheFeature) {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n }\n const elementFromXpath = rectFromXpath\n ? generateElementByPosition({\n x: rectFromXpath.left + rectFromXpath.width / 2,\n y: rectFromXpath.top + rectFromXpath.height / 2,\n })\n : undefined;\n const userExpectedPathHitFlag = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCache = userExpectedPathHitFlag\n ? 
null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n const cacheHitFlag = !!elementFromCache;\n\n const elementFromPlan =\n !userExpectedPathHitFlag && !cacheHitFlag\n ? matchElementFromPlan(param)\n : undefined;\n const planHitFlag = !!elementFromPlan;\n\n let elementFromAiLocate: LocateResultElement | null | undefined;\n if (!userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag) {\n try {\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfig,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n }\n\n const element =\n elementFromXpath ||\n elementFromCache ||\n elementFromPlan ||\n elementFromAiLocate;\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n if (\n element &&\n this.taskCache &&\n !cacheHitFlag &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForRect) {\n try {\n const feature = await this.interface.cacheFeatureForRect(\n element.rect,\n element.isOrderSensitive !== undefined\n ? 
{ _orderSensitive: element.isOrderSensitive }\n : undefined,\n );\n if (feature && Object.keys(feature).length > 0) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForRect failed: %s', error);\n }\n } else {\n debug('cacheFeatureForRect is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (userExpectedPathHitFlag) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (cacheHitFlag) {\n hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n } else if (planHitFlag) {\n hitBy = {\n from: 'Planning',\n context: {\n rect: elementFromPlan?.rect,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element,\n },\n hitBy,\n };\n },\n };\n\n return taskFind;\n 
}\n}\n"],"names":["debug","getDebug","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfig","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","undefined","sleepTask","meta","taskParam","sleep","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","locateTask","result","assert","task","taskContext","_taskContext_element","uiContext","Promise","originalError","originalMessage","String","parseActionParam","error","actionFn","detailedLocateParam","onResult","locateParam","taskFind","_this_taskCache","_locateCacheRecord_cacheContent","locateDump","locateResult","applyDump","dump","_dump_taskInfo","_dump_taskInfo1","rectFromXpath","elementFromXpath","generateElementByPosition","userExpectedPathHitFlag","cachePrompt","locateCacheRecord","cacheEntry","elementFromCache","matchElementFromCache","cacheHitFlag","elementFromPlan","matchElementFromPlan","planHitFlag","elementFromAiLocate","ServiceError","element","currentCacheEntry","feature","Object","hitBy","interfaceInstance","service","taskCache"],"mappings":";;;;;;;;;;;;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AAEhB,SAASC,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACND;QACA,OAAOA;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAoBO,MAAMC;IAaX,MAAa,MACXC,KAAuB,EACvBC,WAAyB,EACzBC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,SAAS;QAEpC,MAAMG,UAA4B;YAChCF;YACAF;YACAG;YACA,SAAS,CAAC,CAACF,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,OAAO,AAAD;QAC5B;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;YAC9D;gBACE;gBACA,CAACG,OACC,IAAI,CAAC,eAAe,CAClBA,MACAH;aAEL;SACF;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQR,MAAO;YACxB,MAAMU,UAAUJ,aAAa,GAAG,CAA
CE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;YACnB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,WAAa;QACzB;QACAP,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEQ,gBACNH,IAA8C,EAC9CH,OAAyB,EACnB;QACN,MAAMQ,YAAY,IAAI,CAAC,eAAe,CAACL,KAAK,KAAK,EAAE;YACjD,SAASA,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;QACrB;QACA,IAAIH,QAAQ,OAAO,EACjBQ,UAAU,OAAO,GAAG;QAEtBR,QAAQ,KAAK,CAAC,IAAI,CAACQ;IACrB;IAEO,gBACLjB,KAA+B,EAC/BkB,IAAqE,EACjB;QACpD,OAAO;YACL,MAAM;YACN,SAAS;YACTlB;YACA,SAASkB,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,OAAO;YACtB,QAAQA,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,MAAM,AAAD,KAAK;YACxB,UAAU,OAAOC;gBACf,MAAMC,MAAMD,AAAAA,CAAAA,QAAAA,YAAAA,KAAAA,IAAAA,UAAW,MAAM,AAAD,KAAK;YACnC;QACF;IACF;IAEA,MAAc,iBACZP,IAAyC,EACzCH,OAAyB,EACV;QACf,IAAI,CAACG,KAAK,MAAM,IAAIA,AAAgB,SAAhBA,KAAK,MAAM,EAAW,YACxCf,MAAM,kDAAkDe;QAI1D,MAAMS,aAAa,IAAI,CAAC,gBAAgB,CAACT,MAAMA,KAAK,MAAM,EAAEH;QAC5DA,QAAQ,KAAK,CAAC,IAAI,CAACY;IACrB;IAEA,MAAc,iBACZT,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMa,WAAWV,KAAK,IAAI;QAC1B,MAAMW,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;QACpD,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMtB,QAAQY,KAAK,KAAK;QAExB,IAAI,CAACY,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI9B,KAAK,CAAC8B,MAAM,EAAE;gBAChB,MAAM5B,aAAaH,oBAAoBC,KAAK,CAAC8B,MAAM;gBACnDjC,MACE,uCACA,CAAC,YAAY,EAAEyB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC/B,KAAK,CAAC8B,MAAM,GAAG,EACvC,CAAC,WAAW,EAAEC,KAAK,SAAS,CAAC7B,aAAa;gBAE5C,MAAM8B,aAAa,IAAI,CAAC,gBAAgB,CACtC9B,YACAF,KAAK,CAAC8B,MAAM,EACZrB,SACA,CAACwB;oBACCjC,KAAK,CAAC8B,MAAM,GAAGG;gBACjB;gBAEFxB,QAAQ,KAAK,CAAC,IAAI,CAACuB;YACrB,OAAO;gBACLE,OACE,CAACL,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3EzB,MAAM,CAAC,OAAO,EAAEiC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMa,OA
KF;YACF,MAAM;YACN,SAASb;YACT,SAASV,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,OAAOhB,OAAOoC;oBAKWC;gBAJjCxC,MACE,oBACAyB,UACAtB,OACA,CAAC,4BAA4B,EAAE,QAAAqC,CAAAA,uBAAAA,YAAY,OAAO,AAAD,IAAlBA,KAAAA,IAAAA,qBAAqB,MAAM,EAAE;gBAG9D,MAAMC,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBT,qBAAqB,OAAO,CAAC,CAACC;oBAC5BI,OACElC,KAAK,CAAC8B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEA,IAAI;oBACF,MAAMiB,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC1C,MAAM;gCACN,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC2B,OAAO,IAAI,EAAExB;gCACrDH,MAAM;4BACR;wBACF;wBACAuB,MAAM;qBACP;gBACH,EAAE,OAAOoB,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,IAAIhB,OAAO,WAAW,EACpB,IAAI;oBACFxB,QAAQ2C,iBAAiB3C,OAAOwB,OAAO,WAAW;gBACpD,EAAE,OAAOoB,OAAY;oBACnB,MAAM,IAAIlB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,MAAM,OAAO,CAAC,cAAc,EAAEb,KAAK,SAAS,CAAC/B,QAAQ,EACtG;wBAAE,OAAO4C;oBAAM;gBAEnB;gBAGF/C,MAAM,kBAAkB2B,OAAO,IAAI;gBACnC,MAAMqB,WAAWrB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAMqB,SAAS7C,OAAOoC;gBACtBvC,MAAM,iBAAiB2B,OAAO,IAAI;gBAElC,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpC3B,MAAM;wBACN,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC2B,OAAO,IAAI,EAAExB;wBACpDH,MAAM;oBACR;gBACF,EAAE,OAAO2C,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,uCAAuC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,OAAO;oBACL,QAAQ;wBACN,SAAS;wBACT,QAAQlB;wBACR,OAAOtB;oBACT;gBACF;YACF;QACF;QAEAS,QAAQ,KAAK,CAAC,IAAI,CAAC0B;IACrB;IAEQ,iBACNvB,IAAyC,EACzCkC,mBAAiD,EACjDrC,OAAyB,EACzBsC,QAAgD,EACf;QACjC,MAAM,EAAEvC,SAAS,EAAEH,WAAW,EAAE,GAAGI;QACnC,IAAIuC,cAAcF;QAElB,IAAI,AAAuB,YAAvB,OAAOE,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAIxC,AAAcQ,WAAdR,WACFwC,cAAc;YACZ,GAAGA,WAAW;YACdxC;QACF;QA
GF,MAAMyC,WAA4C;YAChD,MAAM;YACN,SAAS;YACT,SAASxC,QAAQ,OAAO,IAAIO;YAC5B,OAAOgC;YACP,SAASpC,KAAK,OAAO;YACrB,UAAU,OAAOZ,OAAOoC;oBAkDIc,iBACPC;gBAlDnB,MAAM,EAAEhB,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACElC,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,MAAM,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,IAAI,AAAD,GAC3B,CAAC,qDAAqD,EAAE+B,KAAK,SAAS,CACpE/B,QACC;gBAGL,IAAI,CAACsC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,IAAIc;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;wBAQJC,gBACTC;oBARJ,IAAI,CAACF,MACH;oBAEFH,aAAaG;oBACbpB,KAAK,GAAG,GAAG;wBACToB;oBACF;oBACApB,KAAK,KAAK,GAAG,QAAAqB,CAAAA,iBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,eAAe,KAAK;oBACjC,IAAI,QAAAC,CAAAA,kBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,gBAAe,eAAe,EAChCtB,KAAK,eAAe,GAAGoB,KAAK,QAAQ,CAAC,eAAe;gBAExD;gBAGA,IAAIG;gBACJ,IAAI1D,MAAM,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC,uBAAuB,EACvD0D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;oBAC3D,QAAQ;wBAAC1D,MAAM,KAAK;qBAAC;gBACvB;gBAEF,MAAM2D,mBAAmBD,gBACrBE,0BAA0B;oBACxB,GAAGF,cAAc,IAAI,GAAGA,cAAc,KAAK,GAAG;oBAC9C,GAAGA,cAAc,GAAG,GAAGA,cAAc,MAAM,GAAG;gBAChD,KACA1C;gBACJ,MAAM6C,0BAA0B,CAAC,CAACF;gBAElC,MAAMG,cAAc9D,MAAM,MAAM;gBAChC,MAAM+D,oBAAoB,QAAAb,CAAAA,kBAAAA,IAAI,CAAC,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,gBAAgB,CAACY;gBAC3D,MAAME,aAAab,QAAAA,oBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kCAAAA,kBAAmB,YAAY,AAAD,IAA9BA,KAAAA,IAAAA,gCAAiC,KAAK;gBAEzD,MAAMc,mBAAmBJ,0BACrB,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACA9D,MAAM,SAAS;gBAErB,MAAMmE,eAAe,CAAC,CAACF;gBAEvB,MAAMG,kBACJ,AAACP,2BAA4BM,eAEzBnD,SADAqD,qBAAqBrE;gBAE3B,MAAMsE,cAAc,CAAC,CAACF;gBAEtB,IAAIG;gBACJ,IAAI,CAACV,2BAA2B,CAACM,gBAAgB,CAACG,aAChD,IAAI;oBACFjB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCrD,OACA;wBACE,SAASsC;oBACX,GACAjC;oBAEFiD,UAAUD,aAAa,IAAI;oBAC3BkB,sBAAsBlB,aAAa,OAAO;gBAC5C,EAAE,OAAOT,OAAO;oBACd,IAAIA,iBAAiB4B,cACnBlB,UAAUV,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAGF,MAAM6B,UACJd,oBACAM,oBACAG,mBACAG;gBAEF,IAAIG;gBACJ,IACED,WACA,IAAI,CAAC,SAAS,IACd
,CAACN,gBACDnE,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,SAAS,AAAD,MAAM,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,mBAAmB,EACpC,IAAI;oBACF,MAAM2E,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB,CACtDF,QAAQ,IAAI,EACZA,AAA6BzD,WAA7ByD,QAAQ,gBAAgB,GACpB;wBAAE,iBAAiBA,QAAQ,gBAAgB;oBAAC,IAC5CzD;oBAEN,IAAI2D,WAAWC,OAAO,IAAI,CAACD,SAAS,MAAM,GAAG,GAAG;wBAC9C9E,MACE,uCACAiE,aACAa;wBAEFD,oBAAoBC;wBACpB,IAAI,CAAC,SAAS,CAAC,yBAAyB,CACtC;4BACE,MAAM;4BACN,QAAQb;4BACR,OAAOa;wBACT,GACAZ;oBAEJ,OACElE,MACE,yDACAiE;gBAGN,EAAE,OAAOlB,OAAO;oBACd/C,MAAM,kCAAkC+C;gBAC1C;qBAEA/C,MAAM;gBAIV,IAAI,CAAC4E,SAAS;oBACZ,IAAIrB,YACF,MAAM,IAAIoB,aACR,CAAC,oBAAoB,EAAExE,MAAM,MAAM,EAAE,EACrCoD;oBAGJ,MAAM,IAAI1B,MAAM,CAAC,mBAAmB,EAAE1B,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAI6E;gBAEJ,IAAIhB,yBACFgB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAO7E,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAImE,cACTU,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACPb;wBACA,aAAaU;oBACf;gBACF;qBACK,IAAIJ,aACTO,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMT,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,IAAI;oBAC7B;gBACF;gBAGFrB,QAAAA,YAAAA,SAAW0B;gBAEX,OAAO;oBACL,QAAQ;wBACNA;oBACF;oBACAI;gBACF;YACF;QACF;QAEA,OAAO5B;IACT;IA1dA,YAAY,EAAE6B,iBAAiB,EAAEC,OAAO,EAAEC,SAAS,EAAmB,CAAE;QANxE,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAGE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;IACnB;AAudF"}
|
|
1
|
+
{"version":3,"file":"agent/task-builder.mjs","sources":["webpack://@midscene/core/./src/agent/task-builder.ts"],"sourcesContent":["import { findAllMidsceneLocatorField, parseActionParam } from '@/ai-model';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type {\n DetailedLocateParam,\n ElementCacheFeature,\n ExecutionTaskActionApply,\n ExecutionTaskApply,\n ExecutionTaskHitBy,\n ExecutionTaskInsightLocateApply,\n LocateResultElement,\n LocateResultWithDump,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningLocateParam,\n Rect,\n ServiceDump,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport { sleep } from '@/utils';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TaskCache } from './task-cache';\nimport { matchElementFromCache, matchElementFromPlan } from './utils';\n\nconst debug = getDebug('agent:task-builder');\n\nexport function locatePlanForLocate(param: string | DetailedLocateParam) {\n const locate = typeof param === 'string' ? 
{ prompt: param } : param;\n const locatePlan: PlanningAction<PlanningLocateParam> = {\n type: 'Locate',\n locate,\n param: locate,\n thought: '',\n };\n return locatePlan;\n}\n\ninterface TaskBuilderDeps {\n interfaceInstance: AbstractInterface;\n service: Service;\n taskCache?: TaskCache;\n}\n\ninterface BuildOptions {\n cacheable?: boolean;\n subTask?: boolean;\n}\n\ninterface PlanBuildContext {\n tasks: ExecutionTaskApply[];\n modelConfig: IModelConfig;\n cacheable?: boolean;\n subTask: boolean;\n}\n\nexport class TaskBuilder {\n private readonly interface: AbstractInterface;\n\n private readonly service: Service;\n\n private readonly taskCache?: TaskCache;\n\n constructor({ interfaceInstance, service, taskCache }: TaskBuilderDeps) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = taskCache;\n }\n\n public async build(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: BuildOptions,\n ): Promise<{ tasks: ExecutionTaskApply[] }> {\n const tasks: ExecutionTaskApply[] = [];\n const cacheable = options?.cacheable;\n\n const context: PlanBuildContext = {\n tasks,\n modelConfig,\n cacheable,\n subTask: !!options?.subTask,\n };\n\n type PlanHandler = (plan: PlanningAction) => Promise<void> | void;\n\n const planHandlers = new Map<string, PlanHandler>([\n [\n 'Locate',\n (plan) =>\n this.handleLocatePlan(\n plan as PlanningAction<PlanningLocateParam>,\n context,\n ),\n ],\n ['Finished', (plan) => this.handleFinishedPlan(plan, context)],\n [\n 'Sleep',\n (plan) =>\n this.handleSleepPlan(\n plan as PlanningAction<PlanningActionParamSleep>,\n context,\n ),\n ],\n ]);\n\n const defaultHandler: PlanHandler = (plan) =>\n this.handleActionPlan(plan, context);\n\n for (const plan of plans) {\n const handler = planHandlers.get(plan.type) ?? 
defaultHandler;\n await handler(plan);\n }\n\n return {\n tasks,\n };\n }\n\n private handleFinishedPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): void {\n const taskActionFinished: ExecutionTaskActionApply<null> = {\n type: 'Action',\n subType: 'Finished',\n param: null,\n thought: plan.thought,\n locate: plan.locate,\n subTask: context.subTask || undefined,\n executor: async () => {},\n };\n context.tasks.push(taskActionFinished);\n }\n\n private handleSleepPlan(\n plan: PlanningAction<PlanningActionParamSleep>,\n context: PlanBuildContext,\n ): void {\n const sleepTask = this.createSleepTask(plan.param, {\n thought: plan.thought,\n locate: plan.locate,\n });\n if (context.subTask) {\n sleepTask.subTask = true;\n }\n context.tasks.push(sleepTask);\n }\n\n public createSleepTask(\n param: PlanningActionParamSleep,\n meta?: { thought?: string; locate?: PlanningAction['locate'] | null },\n ): ExecutionTaskActionApply<PlanningActionParamSleep> {\n return {\n type: 'Action',\n subType: 'Sleep',\n param,\n thought: meta?.thought,\n locate: meta?.locate ?? null,\n executor: async (taskParam) => {\n await sleep(taskParam?.timeMs || 3000);\n },\n };\n }\n\n private async handleLocatePlan(\n plan: PlanningAction<PlanningLocateParam>,\n context: PlanBuildContext,\n ): Promise<void> {\n if (!plan.locate || plan.locate === null) {\n debug('Locate action with id is null, will be ignored', plan);\n return;\n }\n\n const taskLocate = this.createLocateTask(plan, plan.locate, context);\n context.tasks.push(taskLocate);\n }\n\n private async handleActionPlan(\n plan: PlanningAction,\n context: PlanBuildContext,\n ): Promise<void> {\n const planType = plan.type;\n const actionSpace = await this.interface.actionSpace();\n const action = actionSpace.find((item) => item.name === planType);\n const param = plan.param;\n\n if (!action) {\n throw new Error(`Action type '${planType}' not found`);\n }\n\n const locateFields = action\n ? 
findAllMidsceneLocatorField(action.paramSchema)\n : [];\n\n const requiredLocateFields = action\n ? findAllMidsceneLocatorField(action.paramSchema, true)\n : [];\n\n locateFields.forEach((field) => {\n if (param[field]) {\n const locatePlan = locatePlanForLocate(param[field]);\n debug(\n 'will prepend locate param for field',\n `action.type=${planType}`,\n `param=${JSON.stringify(param[field])}`,\n `locatePlan=${JSON.stringify(locatePlan)}`,\n );\n const locateTask = this.createLocateTask(\n locatePlan,\n param[field],\n context,\n (result) => {\n param[field] = result;\n },\n );\n context.tasks.push(locateTask);\n } else {\n assert(\n !requiredLocateFields.includes(field),\n `Required locate field '${field}' is not provided for action ${planType}`,\n );\n debug(`field '${field}' is not provided for action ${planType}`);\n }\n });\n\n const task: ExecutionTaskApply<\n 'Action',\n any,\n { success: boolean; action: string; param: any },\n void\n > = {\n type: 'Action',\n subType: planType,\n thought: plan.thought,\n param: plan.param,\n subTask: context.subTask || undefined,\n executor: async (param, taskContext) => {\n debug(\n 'executing action',\n planType,\n param,\n `taskContext.element.center: ${taskContext.element?.center}`,\n );\n\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Action task');\n\n requiredLocateFields.forEach((field) => {\n assert(\n param[field],\n `field '${field}' is required for action ${planType} but not provided. 
Cannot execute action ${planType}.`,\n );\n });\n\n try {\n await Promise.all([\n (async () => {\n if (this.interface.beforeInvokeAction) {\n debug('will call \"beforeInvokeAction\" for interface');\n await this.interface.beforeInvokeAction(action.name, param);\n debug('called \"beforeInvokeAction\" for interface');\n }\n })(),\n sleep(200),\n ]);\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running beforeInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n if (action.paramSchema) {\n try {\n param = parseActionParam(param, action.paramSchema);\n } catch (error: any) {\n throw new Error(\n `Invalid parameters for action ${action.name}: ${error.message}\\nParameters: ${JSON.stringify(param)}`,\n { cause: error },\n );\n }\n }\n\n debug('calling action', action.name);\n const actionFn = action.call.bind(this.interface);\n await actionFn(param, taskContext);\n debug('called action', action.name);\n\n const delayAfterRunner = action.delayAfterRunner ?? 
300;\n if (delayAfterRunner > 0) {\n await sleep(delayAfterRunner);\n }\n\n try {\n if (this.interface.afterInvokeAction) {\n debug('will call \"afterInvokeAction\" for interface');\n await this.interface.afterInvokeAction(action.name, param);\n debug('called \"afterInvokeAction\" for interface');\n }\n } catch (originalError: any) {\n const originalMessage =\n originalError?.message || String(originalError);\n throw new Error(\n `error in running afterInvokeAction for ${action.name}: ${originalMessage}`,\n { cause: originalError },\n );\n }\n\n return {\n output: {\n success: true,\n action: planType,\n param: param,\n },\n };\n },\n };\n\n context.tasks.push(task);\n }\n\n private createLocateTask(\n plan: PlanningAction<PlanningLocateParam>,\n detailedLocateParam: DetailedLocateParam | string,\n context: PlanBuildContext,\n onResult?: (result: LocateResultElement) => void,\n ): ExecutionTaskInsightLocateApply {\n const { cacheable, modelConfig } = context;\n let locateParam = detailedLocateParam;\n\n if (typeof locateParam === 'string') {\n locateParam = {\n prompt: locateParam,\n };\n }\n\n if (cacheable !== undefined) {\n locateParam = {\n ...locateParam,\n cacheable,\n };\n }\n\n const taskFind: ExecutionTaskInsightLocateApply = {\n type: 'Insight',\n subType: 'Locate',\n subTask: context.subTask || undefined,\n param: locateParam,\n thought: plan.thought,\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let { uiContext } = taskContext;\n\n assert(\n param?.prompt || param?.bbox,\n `No prompt or id or position or bbox to locate, param=${JSON.stringify(\n param,\n )}`,\n );\n\n if (!uiContext) {\n uiContext = await this.service.contextRetrieverFn();\n }\n\n assert(uiContext, 'uiContext is required for Service task');\n\n let locateDump: ServiceDump | undefined;\n let locateResult: LocateResultWithDump | undefined;\n\n const applyDump = (dump?: ServiceDump) => {\n if (!dump) {\n return;\n }\n locateDump = dump;\n task.log = {\n 
dump,\n };\n task.usage = dump.taskInfo?.usage;\n if (dump.taskInfo?.searchAreaUsage) {\n task.searchAreaUsage = dump.taskInfo.searchAreaUsage;\n }\n };\n\n // from xpath\n let rectFromXpath: Rect | undefined;\n if (param.xpath && this.interface.rectMatchesCacheFeature) {\n rectFromXpath = await this.interface.rectMatchesCacheFeature({\n xpaths: [param.xpath],\n });\n }\n const elementFromXpath = rectFromXpath\n ? generateElementByPosition({\n x: rectFromXpath.left + rectFromXpath.width / 2,\n y: rectFromXpath.top + rectFromXpath.height / 2,\n })\n : undefined;\n const userExpectedPathHitFlag = !!elementFromXpath;\n\n const cachePrompt = param.prompt;\n const locateCacheRecord = this.taskCache?.matchLocateCache(cachePrompt);\n const cacheEntry = locateCacheRecord?.cacheContent?.cache;\n\n const elementFromCache = userExpectedPathHitFlag\n ? null\n : await matchElementFromCache(\n {\n taskCache: this.taskCache,\n interfaceInstance: this.interface,\n },\n cacheEntry,\n cachePrompt,\n param.cacheable,\n );\n const cacheHitFlag = !!elementFromCache;\n\n const elementFromPlan =\n !userExpectedPathHitFlag && !cacheHitFlag\n ? 
matchElementFromPlan(param)\n : undefined;\n const planHitFlag = !!elementFromPlan;\n\n let elementFromAiLocate: LocateResultElement | null | undefined;\n if (!userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag) {\n try {\n locateResult = await this.service.locate(\n param,\n {\n context: uiContext,\n },\n modelConfig,\n );\n applyDump(locateResult.dump);\n elementFromAiLocate = locateResult.element;\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n }\n\n const element =\n elementFromXpath ||\n elementFromCache ||\n elementFromPlan ||\n elementFromAiLocate;\n\n let currentCacheEntry: ElementCacheFeature | undefined;\n if (\n element &&\n this.taskCache &&\n !cacheHitFlag &&\n param?.cacheable !== false\n ) {\n if (this.interface.cacheFeatureForRect) {\n try {\n const feature = await this.interface.cacheFeatureForRect(\n element.rect,\n {\n targetDescription:\n typeof param.prompt === 'string'\n ? param.prompt\n : param.prompt?.prompt,\n modelConfig,\n },\n );\n if (feature && Object.keys(feature).length > 0) {\n debug(\n 'update cache, prompt: %s, cache: %o',\n cachePrompt,\n feature,\n );\n currentCacheEntry = feature;\n this.taskCache.updateOrAppendCacheRecord(\n {\n type: 'locate',\n prompt: cachePrompt,\n cache: feature,\n },\n locateCacheRecord,\n );\n } else {\n debug(\n 'no cache data returned, skip cache update, prompt: %s',\n cachePrompt,\n );\n }\n } catch (error) {\n debug('cacheFeatureForRect failed: %s', error);\n }\n } else {\n debug('cacheFeatureForRect is not supported, skip cache update');\n }\n }\n\n if (!element) {\n if (locateDump) {\n throw new ServiceError(\n `Element not found : ${param.prompt}`,\n locateDump,\n );\n }\n throw new Error(`Element not found: ${param.prompt}`);\n }\n\n let hitBy: ExecutionTaskHitBy | undefined;\n\n if (userExpectedPathHitFlag) {\n hitBy = {\n from: 'User expected path',\n context: {\n xpath: param.xpath,\n },\n };\n } else if (cacheHitFlag) {\n 
hitBy = {\n from: 'Cache',\n context: {\n cacheEntry,\n cacheToSave: currentCacheEntry,\n },\n };\n } else if (planHitFlag) {\n hitBy = {\n from: 'Planning',\n context: {\n rect: elementFromPlan?.rect,\n },\n };\n }\n\n onResult?.(element);\n\n return {\n output: {\n element,\n },\n hitBy,\n };\n },\n };\n\n return taskFind;\n }\n}\n"],"names":["debug","getDebug","locatePlanForLocate","param","locate","locatePlan","TaskBuilder","plans","modelConfig","options","tasks","cacheable","context","planHandlers","Map","plan","defaultHandler","handler","taskActionFinished","undefined","sleepTask","meta","taskParam","sleep","taskLocate","planType","actionSpace","action","item","Error","locateFields","findAllMidsceneLocatorField","requiredLocateFields","field","JSON","locateTask","result","assert","task","taskContext","_taskContext_element","uiContext","Promise","originalError","originalMessage","String","parseActionParam","error","actionFn","delayAfterRunner","detailedLocateParam","onResult","locateParam","taskFind","_this_taskCache","_locateCacheRecord_cacheContent","locateDump","locateResult","applyDump","dump","_dump_taskInfo","_dump_taskInfo1","rectFromXpath","elementFromXpath","generateElementByPosition","userExpectedPathHitFlag","cachePrompt","locateCacheRecord","cacheEntry","elementFromCache","matchElementFromCache","cacheHitFlag","elementFromPlan","matchElementFromPlan","planHitFlag","elementFromAiLocate","ServiceError","element","currentCacheEntry","_param_prompt","feature","Object","hitBy","interfaceInstance","service","taskCache"],"mappings":";;;;;;;;;;;;;;;;;AA2BA,MAAMA,QAAQC,SAAS;AAEhB,SAASC,oBAAoBC,KAAmC;IACrE,MAAMC,SAAS,AAAiB,YAAjB,OAAOD,QAAqB;QAAE,QAAQA;IAAM,IAAIA;IAC/D,MAAME,aAAkD;QACtD,MAAM;QACND;QACA,OAAOA;QACP,SAAS;IACX;IACA,OAAOC;AACT;AAoBO,MAAMC;IAaX,MAAa,MACXC,KAAuB,EACvBC,WAAyB,EACzBC,OAAsB,EACoB;QAC1C,MAAMC,QAA8B,EAAE;QACtC,MAAMC,YAAYF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,SAAS;QAEpC,MAAMG,UAA4B;YAChCF;YACAF;YACAG;YACA,SAAS,CAAC,CAACF,CAAAA,QAAAA,UAAAA,KAAAA,IA
AAA,QAAS,OAAO,AAAD;QAC5B;QAIA,MAAMI,eAAe,IAAIC,IAAyB;YAChD;gBACE;gBACA,CAACC,OACC,IAAI,CAAC,gBAAgB,CACnBA,MACAH;aAEL;YACD;gBAAC;gBAAY,CAACG,OAAS,IAAI,CAAC,kBAAkB,CAACA,MAAMH;aAAS;YAC9D;gBACE;gBACA,CAACG,OACC,IAAI,CAAC,eAAe,CAClBA,MACAH;aAEL;SACF;QAED,MAAMI,iBAA8B,CAACD,OACnC,IAAI,CAAC,gBAAgB,CAACA,MAAMH;QAE9B,KAAK,MAAMG,QAAQR,MAAO;YACxB,MAAMU,UAAUJ,aAAa,GAAG,CAACE,KAAK,IAAI,KAAKC;YAC/C,MAAMC,QAAQF;QAChB;QAEA,OAAO;YACLL;QACF;IACF;IAEQ,mBACNK,IAAoB,EACpBH,OAAyB,EACnB;QACN,MAAMM,qBAAqD;YACzD,MAAM;YACN,SAAS;YACT,OAAO;YACP,SAASH,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;YACnB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,WAAa;QACzB;QACAP,QAAQ,KAAK,CAAC,IAAI,CAACM;IACrB;IAEQ,gBACNH,IAA8C,EAC9CH,OAAyB,EACnB;QACN,MAAMQ,YAAY,IAAI,CAAC,eAAe,CAACL,KAAK,KAAK,EAAE;YACjD,SAASA,KAAK,OAAO;YACrB,QAAQA,KAAK,MAAM;QACrB;QACA,IAAIH,QAAQ,OAAO,EACjBQ,UAAU,OAAO,GAAG;QAEtBR,QAAQ,KAAK,CAAC,IAAI,CAACQ;IACrB;IAEO,gBACLjB,KAA+B,EAC/BkB,IAAqE,EACjB;QACpD,OAAO;YACL,MAAM;YACN,SAAS;YACTlB;YACA,SAASkB,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,OAAO;YACtB,QAAQA,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,MAAM,AAAD,KAAK;YACxB,UAAU,OAAOC;gBACf,MAAMC,MAAMD,AAAAA,CAAAA,QAAAA,YAAAA,KAAAA,IAAAA,UAAW,MAAM,AAAD,KAAK;YACnC;QACF;IACF;IAEA,MAAc,iBACZP,IAAyC,EACzCH,OAAyB,EACV;QACf,IAAI,CAACG,KAAK,MAAM,IAAIA,AAAgB,SAAhBA,KAAK,MAAM,EAAW,YACxCf,MAAM,kDAAkDe;QAI1D,MAAMS,aAAa,IAAI,CAAC,gBAAgB,CAACT,MAAMA,KAAK,MAAM,EAAEH;QAC5DA,QAAQ,KAAK,CAAC,IAAI,CAACY;IACrB;IAEA,MAAc,iBACZT,IAAoB,EACpBH,OAAyB,EACV;QACf,MAAMa,WAAWV,KAAK,IAAI;QAC1B,MAAMW,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;QACpD,MAAMC,SAASD,YAAY,IAAI,CAAC,CAACE,OAASA,KAAK,IAAI,KAAKH;QACxD,MAAMtB,QAAQY,KAAK,KAAK;QAExB,IAAI,CAACY,QACH,MAAM,IAAIE,MAAM,CAAC,aAAa,EAAEJ,SAAS,WAAW,CAAC;QAGvD,MAAMK,eAAeH,SACjBI,4BAA4BJ,OAAO,WAAW,IAC9C,EAAE;QAEN,MAAMK,uBAAuBL,SACzBI,4BAA4BJ,OAAO,WAAW,EAAE,QAChD,EAAE;QAENG,aAAa,OAAO,CAAC,CAACG;YACpB,IAAI9B,KAAK,CAAC8B,MAAM,EAAE;gBAChB,MAAM5B,aAAaH,oBAAoBC,KAAK,CAAC8B,MAAM;gBACnDjC,MACE,uCACA,CAAC,YAAY,EAAEyB,UAAU,EACzB,CAAC,MAAM,EAAES,KAAK,SAAS,CAAC/B,KAAK,CAAC8B,MAAM,GAAG,EACvC,CAAC,
WAAW,EAAEC,KAAK,SAAS,CAAC7B,aAAa;gBAE5C,MAAM8B,aAAa,IAAI,CAAC,gBAAgB,CACtC9B,YACAF,KAAK,CAAC8B,MAAM,EACZrB,SACA,CAACwB;oBACCjC,KAAK,CAAC8B,MAAM,GAAGG;gBACjB;gBAEFxB,QAAQ,KAAK,CAAC,IAAI,CAACuB;YACrB,OAAO;gBACLE,OACE,CAACL,qBAAqB,QAAQ,CAACC,QAC/B,CAAC,uBAAuB,EAAEA,MAAM,6BAA6B,EAAER,UAAU;gBAE3EzB,MAAM,CAAC,OAAO,EAAEiC,MAAM,6BAA6B,EAAER,UAAU;YACjE;QACF;QAEA,MAAMa,OAKF;YACF,MAAM;YACN,SAASb;YACT,SAASV,KAAK,OAAO;YACrB,OAAOA,KAAK,KAAK;YACjB,SAASH,QAAQ,OAAO,IAAIO;YAC5B,UAAU,OAAOhB,OAAOoC;oBAKWC;gBAJjCxC,MACE,oBACAyB,UACAtB,OACA,CAAC,4BAA4B,EAAE,QAAAqC,CAAAA,uBAAAA,YAAY,OAAO,AAAD,IAAlBA,KAAAA,IAAAA,qBAAqB,MAAM,EAAE;gBAG9D,MAAMC,YAAYF,YAAY,SAAS;gBACvCF,OAAOI,WAAW;gBAElBT,qBAAqB,OAAO,CAAC,CAACC;oBAC5BI,OACElC,KAAK,CAAC8B,MAAM,EACZ,CAAC,OAAO,EAAEA,MAAM,yBAAyB,EAAER,SAAS,yCAAyC,EAAEA,SAAS,CAAC,CAAC;gBAE9G;gBAEA,IAAI;oBACF,MAAMiB,QAAQ,GAAG,CAAC;wBACf;4BACC,IAAI,IAAI,CAAC,SAAS,CAAC,kBAAkB,EAAE;gCACrC1C,MAAM;gCACN,MAAM,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC2B,OAAO,IAAI,EAAExB;gCACrDH,MAAM;4BACR;wBACF;wBACAuB,MAAM;qBACP;gBACH,EAAE,OAAOoB,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,wCAAwC,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC5E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,IAAIhB,OAAO,WAAW,EACpB,IAAI;oBACFxB,QAAQ2C,iBAAiB3C,OAAOwB,OAAO,WAAW;gBACpD,EAAE,OAAOoB,OAAY;oBACnB,MAAM,IAAIlB,MACR,CAAC,8BAA8B,EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEoB,MAAM,OAAO,CAAC,cAAc,EAAEb,KAAK,SAAS,CAAC/B,QAAQ,EACtG;wBAAE,OAAO4C;oBAAM;gBAEnB;gBAGF/C,MAAM,kBAAkB2B,OAAO,IAAI;gBACnC,MAAMqB,WAAWrB,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS;gBAChD,MAAMqB,SAAS7C,OAAOoC;gBACtBvC,MAAM,iBAAiB2B,OAAO,IAAI;gBAElC,MAAMsB,mBAAmBtB,OAAO,gBAAgB,IAAI;gBACpD,IAAIsB,mBAAmB,GACrB,MAAM1B,MAAM0B;gBAGd,IAAI;oBACF,IAAI,IAAI,CAAC,SAAS,CAAC,iBAAiB,EAAE;wBACpCjD,MAAM;wBACN,MAAM,IAAI,CAAC,SAAS,CAAC,iBAAiB,CAAC2B,OAAO,IAAI,EAAExB;wBACpDH,MAAM;oBACR;gBACF,EAAE,OAAO2C,eAAoB;oBAC3B,MAAMC,kBACJD,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,OAAO,AAAD,KAAKE,OAAOF;oBACnC,MAAM,IAAId,MACR,CAAC,uCAAuC,
EAAEF,OAAO,IAAI,CAAC,EAAE,EAAEiB,iBAAiB,EAC3E;wBAAE,OAAOD;oBAAc;gBAE3B;gBAEA,OAAO;oBACL,QAAQ;wBACN,SAAS;wBACT,QAAQlB;wBACR,OAAOtB;oBACT;gBACF;YACF;QACF;QAEAS,QAAQ,KAAK,CAAC,IAAI,CAAC0B;IACrB;IAEQ,iBACNvB,IAAyC,EACzCmC,mBAAiD,EACjDtC,OAAyB,EACzBuC,QAAgD,EACf;QACjC,MAAM,EAAExC,SAAS,EAAEH,WAAW,EAAE,GAAGI;QACnC,IAAIwC,cAAcF;QAElB,IAAI,AAAuB,YAAvB,OAAOE,aACTA,cAAc;YACZ,QAAQA;QACV;QAGF,IAAIzC,AAAcQ,WAAdR,WACFyC,cAAc;YACZ,GAAGA,WAAW;YACdzC;QACF;QAGF,MAAM0C,WAA4C;YAChD,MAAM;YACN,SAAS;YACT,SAASzC,QAAQ,OAAO,IAAIO;YAC5B,OAAOiC;YACP,SAASrC,KAAK,OAAO;YACrB,UAAU,OAAOZ,OAAOoC;oBAkDIe,iBACPC;gBAlDnB,MAAM,EAAEjB,IAAI,EAAE,GAAGC;gBACjB,IAAI,EAAEE,SAAS,EAAE,GAAGF;gBAEpBF,OACElC,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,MAAM,AAAD,KAAKA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,IAAI,AAAD,GAC3B,CAAC,qDAAqD,EAAE+B,KAAK,SAAS,CACpE/B,QACC;gBAGL,IAAI,CAACsC,WACHA,YAAY,MAAM,IAAI,CAAC,OAAO,CAAC,kBAAkB;gBAGnDJ,OAAOI,WAAW;gBAElB,IAAIe;gBACJ,IAAIC;gBAEJ,MAAMC,YAAY,CAACC;wBAQJC,gBACTC;oBARJ,IAAI,CAACF,MACH;oBAEFH,aAAaG;oBACbrB,KAAK,GAAG,GAAG;wBACTqB;oBACF;oBACArB,KAAK,KAAK,GAAG,QAAAsB,CAAAA,iBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,eAAe,KAAK;oBACjC,IAAI,QAAAC,CAAAA,kBAAAA,KAAK,QAAQ,AAAD,IAAZA,KAAAA,IAAAA,gBAAe,eAAe,EAChCvB,KAAK,eAAe,GAAGqB,KAAK,QAAQ,CAAC,eAAe;gBAExD;gBAGA,IAAIG;gBACJ,IAAI3D,MAAM,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC,uBAAuB,EACvD2D,gBAAgB,MAAM,IAAI,CAAC,SAAS,CAAC,uBAAuB,CAAC;oBAC3D,QAAQ;wBAAC3D,MAAM,KAAK;qBAAC;gBACvB;gBAEF,MAAM4D,mBAAmBD,gBACrBE,0BAA0B;oBACxB,GAAGF,cAAc,IAAI,GAAGA,cAAc,KAAK,GAAG;oBAC9C,GAAGA,cAAc,GAAG,GAAGA,cAAc,MAAM,GAAG;gBAChD,KACA3C;gBACJ,MAAM8C,0BAA0B,CAAC,CAACF;gBAElC,MAAMG,cAAc/D,MAAM,MAAM;gBAChC,MAAMgE,oBAAoB,QAAAb,CAAAA,kBAAAA,IAAI,CAAC,SAAS,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,gBAAgB,CAACY;gBAC3D,MAAME,aAAab,QAAAA,oBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,kCAAAA,kBAAmB,YAAY,AAAD,IAA9BA,KAAAA,IAAAA,gCAAiC,KAAK;gBAEzD,MAAMc,mBAAmBJ,0BACrB,OACA,MAAMK,sBACJ;oBACE,WAAW,IAAI,CAAC,SAAS;oBACzB,mBAAmB,IAAI,CAAC,SAAS;gBACnC,GACAF,YACAF,aACA/D,MAAM,SAAS;gBAErB,MAAMoE,eAAe,CAAC,CAACF;gBAEvB,MAAMG,kBACJ,
AAACP,2BAA4BM,eAEzBpD,SADAsD,qBAAqBtE;gBAE3B,MAAMuE,cAAc,CAAC,CAACF;gBAEtB,IAAIG;gBACJ,IAAI,CAACV,2BAA2B,CAACM,gBAAgB,CAACG,aAChD,IAAI;oBACFjB,eAAe,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CACtCtD,OACA;wBACE,SAASsC;oBACX,GACAjC;oBAEFkD,UAAUD,aAAa,IAAI;oBAC3BkB,sBAAsBlB,aAAa,OAAO;gBAC5C,EAAE,OAAOV,OAAO;oBACd,IAAIA,iBAAiB6B,cACnBlB,UAAUX,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAGF,MAAM8B,UACJd,oBACAM,oBACAG,mBACAG;gBAEF,IAAIG;gBACJ,IACED,WACA,IAAI,CAAC,SAAS,IACd,CAACN,gBACDpE,AAAAA,CAAAA,QAAAA,QAAAA,KAAAA,IAAAA,MAAO,SAAS,AAAD,MAAM,OAErB,IAAI,IAAI,CAAC,SAAS,CAAC,mBAAmB,EACpC,IAAI;wBAOQ4E;oBANV,MAAMC,UAAU,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB,CACtDH,QAAQ,IAAI,EACZ;wBACE,mBACE,AAAwB,YAAxB,OAAO1E,MAAM,MAAM,GACfA,MAAM,MAAM,WACZ4E,CAAAA,gBAAAA,MAAM,MAAM,AAAD,IAAXA,KAAAA,IAAAA,cAAc,MAAM;wBAC1BvE;oBACF;oBAEF,IAAIwE,WAAWC,OAAO,IAAI,CAACD,SAAS,MAAM,GAAG,GAAG;wBAC9ChF,MACE,uCACAkE,aACAc;wBAEFF,oBAAoBE;wBACpB,IAAI,CAAC,SAAS,CAAC,yBAAyB,CACtC;4BACE,MAAM;4BACN,QAAQd;4BACR,OAAOc;wBACT,GACAb;oBAEJ,OACEnE,MACE,yDACAkE;gBAGN,EAAE,OAAOnB,OAAO;oBACd/C,MAAM,kCAAkC+C;gBAC1C;qBAEA/C,MAAM;gBAIV,IAAI,CAAC6E,SAAS;oBACZ,IAAIrB,YACF,MAAM,IAAIoB,aACR,CAAC,oBAAoB,EAAEzE,MAAM,MAAM,EAAE,EACrCqD;oBAGJ,MAAM,IAAI3B,MAAM,CAAC,mBAAmB,EAAE1B,MAAM,MAAM,EAAE;gBACtD;gBAEA,IAAI+E;gBAEJ,IAAIjB,yBACFiB,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,OAAO/E,MAAM,KAAK;oBACpB;gBACF;qBACK,IAAIoE,cACTW,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACPd;wBACA,aAAaU;oBACf;gBACF;qBACK,IAAIJ,aACTQ,QAAQ;oBACN,MAAM;oBACN,SAAS;wBACP,MAAMV,QAAAA,kBAAAA,KAAAA,IAAAA,gBAAiB,IAAI;oBAC7B;gBACF;gBAGFrB,QAAAA,YAAAA,SAAW0B;gBAEX,OAAO;oBACL,QAAQ;wBACNA;oBACF;oBACAK;gBACF;YACF;QACF;QAEA,OAAO7B;IACT;IAneA,YAAY,EAAE8B,iBAAiB,EAAEC,OAAO,EAAEC,SAAS,EAAmB,CAAE;QANxE,uBAAiB,aAAjB;QAEA,uBAAiB,WAAjB;QAEA,uBAAiB,aAAjB;QAGE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC;IACnB;AAgeF"}
|
package/dist/es/agent/tasks.mjs
CHANGED
|
@@ -242,6 +242,11 @@ class TaskExecutor {
|
|
|
242
242
|
assert((null == data ? void 0 : data[keyOfResult]) !== void 0, 'No result in query data');
|
|
243
243
|
outputResult = data[keyOfResult];
|
|
244
244
|
}
|
|
245
|
+
if ('Assert' === type && !outputResult) {
|
|
246
|
+
task.usage = usage;
|
|
247
|
+
task.thought = thought;
|
|
248
|
+
throw new Error(`Assertion failed: ${thought}`);
|
|
249
|
+
}
|
|
245
250
|
return {
|
|
246
251
|
output: outputResult,
|
|
247
252
|
log: queryDump,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"agent/tasks.mjs","sources":["webpack://@midscene/core/./src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport type {\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\nconst debug = getDebug('device-task-executor');\nconst defaultReplanningCycleLimit = 10;\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n 
get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.conversationHistory = new ConversationHistory();\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n },\n );\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(plans, modelConfig, options);\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n await session.appendAndRun(task);\n\n return {\n runner: session.getRunner(),\n };\n }\n\n private createPlanningTask(\n userInstruction: string,\n 
actionContext: string | undefined,\n modelConfig: IModelConfig,\n ): ExecutionTaskPlanningApply {\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'Plan',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfig;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars' ? modelConfig.uiTarsModelVersion : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(\n param.userInstruction,\n {\n context: uiContext,\n actionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig,\n conversationHistory: this.conversationHistory,\n },\n );\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n\n const finalActions = actions || [];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n if (finalActions.length === 0) {\n assert(\n !more_actions_needed_by_instruction || sleep,\n error ? 
`Failed to plan: ${error}` : 'No plan found',\n );\n }\n\n return {\n output: {\n actions: finalActions,\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n },\n cache: {\n hit: false,\n },\n uiContext,\n };\n },\n };\n\n return task;\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);\n const result = await session.appendAndRun(tasks);\n const { output } = result!;\n return {\n output,\n runner: session.getRunner(),\n };\n }\n\n private getReplanningCycleLimit(isVlmUiTars: boolean) {\n return (\n this.replanningCycleLimit ||\n globalConfigManager.getEnvConfigInNumber(\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n ) ||\n (isVlmUiTars\n ? defaultVlmUiTarsReplanningCycleLimit\n : defaultReplanningCycleLimit)\n );\n }\n\n async action(\n userPrompt: string,\n modelConfig: IModelConfig,\n actionContext?: string,\n cacheable?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit = this.getReplanningCycleLimit(\n modelConfig.vlMode === 'vlm-ui-tars',\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;\n\n return session.appendErrorPlan(errorMsg);\n }\n\n // Create planning task (automatically includes execution history if available)\n const planningTask = this.createPlanningTask(\n userPrompt,\n actionContext,\n 
modelConfig,\n );\n\n const result = await session.appendAndRun(planningTask);\n const planResult: PlanningAIResponse = result?.output;\n if (session.isInErrorState()) {\n return {\n output: planResult,\n runner,\n };\n }\n\n // Execute planned actions\n const plans = planResult.actions || [];\n yamlFlow.push(...(planResult.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(plans, modelConfig, {\n cacheable,\n subTask: true,\n });\n await session.appendAndRun(executables.tasks);\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (session.isInErrorState()) {\n return {\n output: undefined,\n runner,\n };\n }\n\n // Check if task is complete\n if (!planResult.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n replanCount++;\n }\n\n return {\n output: {\n yamlFlow,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n locate: null,\n param: {\n dataDemand: multimodalPrompt\n ? 
({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data 
=== null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner: session.getRunner(),\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n locate: null,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, 
checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let startTime = Date.now();\n let errorThought = '';\n while (Date.now() - overallStartTime < timeoutMs) {\n startTime = Date.now();\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n {\n doNotThrowError: true,\n },\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - startTime < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - startTime);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n 
}\n}\n"],"names":["debug","getDebug","defaultReplanningCycleLimit","defaultVlmUiTarsReplanningCycleLimit","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfig","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","actionContext","startTime","Date","vlMode","uiTarsModelVersion","undefined","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","tasks","result","output","isVlmUiTars","globalConfigManager","MIDSCENE_REPLANNING_CYCLE_LIMIT","userPrompt","cacheable","runner","replanCount","yamlFlow","replanningCycleLimit","errorMsg","planningTask","executables","type","demand","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","Error","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","errorThought","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA0CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,8BAA8B;AACpC,MAAMC,uCAAuC;AAEtC,MAAMC;IAgBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAwBQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK;QACvB;IAEJ;IAEA,MAAa,wBACXG,KAAuB,EACvBC,WAAyB,EACzBJ,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAACG,OAAOC,aAAaJ;IACpD;IAEA,MAAM,uBAAuBK,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAA
S,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMC,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACL,QAAQF,QAAQ,SAAS;QAC3B;IACF;IAEQ,mBACNF,eAAuB,EACvBS,aAAiC,EACjCV,WAAyB,EACG;QAC5B,MAAMK,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAMI,YAAYC,KAAK,GAAG;gBAC1B,MAAM,EAAEJ,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,MAAM,EAAEK,MAAM,EAAE,GAAGb;gBACnB,MAAMc,qBACJD,AAAW,kBAAXA,SAA2Bb,YAAY,kBAAkB,GAAGe;gBAE9DN,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;gBAEF,MAAMO,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;gBACpD1B,MACE,sCACA0B,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;gBAEhDR,OAAOS,MAAM,OAAO,CAACF,cAAc;gBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;gBAIrG,MAAMC,aAAa,MAAON,AAAAA,CAAAA,qBAAqBO,iBAAiBC,IAAG,EACjEhB,MAAM,eAAe,EACrB;oBACE,SAASE;oBACTE;oBACA,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;oBAC3CM;oBACAhB;oBACA,qBAAqB,IAAI,CAAC,mBAAmB;gBAC/C;gBAEFV,MAAM,cAAciC,KAAK,SAAS,CAACH,YAAY,MAAM;gBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;gBAEJb,gBAAgB,IAAI,CAAC,GAAG,GAAG;oBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBAClCsB;gBACF;gBACAtB,gBAAgB,IAAI,CAAC,KAAK,GAAGqB;gBAE7B,MAAMG,eAAeP,WAAW,EAAE;gBAElC,IAAIM,OAAO;oBACT,MAAME,UAAUpB,KAAK,GAAG;oBACxB,MAAMqB,gBAAgBH,QAASE,CAAAA,UAAUrB,SAAQ;oBACjD,IAAIsB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;gBAErC;gBAEA,IAAIF,AAAwB,MAAxBA,aAAa,MAAM,EACrBtB,OACE,CAACiB,sCAAsCI,OACvCH,QAAQ,CAAC,gBAAgB,EAAEA,OAAO,GAAG;gBAIzC,OAAO;oBACL,QAAQ;wBACN,SAASI;wBACTL;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA,OAAO;wBACL,KAAK;oBACP;oBACAZ;gBACF;YACF;QACF;QAEA,OAAOH;IACT;IAEA,MAAM,SACJV,KAAa,EACbI,KAAuB,EACvBC,WAAyB,EACC;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACR;QAC5C,MAAM,EAAEuC,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAACnC,OAAOC;QAC5D,MAAMmC,SAAS,MA
AMhC,QAAQ,YAAY,CAAC+B;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD;QACnB,OAAO;YACLC;YACA,QAAQjC,QAAQ,SAAS;QAC3B;IACF;IAEQ,wBAAwBkC,WAAoB,EAAE;QACpD,OACE,IAAI,CAAC,oBAAoB,IACzBC,oBAAoB,oBAAoB,CACtCC,oCAEDF,CAAAA,cACG5C,uCACAD,2BAA0B;IAElC;IAEA,MAAM,OACJgD,UAAkB,EAClBxC,WAAyB,EACzBU,aAAsB,EACtB+B,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMtC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUoC;QAEzB,MAAME,SAASvC,QAAQ,SAAS;QAEhC,IAAIwC,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBAAuB,IAAI,CAAC,uBAAuB,CACvD7C,AAAuB,kBAAvBA,YAAY,MAAM;QAIpB,MAAO,KAAM;YACX,IAAI2C,cAAcE,sBAAsB;gBACtC,MAAMC,WAAW,CAAC,WAAW,EAAED,qBAAqB,+EAA+E,CAAC;gBAEpI,OAAO1C,QAAQ,eAAe,CAAC2C;YACjC;YAGA,MAAMC,eAAe,IAAI,CAAC,kBAAkB,CAC1CP,YACA9B,eACAV;YAGF,MAAMmC,SAAS,MAAMhC,QAAQ,YAAY,CAAC4C;YAC1C,MAAM3B,aAAiCe,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM;YACrD,IAAIhC,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQiB;gBACRsB;YACF;YAIF,MAAM3C,QAAQqB,WAAW,OAAO,IAAI,EAAE;YACtCwB,SAAS,IAAI,IAAKxB,WAAW,QAAQ,IAAI,EAAE;YAE3C,IAAI4B;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAACjD,OAAOC,aAAa;oBACnEyC;oBACA,SAAS;gBACX;gBACA,MAAMtC,QAAQ,YAAY,CAAC6C,YAAY,KAAK;YAC9C,EAAE,OAAOrB,OAAO;gBACd,OAAOxB,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEwB,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ExB,QACC;YAEP;YACA,IAAII,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQY;gBACR2B;YACF;YAIF,IAAI,CAACtB,WAAW,kCAAkC,EAChD;YAIFuB;QACF;QAEA,OAAO;YACL,QAAQ;gBACNC;YACF;YACAF;QACF;IACF;IAEQ,oBACNO,IAAsE,EACtEC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASJ;YACT,QAAQ;YACR,OAAO;gBACL,YAAYG,mBACP;oBACCF;oBACAE;gBACF,IACAF;YACN;YACA,UAAU,OAAO5C,OAAOgD;gBACtB,MAAM,EAAEjD,IAAI,EAAE,GAAGiD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZpD,KAAK,GAAG,GAAG;wBACToD;oBACF;gBACF;gBAGA,MAAMjD,YAAY8C,YAAY,SAAS;gBACvC7C,OAAOD,WAAW;gBAElB,MAAMkD,mBAAmBT,AAAS,YAATA;gBACzB,IAAIU,cAAcT;gBAClB,IAAIU,cAAc;gBAClB,IAAIF,oBAAqBT,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEW,cAAc;oBACd,MAAMC,gBACJZ,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIS,cAAc;wBACZ,CAA
CC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGX,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIY;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,KAAK,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1D7D,MAAM;oBACN,MAAM0E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,MAAM;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACA3D,aACAmD,KACAY,sBACAX;gBAEJ,EAAE,OAAOzB,OAAO;oBACd,IAAIA,iBAAiBuC,cACnBV,UAAU7B,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEwC,IAAI,EAAEvC,KAAK,EAAEwC,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAIlB,AAAS,cAATA,MAEPoB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACL5D,OACE0D,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,IAAM,CAACP,YAAY,AAAD,MAAM7C,QACxB;oBAEFsD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,OAAO;oBACL,QAAQS;oBACR,KAAKd;oBACL3B;oBACAwC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJJ,IAA0D,EAC1DC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMjD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACE6C,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAAS3B,KAAK,SAAS,CAAC2B;QAIzD,MAAMG,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CJ,MACAC,QACAlD,aACAmD,KACAC;QAGF,MAAMjB,SAAS,MAAMhC,QAAQ,YAAY,CAACkD;QAE1C,IAAI,CAAClB,QACH,MAAM,IAAImC,MACR;QAIJ,MAAM,EAAElC,MAAM,EAAEgC,OAAO,EAAE,GAAGjC;QAE5B,OAAO;YACLC;YACAgC;YACA,QAAQjE,QAAQ,SAAS;QAC3B;IACF;IAEQ,UAAUoE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;YACA,QAAQ;QACV;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBtB,GAA+B,EAC/BnD,WAAyB,EACO;QAChC,MAAM,EAAE0E,UAAU,EAAEtB,gBAAgB,EAAE,GAAGuB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMvE,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,WAAWwE;QAE1B,MAAMlC,SAASvC,QAAQ,SAAS;QAChC,MAAM,EAAE0E,SAAS,EAAEC,eAAe,EAAE,GAAG3B;QAEvC1C,OAAOgE,WAAW;QAClBhE,OAAOoE,WAAW;QAClB
pE,OAAOqE,iBAAiB;QAExBrE,OACEqE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBnE,KAAK,GAAG;QACjC,IAAID,YAAYC,KAAK,GAAG;QACxB,IAAIoE,eAAe;QACnB,MAAOpE,KAAK,GAAG,KAAKmE,mBAAmBF,UAAW;YAChDlE,YAAYC,KAAK,GAAG;YACpB,MAAMyC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAqB,YACA1E,aACA;gBACE,iBAAiB;YACnB,GACAoD;YAGF,MAAMjB,SAAU,MAAMhC,QAAQ,YAAY,CAACkD;YAO3C,IAAIlB,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,EAChB,OAAO;gBACL,QAAQpB;gBACR2B;YACF;YAGFsC,eACE7C,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,OAAO,AAAD,KACb,CAACA,UAAU,CAAC,0BAA0B,EAAEuC,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMO,MAAMrE,KAAK,GAAG;YACpB,IAAIqE,MAAMtE,YAAYmE,iBAAiB;gBACrC,MAAM7C,gBAAgB6C,kBAAmBG,CAAAA,MAAMtE,SAAQ;gBACvD,MAAMuE,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQjD;gBACV;gBACA,MAAM9B,QAAQ,MAAM,CAAC+E;YACvB;QACF;QAEA,OAAO/E,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAE6E,cAAc;IACnE;IAjiBA,YACEG,iBAAoC,EACpCC,OAAgB,EAChBC,IAIC,CACD;QA3BF;QAEA;QAEA;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA;QAgBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,WAAW;QAC5C,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;QAC3B;IACF;AA8gBF"}
|
|
1
|
+
{"version":3,"file":"agent/tasks.mjs","sources":["webpack://@midscene/core/./src/agent/tasks.ts"],"sourcesContent":["import { ConversationHistory, plan, uiTarsPlanning } from '@/ai-model';\nimport type { TMultimodalPrompt, TUserPrompt } from '@/ai-model/common';\nimport type { AbstractInterface } from '@/device';\nimport type Service from '@/service';\nimport type { TaskRunner } from '@/task-runner';\nimport type {\n ExecutionTaskApply,\n ExecutionTaskInsightQueryApply,\n ExecutionTaskPlanningApply,\n ExecutionTaskProgressOptions,\n InterfaceType,\n MidsceneYamlFlowItem,\n PlanningAIResponse,\n PlanningAction,\n PlanningActionParamSleep,\n PlanningActionParamWaitFor,\n ServiceDump,\n ServiceExtractOption,\n ServiceExtractParam,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport { ExecutionSession } from './execution-session';\nimport { TaskBuilder } from './task-builder';\nimport type { TaskCache } from './task-cache';\nexport { locatePlanForLocate } from './task-builder';\nimport { descriptionOfTree } from '@midscene/shared/extractor';\nimport { taskTitleStr } from './ui-utils';\nimport { parsePrompt } from './utils';\n\ninterface ExecutionResult<OutputType = any> {\n output: OutputType;\n thought?: string;\n runner: TaskRunner;\n}\n\nconst debug = getDebug('device-task-executor');\nconst defaultReplanningCycleLimit = 10;\nconst defaultVlmUiTarsReplanningCycleLimit = 40;\n\nexport class TaskExecutor {\n interface: AbstractInterface;\n\n service: Service;\n\n taskCache?: TaskCache;\n\n private readonly taskBuilder: TaskBuilder;\n\n private conversationHistory: ConversationHistory;\n\n onTaskStartCallback?: ExecutionTaskProgressOptions['onTaskStart'];\n\n replanningCycleLimit?: number;\n\n // @deprecated use .interface instead\n 
get page() {\n return this.interface;\n }\n\n constructor(\n interfaceInstance: AbstractInterface,\n service: Service,\n opts: {\n taskCache?: TaskCache;\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n replanningCycleLimit?: number;\n },\n ) {\n this.interface = interfaceInstance;\n this.service = service;\n this.taskCache = opts.taskCache;\n this.onTaskStartCallback = opts?.onTaskStart;\n this.replanningCycleLimit = opts.replanningCycleLimit;\n this.conversationHistory = new ConversationHistory();\n this.taskBuilder = new TaskBuilder({\n interfaceInstance,\n service,\n taskCache: opts.taskCache,\n });\n }\n\n private createExecutionSession(\n title: string,\n options?: { tasks?: ExecutionTaskApply[] },\n ) {\n return new ExecutionSession(\n title,\n () => Promise.resolve(this.service.contextRetrieverFn()),\n {\n onTaskStart: this.onTaskStartCallback,\n tasks: options?.tasks,\n },\n );\n }\n\n public async convertPlanToExecutable(\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n options?: {\n cacheable?: boolean;\n subTask?: boolean;\n },\n ) {\n return this.taskBuilder.build(plans, modelConfig, options);\n }\n\n async loadYamlFlowAsPlanning(userInstruction: string, yamlString: string) {\n const session = this.createExecutionSession(\n taskTitleStr('Action', userInstruction),\n );\n\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'LoadYaml',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n return {\n output: {\n actions: [],\n more_actions_needed_by_instruction: false,\n log: '',\n yamlString,\n },\n cache: {\n hit: true,\n },\n hitBy: {\n from: 'Cache',\n context: {\n yamlString,\n },\n },\n };\n },\n };\n await session.appendAndRun(task);\n\n return {\n runner: session.getRunner(),\n };\n }\n\n private createPlanningTask(\n userInstruction: string,\n 
actionContext: string | undefined,\n modelConfig: IModelConfig,\n ): ExecutionTaskPlanningApply {\n const task: ExecutionTaskPlanningApply = {\n type: 'Planning',\n subType: 'Plan',\n locate: null,\n param: {\n userInstruction,\n },\n executor: async (param, executorContext) => {\n const startTime = Date.now();\n const { uiContext } = executorContext;\n assert(uiContext, 'uiContext is required for Planning task');\n const { vlMode } = modelConfig;\n const uiTarsModelVersion =\n vlMode === 'vlm-ui-tars' ? modelConfig.uiTarsModelVersion : undefined;\n\n assert(\n this.interface.actionSpace,\n 'actionSpace for device is not implemented',\n );\n const actionSpace = await this.interface.actionSpace();\n debug(\n 'actionSpace for this interface is:',\n actionSpace.map((action) => action.name).join(', '),\n );\n assert(Array.isArray(actionSpace), 'actionSpace must be an array');\n if (actionSpace.length === 0) {\n console.warn(\n `ActionSpace for ${this.interface.interfaceType} is empty. This may lead to unexpected behavior.`,\n );\n }\n\n const planResult = await (uiTarsModelVersion ? uiTarsPlanning : plan)(\n param.userInstruction,\n {\n context: uiContext,\n actionContext,\n interfaceType: this.interface.interfaceType as InterfaceType,\n actionSpace,\n modelConfig,\n conversationHistory: this.conversationHistory,\n },\n );\n debug('planResult', JSON.stringify(planResult, null, 2));\n\n const {\n actions,\n log,\n more_actions_needed_by_instruction,\n error,\n usage,\n rawResponse,\n sleep,\n } = planResult;\n\n executorContext.task.log = {\n ...(executorContext.task.log || {}),\n rawResponse,\n };\n executorContext.task.usage = usage;\n\n const finalActions = actions || [];\n\n if (sleep) {\n const timeNow = Date.now();\n const timeRemaining = sleep - (timeNow - startTime);\n if (timeRemaining > 0) {\n finalActions.push(this.sleepPlan(timeRemaining));\n }\n }\n\n if (finalActions.length === 0) {\n assert(\n !more_actions_needed_by_instruction || sleep,\n error ? 
`Failed to plan: ${error}` : 'No plan found',\n );\n }\n\n return {\n output: {\n actions: finalActions,\n more_actions_needed_by_instruction,\n log,\n yamlFlow: planResult.yamlFlow,\n },\n cache: {\n hit: false,\n },\n uiContext,\n };\n },\n };\n\n return task;\n }\n\n async runPlans(\n title: string,\n plans: PlanningAction[],\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult> {\n const session = this.createExecutionSession(title);\n const { tasks } = await this.convertPlanToExecutable(plans, modelConfig);\n const result = await session.appendAndRun(tasks);\n const { output } = result!;\n return {\n output,\n runner: session.getRunner(),\n };\n }\n\n private getReplanningCycleLimit(isVlmUiTars: boolean) {\n return (\n this.replanningCycleLimit ||\n globalConfigManager.getEnvConfigInNumber(\n MIDSCENE_REPLANNING_CYCLE_LIMIT,\n ) ||\n (isVlmUiTars\n ? defaultVlmUiTarsReplanningCycleLimit\n : defaultReplanningCycleLimit)\n );\n }\n\n async action(\n userPrompt: string,\n modelConfig: IModelConfig,\n actionContext?: string,\n cacheable?: boolean,\n ): Promise<\n ExecutionResult<\n | {\n yamlFlow?: MidsceneYamlFlowItem[]; // for cache use\n }\n | undefined\n >\n > {\n this.conversationHistory.reset();\n\n const session = this.createExecutionSession(\n taskTitleStr('Action', userPrompt),\n );\n const runner = session.getRunner();\n\n let replanCount = 0;\n const yamlFlow: MidsceneYamlFlowItem[] = [];\n const replanningCycleLimit = this.getReplanningCycleLimit(\n modelConfig.vlMode === 'vlm-ui-tars',\n );\n\n // Main planning loop - unified plan/replan logic\n while (true) {\n if (replanCount > replanningCycleLimit) {\n const errorMsg = `Replanning ${replanningCycleLimit} times, which is more than the limit, please split the task into multiple steps`;\n\n return session.appendErrorPlan(errorMsg);\n }\n\n // Create planning task (automatically includes execution history if available)\n const planningTask = this.createPlanningTask(\n userPrompt,\n actionContext,\n 
modelConfig,\n );\n\n const result = await session.appendAndRun(planningTask);\n const planResult: PlanningAIResponse = result?.output;\n if (session.isInErrorState()) {\n return {\n output: planResult,\n runner,\n };\n }\n\n // Execute planned actions\n const plans = planResult.actions || [];\n yamlFlow.push(...(planResult.yamlFlow || []));\n\n let executables: Awaited<ReturnType<typeof this.convertPlanToExecutable>>;\n try {\n executables = await this.convertPlanToExecutable(plans, modelConfig, {\n cacheable,\n subTask: true,\n });\n await session.appendAndRun(executables.tasks);\n } catch (error) {\n return session.appendErrorPlan(\n `Error converting plans to executable tasks: ${error}, plans: ${JSON.stringify(\n plans,\n )}`,\n );\n }\n if (session.isInErrorState()) {\n return {\n output: undefined,\n runner,\n };\n }\n\n // Check if task is complete\n if (!planResult.more_actions_needed_by_instruction) {\n break;\n }\n\n // Increment replan count for next iteration\n replanCount++;\n }\n\n return {\n output: {\n yamlFlow,\n },\n runner,\n };\n }\n\n private createTypeQueryTask(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert' | 'WaitFor',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ) {\n const queryTask: ExecutionTaskInsightQueryApply = {\n type: 'Insight',\n subType: type,\n locate: null,\n param: {\n dataDemand: multimodalPrompt\n ? 
({\n demand,\n multimodalPrompt,\n } as never)\n : demand, // for user param presentation in report right sidebar\n },\n executor: async (param, taskContext) => {\n const { task } = taskContext;\n let queryDump: ServiceDump | undefined;\n const applyDump = (dump: ServiceDump) => {\n queryDump = dump;\n task.log = {\n dump,\n };\n };\n\n // Get context for query operations\n const uiContext = taskContext.uiContext;\n assert(uiContext, 'uiContext is required for Query task');\n\n const ifTypeRestricted = type !== 'Query';\n let demandInput = demand;\n let keyOfResult = 'result';\n if (ifTypeRestricted && (type === 'Assert' || type === 'WaitFor')) {\n keyOfResult = 'StatementIsTruthy';\n const booleanPrompt =\n type === 'Assert'\n ? `Boolean, whether the following statement is true: ${demand}`\n : `Boolean, the user wants to do some 'wait for' operation, please check whether the following statement is true: ${demand}`;\n demandInput = {\n [keyOfResult]: booleanPrompt,\n };\n } else if (ifTypeRestricted) {\n demandInput = {\n [keyOfResult]: `${type}, ${demand}`,\n };\n }\n\n let extractResult;\n\n let extraPageDescription = '';\n if (opt?.domIncluded && this.interface.getElementsNodeTree) {\n debug('appending tree info for page');\n const tree = await this.interface.getElementsNodeTree();\n extraPageDescription = await descriptionOfTree(\n tree,\n 200,\n false,\n opt?.domIncluded === 'visible-only',\n );\n }\n\n try {\n extractResult = await this.service.extract<any>(\n demandInput,\n modelConfig,\n opt,\n extraPageDescription,\n multimodalPrompt,\n );\n } catch (error) {\n if (error instanceof ServiceError) {\n applyDump(error.dump);\n }\n throw error;\n }\n\n const { data, usage, thought, dump } = extractResult;\n applyDump(dump);\n\n let outputResult = data;\n if (ifTypeRestricted) {\n // If AI returned a plain string instead of structured format, use it directly\n if (typeof data === 'string') {\n outputResult = data;\n } else if (type === 'WaitFor') {\n if (data 
=== null || data === undefined) {\n outputResult = false;\n } else {\n outputResult = (data as any)[keyOfResult];\n }\n } else if (data === null || data === undefined) {\n outputResult = null;\n } else {\n assert(\n data?.[keyOfResult] !== undefined,\n 'No result in query data',\n );\n outputResult = (data as any)[keyOfResult];\n }\n }\n\n if (type === 'Assert' && !outputResult) {\n task.usage = usage;\n task.thought = thought;\n throw new Error(`Assertion failed: ${thought}`);\n }\n\n return {\n output: outputResult,\n log: queryDump,\n usage,\n thought,\n };\n },\n };\n\n return queryTask;\n }\n async createTypeQueryExecution<T>(\n type: 'Query' | 'Boolean' | 'Number' | 'String' | 'Assert',\n demand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ExecutionResult<T>> {\n const session = this.createExecutionSession(\n taskTitleStr(\n type,\n typeof demand === 'string' ? demand : JSON.stringify(demand),\n ),\n );\n\n const queryTask = await this.createTypeQueryTask(\n type,\n demand,\n modelConfig,\n opt,\n multimodalPrompt,\n );\n\n const result = await session.appendAndRun(queryTask);\n\n if (!result) {\n throw new Error(\n 'result of taskExecutor.flush() is undefined in function createTypeQueryTask',\n );\n }\n\n const { output, thought } = result;\n\n return {\n output,\n thought,\n runner: session.getRunner(),\n };\n }\n\n private sleepPlan(timeMs: number): PlanningAction<PlanningActionParamSleep> {\n return {\n type: 'Sleep',\n param: {\n timeMs,\n },\n locate: null,\n };\n }\n\n async taskForSleep(timeMs: number, _modelConfig: IModelConfig) {\n return this.taskBuilder.createSleepTask({\n timeMs,\n });\n }\n\n async waitFor(\n assertion: TUserPrompt,\n opt: PlanningActionParamWaitFor,\n modelConfig: IModelConfig,\n ): Promise<ExecutionResult<void>> {\n const { textPrompt, multimodalPrompt } = parsePrompt(assertion);\n\n const description = `waitFor: ${textPrompt}`;\n const 
session = this.createExecutionSession(\n taskTitleStr('WaitFor', description),\n );\n const runner = session.getRunner();\n const { timeoutMs, checkIntervalMs } = opt;\n\n assert(assertion, 'No assertion for waitFor');\n assert(timeoutMs, 'No timeoutMs for waitFor');\n assert(checkIntervalMs, 'No checkIntervalMs for waitFor');\n\n assert(\n checkIntervalMs <= timeoutMs,\n `wrong config for waitFor: checkIntervalMs must be less than timeoutMs, config: {checkIntervalMs: ${checkIntervalMs}, timeoutMs: ${timeoutMs}}`,\n );\n\n const overallStartTime = Date.now();\n let startTime = Date.now();\n let errorThought = '';\n while (Date.now() - overallStartTime < timeoutMs) {\n startTime = Date.now();\n const queryTask = await this.createTypeQueryTask(\n 'WaitFor',\n textPrompt,\n modelConfig,\n {\n doNotThrowError: true,\n },\n multimodalPrompt,\n );\n\n const result = (await session.appendAndRun(queryTask)) as\n | {\n output: boolean;\n thought?: string;\n }\n | undefined;\n\n if (result?.output) {\n return {\n output: undefined,\n runner,\n };\n }\n\n errorThought =\n result?.thought ||\n (!result && `No result from assertion: ${textPrompt}`) ||\n `unknown error when waiting for assertion: ${textPrompt}`;\n const now = Date.now();\n if (now - startTime < checkIntervalMs) {\n const timeRemaining = checkIntervalMs - (now - startTime);\n const sleepTask = this.taskBuilder.createSleepTask({\n timeMs: timeRemaining,\n });\n await session.append(sleepTask);\n }\n }\n\n return session.appendErrorPlan(`waitFor timeout: ${errorThought}`);\n 
}\n}\n"],"names":["debug","getDebug","defaultReplanningCycleLimit","defaultVlmUiTarsReplanningCycleLimit","TaskExecutor","title","options","ExecutionSession","Promise","plans","modelConfig","userInstruction","yamlString","session","taskTitleStr","task","param","executorContext","uiContext","assert","actionContext","startTime","Date","vlMode","uiTarsModelVersion","undefined","actionSpace","action","Array","console","planResult","uiTarsPlanning","plan","JSON","actions","log","more_actions_needed_by_instruction","error","usage","rawResponse","sleep","finalActions","timeNow","timeRemaining","tasks","result","output","isVlmUiTars","globalConfigManager","MIDSCENE_REPLANNING_CYCLE_LIMIT","userPrompt","cacheable","runner","replanCount","yamlFlow","replanningCycleLimit","errorMsg","planningTask","executables","type","demand","opt","multimodalPrompt","queryTask","taskContext","queryDump","applyDump","dump","ifTypeRestricted","demandInput","keyOfResult","booleanPrompt","extractResult","extraPageDescription","tree","descriptionOfTree","ServiceError","data","thought","outputResult","Error","timeMs","_modelConfig","assertion","textPrompt","parsePrompt","description","timeoutMs","checkIntervalMs","overallStartTime","errorThought","now","sleepTask","interfaceInstance","service","opts","ConversationHistory","TaskBuilder"],"mappings":";;;;;;;;;;;;;;;;;;;;AA0CA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,8BAA8B;AACpC,MAAMC,uCAAuC;AAEtC,MAAMC;IAgBX,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,SAAS;IACvB;IAwBQ,uBACNC,KAAa,EACbC,OAA0C,EAC1C;QACA,OAAO,IAAIC,iBACTF,OACA,IAAMG,QAAQ,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kBAAkB,KACrD;YACE,aAAa,IAAI,CAAC,mBAAmB;YACrC,OAAOF,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK;QACvB;IAEJ;IAEA,MAAa,wBACXG,KAAuB,EACvBC,WAAyB,EACzBJ,OAGC,EACD;QACA,OAAO,IAAI,CAAC,WAAW,CAAC,KAAK,CAACG,OAAOC,aAAaJ;IACpD;IAEA,MAAM,uBAAuBK,eAAuB,EAAEC,UAAkB,EAAE;QACxE,MAAMC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUH;QAGzB,MAAMI,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAM,EAAEC,SAA
S,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,OAAO;oBACL,QAAQ;wBACN,SAAS,EAAE;wBACX,oCAAoC;wBACpC,KAAK;wBACLN;oBACF;oBACA,OAAO;wBACL,KAAK;oBACP;oBACA,OAAO;wBACL,MAAM;wBACN,SAAS;4BACPA;wBACF;oBACF;gBACF;YACF;QACF;QACA,MAAMC,QAAQ,YAAY,CAACE;QAE3B,OAAO;YACL,QAAQF,QAAQ,SAAS;QAC3B;IACF;IAEQ,mBACNF,eAAuB,EACvBS,aAAiC,EACjCV,WAAyB,EACG;QAC5B,MAAMK,OAAmC;YACvC,MAAM;YACN,SAAS;YACT,QAAQ;YACR,OAAO;gBACLJ;YACF;YACA,UAAU,OAAOK,OAAOC;gBACtB,MAAMI,YAAYC,KAAK,GAAG;gBAC1B,MAAM,EAAEJ,SAAS,EAAE,GAAGD;gBACtBE,OAAOD,WAAW;gBAClB,MAAM,EAAEK,MAAM,EAAE,GAAGb;gBACnB,MAAMc,qBACJD,AAAW,kBAAXA,SAA2Bb,YAAY,kBAAkB,GAAGe;gBAE9DN,OACE,IAAI,CAAC,SAAS,CAAC,WAAW,EAC1B;gBAEF,MAAMO,cAAc,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW;gBACpD1B,MACE,sCACA0B,YAAY,GAAG,CAAC,CAACC,SAAWA,OAAO,IAAI,EAAE,IAAI,CAAC;gBAEhDR,OAAOS,MAAM,OAAO,CAACF,cAAc;gBACnC,IAAIA,AAAuB,MAAvBA,YAAY,MAAM,EACpBG,QAAQ,IAAI,CACV,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,gDAAgD,CAAC;gBAIrG,MAAMC,aAAa,MAAON,AAAAA,CAAAA,qBAAqBO,iBAAiBC,IAAG,EACjEhB,MAAM,eAAe,EACrB;oBACE,SAASE;oBACTE;oBACA,eAAe,IAAI,CAAC,SAAS,CAAC,aAAa;oBAC3CM;oBACAhB;oBACA,qBAAqB,IAAI,CAAC,mBAAmB;gBAC/C;gBAEFV,MAAM,cAAciC,KAAK,SAAS,CAACH,YAAY,MAAM;gBAErD,MAAM,EACJI,OAAO,EACPC,GAAG,EACHC,kCAAkC,EAClCC,KAAK,EACLC,KAAK,EACLC,WAAW,EACXC,KAAK,EACN,GAAGV;gBAEJb,gBAAgB,IAAI,CAAC,GAAG,GAAG;oBACzB,GAAIA,gBAAgB,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBAClCsB;gBACF;gBACAtB,gBAAgB,IAAI,CAAC,KAAK,GAAGqB;gBAE7B,MAAMG,eAAeP,WAAW,EAAE;gBAElC,IAAIM,OAAO;oBACT,MAAME,UAAUpB,KAAK,GAAG;oBACxB,MAAMqB,gBAAgBH,QAASE,CAAAA,UAAUrB,SAAQ;oBACjD,IAAIsB,gBAAgB,GAClBF,aAAa,IAAI,CAAC,IAAI,CAAC,SAAS,CAACE;gBAErC;gBAEA,IAAIF,AAAwB,MAAxBA,aAAa,MAAM,EACrBtB,OACE,CAACiB,sCAAsCI,OACvCH,QAAQ,CAAC,gBAAgB,EAAEA,OAAO,GAAG;gBAIzC,OAAO;oBACL,QAAQ;wBACN,SAASI;wBACTL;wBACAD;wBACA,UAAUL,WAAW,QAAQ;oBAC/B;oBACA,OAAO;wBACL,KAAK;oBACP;oBACAZ;gBACF;YACF;QACF;QAEA,OAAOH;IACT;IAEA,MAAM,SACJV,KAAa,EACbI,KAAuB,EACvBC,WAAyB,EACC;QAC1B,MAAMG,UAAU,IAAI,CAAC,sBAAsB,CAACR;QAC5C,MAAM,EAAEuC,KAAK,EAAE,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAACnC,OAAOC;QAC5D,MAAMmC,SAAS,MA
AMhC,QAAQ,YAAY,CAAC+B;QAC1C,MAAM,EAAEE,MAAM,EAAE,GAAGD;QACnB,OAAO;YACLC;YACA,QAAQjC,QAAQ,SAAS;QAC3B;IACF;IAEQ,wBAAwBkC,WAAoB,EAAE;QACpD,OACE,IAAI,CAAC,oBAAoB,IACzBC,oBAAoB,oBAAoB,CACtCC,oCAEDF,CAAAA,cACG5C,uCACAD,2BAA0B;IAElC;IAEA,MAAM,OACJgD,UAAkB,EAClBxC,WAAyB,EACzBU,aAAsB,EACtB+B,SAAmB,EAQnB;QACA,IAAI,CAAC,mBAAmB,CAAC,KAAK;QAE9B,MAAMtC,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aAAa,UAAUoC;QAEzB,MAAME,SAASvC,QAAQ,SAAS;QAEhC,IAAIwC,cAAc;QAClB,MAAMC,WAAmC,EAAE;QAC3C,MAAMC,uBAAuB,IAAI,CAAC,uBAAuB,CACvD7C,AAAuB,kBAAvBA,YAAY,MAAM;QAIpB,MAAO,KAAM;YACX,IAAI2C,cAAcE,sBAAsB;gBACtC,MAAMC,WAAW,CAAC,WAAW,EAAED,qBAAqB,+EAA+E,CAAC;gBAEpI,OAAO1C,QAAQ,eAAe,CAAC2C;YACjC;YAGA,MAAMC,eAAe,IAAI,CAAC,kBAAkB,CAC1CP,YACA9B,eACAV;YAGF,MAAMmC,SAAS,MAAMhC,QAAQ,YAAY,CAAC4C;YAC1C,MAAM3B,aAAiCe,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM;YACrD,IAAIhC,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQiB;gBACRsB;YACF;YAIF,MAAM3C,QAAQqB,WAAW,OAAO,IAAI,EAAE;YACtCwB,SAAS,IAAI,IAAKxB,WAAW,QAAQ,IAAI,EAAE;YAE3C,IAAI4B;YACJ,IAAI;gBACFA,cAAc,MAAM,IAAI,CAAC,uBAAuB,CAACjD,OAAOC,aAAa;oBACnEyC;oBACA,SAAS;gBACX;gBACA,MAAMtC,QAAQ,YAAY,CAAC6C,YAAY,KAAK;YAC9C,EAAE,OAAOrB,OAAO;gBACd,OAAOxB,QAAQ,eAAe,CAC5B,CAAC,4CAA4C,EAAEwB,MAAM,SAAS,EAAEJ,KAAK,SAAS,CAC5ExB,QACC;YAEP;YACA,IAAII,QAAQ,cAAc,IACxB,OAAO;gBACL,QAAQY;gBACR2B;YACF;YAIF,IAAI,CAACtB,WAAW,kCAAkC,EAChD;YAIFuB;QACF;QAEA,OAAO;YACL,QAAQ;gBACNC;YACF;YACAF;QACF;IACF;IAEQ,oBACNO,IAAsE,EACtEC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACpC;QACA,MAAMC,YAA4C;YAChD,MAAM;YACN,SAASJ;YACT,QAAQ;YACR,OAAO;gBACL,YAAYG,mBACP;oBACCF;oBACAE;gBACF,IACAF;YACN;YACA,UAAU,OAAO5C,OAAOgD;gBACtB,MAAM,EAAEjD,IAAI,EAAE,GAAGiD;gBACjB,IAAIC;gBACJ,MAAMC,YAAY,CAACC;oBACjBF,YAAYE;oBACZpD,KAAK,GAAG,GAAG;wBACToD;oBACF;gBACF;gBAGA,MAAMjD,YAAY8C,YAAY,SAAS;gBACvC7C,OAAOD,WAAW;gBAElB,MAAMkD,mBAAmBT,AAAS,YAATA;gBACzB,IAAIU,cAAcT;gBAClB,IAAIU,cAAc;gBAClB,IAAIF,oBAAqBT,CAAAA,AAAS,aAATA,QAAqBA,AAAS,cAATA,IAAiB,GAAI;oBACjEW,cAAc;oBACd,MAAMC,gBACJZ,AAAS,aAATA,OACI,CAAC,kDAAkD,EAAEC,QAAQ,GAC7D,CAAC,+GAA+G,EAAEA,QAAQ;oBAChIS,cAAc;wBACZ,CAA
CC,YAAY,EAAEC;oBACjB;gBACF,OAAO,IAAIH,kBACTC,cAAc;oBACZ,CAACC,YAAY,EAAE,GAAGX,KAAK,EAAE,EAAEC,QAAQ;gBACrC;gBAGF,IAAIY;gBAEJ,IAAIC,uBAAuB;gBAC3B,IAAIZ,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,KAAK,IAAI,CAAC,SAAS,CAAC,mBAAmB,EAAE;oBAC1D7D,MAAM;oBACN,MAAM0E,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,mBAAmB;oBACrDD,uBAAuB,MAAME,kBAC3BD,MACA,KACA,OACAb,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,WAAW,AAAD,MAAM;gBAEzB;gBAEA,IAAI;oBACFW,gBAAgB,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,CACxCH,aACA3D,aACAmD,KACAY,sBACAX;gBAEJ,EAAE,OAAOzB,OAAO;oBACd,IAAIA,iBAAiBuC,cACnBV,UAAU7B,MAAM,IAAI;oBAEtB,MAAMA;gBACR;gBAEA,MAAM,EAAEwC,IAAI,EAAEvC,KAAK,EAAEwC,OAAO,EAAEX,IAAI,EAAE,GAAGK;gBACvCN,UAAUC;gBAEV,IAAIY,eAAeF;gBACnB,IAAIT,kBAEF,IAAI,AAAgB,YAAhB,OAAOS,MACTE,eAAeF;qBACV,IAAIlB,AAAS,cAATA,MAEPoB,eADEF,QAAAA,OACa,QAECA,IAAY,CAACP,YAAY;qBAEtC,IAAIO,QAAAA,MACTE,eAAe;qBACV;oBACL5D,OACE0D,AAAAA,CAAAA,QAAAA,OAAAA,KAAAA,IAAAA,IAAM,CAACP,YAAY,AAAD,MAAM7C,QACxB;oBAEFsD,eAAgBF,IAAY,CAACP,YAAY;gBAC3C;gBAGF,IAAIX,AAAS,aAATA,QAAqB,CAACoB,cAAc;oBACtChE,KAAK,KAAK,GAAGuB;oBACbvB,KAAK,OAAO,GAAG+D;oBACf,MAAM,IAAIE,MAAM,CAAC,kBAAkB,EAAEF,SAAS;gBAChD;gBAEA,OAAO;oBACL,QAAQC;oBACR,KAAKd;oBACL3B;oBACAwC;gBACF;YACF;QACF;QAEA,OAAOf;IACT;IACA,MAAM,yBACJJ,IAA0D,EAC1DC,MAA2B,EAC3BlD,WAAyB,EACzBmD,GAA0B,EAC1BC,gBAAoC,EACP;QAC7B,MAAMjD,UAAU,IAAI,CAAC,sBAAsB,CACzCC,aACE6C,MACA,AAAkB,YAAlB,OAAOC,SAAsBA,SAAS3B,KAAK,SAAS,CAAC2B;QAIzD,MAAMG,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9CJ,MACAC,QACAlD,aACAmD,KACAC;QAGF,MAAMjB,SAAS,MAAMhC,QAAQ,YAAY,CAACkD;QAE1C,IAAI,CAAClB,QACH,MAAM,IAAImC,MACR;QAIJ,MAAM,EAAElC,MAAM,EAAEgC,OAAO,EAAE,GAAGjC;QAE5B,OAAO;YACLC;YACAgC;YACA,QAAQjE,QAAQ,SAAS;QAC3B;IACF;IAEQ,UAAUoE,MAAc,EAA4C;QAC1E,OAAO;YACL,MAAM;YACN,OAAO;gBACLA;YACF;YACA,QAAQ;QACV;IACF;IAEA,MAAM,aAAaA,MAAc,EAAEC,YAA0B,EAAE;QAC7D,OAAO,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;YACtCD;QACF;IACF;IAEA,MAAM,QACJE,SAAsB,EACtBtB,GAA+B,EAC/BnD,WAAyB,EACO;QAChC,MAAM,EAAE0E,UAAU,EAAEtB,gBAAgB,EAAE,GAAGuB,YAAYF;QAErD,MAAMG,cAAc,CAAC,SAAS,EAAEF,YAAY;QAC5C,MAAMvE,UAAU,IAAI,CAAC,sBA
AsB,CACzCC,aAAa,WAAWwE;QAE1B,MAAMlC,SAASvC,QAAQ,SAAS;QAChC,MAAM,EAAE0E,SAAS,EAAEC,eAAe,EAAE,GAAG3B;QAEvC1C,OAAOgE,WAAW;QAClBhE,OAAOoE,WAAW;QAClBpE,OAAOqE,iBAAiB;QAExBrE,OACEqE,mBAAmBD,WACnB,CAAC,iGAAiG,EAAEC,gBAAgB,aAAa,EAAED,UAAU,CAAC,CAAC;QAGjJ,MAAME,mBAAmBnE,KAAK,GAAG;QACjC,IAAID,YAAYC,KAAK,GAAG;QACxB,IAAIoE,eAAe;QACnB,MAAOpE,KAAK,GAAG,KAAKmE,mBAAmBF,UAAW;YAChDlE,YAAYC,KAAK,GAAG;YACpB,MAAMyC,YAAY,MAAM,IAAI,CAAC,mBAAmB,CAC9C,WACAqB,YACA1E,aACA;gBACE,iBAAiB;YACnB,GACAoD;YAGF,MAAMjB,SAAU,MAAMhC,QAAQ,YAAY,CAACkD;YAO3C,IAAIlB,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,EAChB,OAAO;gBACL,QAAQpB;gBACR2B;YACF;YAGFsC,eACE7C,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,OAAO,AAAD,KACb,CAACA,UAAU,CAAC,0BAA0B,EAAEuC,YAAY,IACrD,CAAC,0CAA0C,EAAEA,YAAY;YAC3D,MAAMO,MAAMrE,KAAK,GAAG;YACpB,IAAIqE,MAAMtE,YAAYmE,iBAAiB;gBACrC,MAAM7C,gBAAgB6C,kBAAmBG,CAAAA,MAAMtE,SAAQ;gBACvD,MAAMuE,YAAY,IAAI,CAAC,WAAW,CAAC,eAAe,CAAC;oBACjD,QAAQjD;gBACV;gBACA,MAAM9B,QAAQ,MAAM,CAAC+E;YACvB;QACF;QAEA,OAAO/E,QAAQ,eAAe,CAAC,CAAC,iBAAiB,EAAE6E,cAAc;IACnE;IAviBA,YACEG,iBAAoC,EACpCC,OAAgB,EAChBC,IAIC,CACD;QA3BF;QAEA;QAEA;QAEA,uBAAiB,eAAjB;QAEA,uBAAQ,uBAAR;QAEA;QAEA;QAgBE,IAAI,CAAC,SAAS,GAAGF;QACjB,IAAI,CAAC,OAAO,GAAGC;QACf,IAAI,CAAC,SAAS,GAAGC,KAAK,SAAS;QAC/B,IAAI,CAAC,mBAAmB,GAAGA,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,WAAW;QAC5C,IAAI,CAAC,oBAAoB,GAAGA,KAAK,oBAAoB;QACrD,IAAI,CAAC,mBAAmB,GAAG,IAAIC;QAC/B,IAAI,CAAC,WAAW,GAAG,IAAIC,YAAY;YACjCJ;YACAC;YACA,WAAWC,KAAK,SAAS;QAC3B;IACF;AAohBF"}
|
package/dist/es/agent/utils.mjs
CHANGED
|
@@ -99,7 +99,7 @@ async function matchElementFromCache(context, cacheEntry, cachePrompt, cacheable
|
|
|
99
99
|
return;
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
|
-
const getMidsceneVersion = ()=>"1.0.1-beta-20251028121806.0";
|
|
102
|
+
const getMidsceneVersion = ()=>"1.0.1-beta-20251029093754.0";
|
|
103
103
|
const parsePrompt = (prompt)=>{
|
|
104
104
|
if ('string' == typeof prompt) return {
|
|
105
105
|
textPrompt: prompt,
|
|
@@ -3,9 +3,9 @@ import { systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
|
3
3
|
import { describeUserPage } from "./prompt/util.mjs";
|
|
4
4
|
import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/playwright-generator.mjs";
|
|
5
5
|
import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
|
|
6
|
-
import { AiExtractElementInfo, AiLocateElement, AiLocateSection } from "./inspect.mjs";
|
|
6
|
+
import { AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection } from "./inspect.mjs";
|
|
7
7
|
import { plan } from "./llm-planning.mjs";
|
|
8
8
|
import { AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, dumpActionParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, loadActionParam, parseActionParam } from "./common.mjs";
|
|
9
9
|
import { resizeImageForUiTars, uiTarsPlanning } from "./ui-tars-planning.mjs";
|
|
10
10
|
import { ConversationHistory } from "./conversation-history.mjs";
|
|
11
|
-
export { AIActionType, AiExtractElementInfo, AiLocateElement, AiLocateSection, ConversationHistory, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAI, callAIWithObjectResponse, callAIWithStringResponse, describeUserPage, dumpActionParam, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, loadActionParam, parseActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, uiTarsPlanning };
|
|
11
|
+
export { AIActionType, AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection, ConversationHistory, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAI, callAIWithObjectResponse, callAIWithStringResponse, describeUserPage, dumpActionParam, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, loadActionParam, parseActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, uiTarsPlanning };
|
|
@@ -6,7 +6,7 @@ import { AIActionType, adaptBboxToRect, expandSearchArea, mergeRects } from "./c
|
|
|
6
6
|
import { extractDataQueryPrompt, systemPromptToExtract } from "./prompt/extraction.mjs";
|
|
7
7
|
import { findElementPrompt, systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
8
8
|
import { sectionLocatorInstruction, systemPromptToLocateSection } from "./prompt/llm-section-locator.mjs";
|
|
9
|
-
import { describeUserPage } from "./prompt/util.mjs";
|
|
9
|
+
import { orderSensitiveJudgePrompt, systemPromptToJudgeOrderSensitive } from "./prompt/order-sensitive-judge.mjs";
|
|
10
10
|
import { callAIWithObjectResponse } from "./service-caller/index.mjs";
|
|
11
11
|
const debugInspect = getDebug('ai:inspect');
|
|
12
12
|
const debugSection = getDebug('ai:section');
|
|
@@ -60,7 +60,6 @@ async function AiLocateElement(options) {
|
|
|
60
60
|
const { screenshotBase64 } = context;
|
|
61
61
|
assert(targetElementDescription, "cannot find the target element description");
|
|
62
62
|
const userInstructionPrompt = await findElementPrompt.format({
|
|
63
|
-
pageDescription: await describeUserPage(context),
|
|
64
63
|
targetElementDescription: extraTextFromUserPrompt(targetElementDescription)
|
|
65
64
|
});
|
|
66
65
|
const systemPrompt = systemPromptToLocateElement(vlMode);
|
|
@@ -129,7 +128,6 @@ async function AiLocateElement(options) {
|
|
|
129
128
|
};
|
|
130
129
|
const element = generateElementByPosition(rectCenter);
|
|
131
130
|
errors = [];
|
|
132
|
-
element.isOrderSensitive = 'object' == typeof res.content && null !== res.content && 'isOrderSensitive' in res.content ? res.content.isOrderSensitive : void 0;
|
|
133
131
|
if (element) matchedElements = [
|
|
134
132
|
element
|
|
135
133
|
];
|
|
@@ -261,6 +259,25 @@ async function AiExtractElementInfo(options) {
|
|
|
261
259
|
usage: result.usage
|
|
262
260
|
};
|
|
263
261
|
}
|
|
264
|
-
export { AiExtractElementInfo, AiLocateElement, AiLocateSection };
|
|
262
|
+
async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
|
|
263
|
+
const systemPrompt = systemPromptToJudgeOrderSensitive();
|
|
264
|
+
const userPrompt = orderSensitiveJudgePrompt(description);
|
|
265
|
+
const msgs = [
|
|
266
|
+
{
|
|
267
|
+
role: 'system',
|
|
268
|
+
content: systemPrompt
|
|
269
|
+
},
|
|
270
|
+
{
|
|
271
|
+
role: 'user',
|
|
272
|
+
content: userPrompt
|
|
273
|
+
}
|
|
274
|
+
];
|
|
275
|
+
const result = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);
|
|
276
|
+
return {
|
|
277
|
+
isOrderSensitive: result.content.isOrderSensitive ?? false,
|
|
278
|
+
usage: result.usage
|
|
279
|
+
};
|
|
280
|
+
}
|
|
281
|
+
export { AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection };
|
|
265
282
|
|
|
266
283
|
//# sourceMappingURL=inspect.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/inspect.mjs","sources":["webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport { describeUserPage } from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if 
(multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: await describeUserPage(context),\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. 
Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? 
res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement =\n generateElementByPosition(rectCenter);\n errors = [];\n\n element.isOrderSensitive =\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined;\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 
'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n 
usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = await extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n 
};\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","userInstructionPrompt","findElementPrompt","describeUserPage","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","_options_searchConfig_rect","_options_searchConfig_rect1","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","generateElementByPosition","undefined","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;;;;;;;;AAmDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACv
C;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IASC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,OACEL,0BACA;IAEF,MAAMM,wBAAwB,MAAMC,kBAAkB,MAAM,CAAC;QAC3D,iBAAiB,MAAMC,iBAAiBT;QACxC,0BAA0BX,wBAAwBY;IACpD;IACA,MAAMS,eAAeC,4BAA4BP;IAEjD,IAAIQ,eAAeP;IACnB,IAAIQ,aAAab,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIc,cAAcd,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIe,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIf,QAAQ,YAAY,EAAE;YAWXkB,4BACCC;QAXdZ,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFa,eAAeb,QAAQ,YAAY,CAAC,WAAW;QAC/Cc,aAAa,QAAAI,CAAAA,6BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,2BAA2B,KAAK;QAC7CH,cAAc,QAAAI,CAAAA,8BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,4BAA2B,MAAM;QAC/CH,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIV,AAAW,cAAXA,QAAsB;QAC/B,MAAMe,eAAe,MAAMC,4BAA4BR;QACvDC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC;IAEA,MAAMzB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAML;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMoB,SAAS,MAAM9B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMC,MAAM,MAAMpB,SAASR,MAAM6B,aAAa,eAAe,EAAEpB;IAE/D,MAAMqB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAK1DQ,6BAAAA,uBACAC,6BAAAA;YALFL,UAAUM,gBACRV,IAAI,OAAO,CAAC,IAAI,EAChBT,YACAC,aAAAA,QACAgB,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD
,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/BhB,oBACAC,qBACAZ;YAGFlB,aAAa,WAAWwC;YAExB,MAAMO,aAAa;gBACjB,GAAGP,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMQ,UACJC,0BAA0BF;YAC5BL,SAAS,EAAE;YAEXM,QAAQ,gBAAgB,GACtB,AAAuB,YAAvB,OAAOZ,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCc;YAEN,IAAIF,SACFP,kBAAkB;gBAACO;aAAQ;QAE/B;IACF,EAAE,OAAOG,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACT,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEU,IAAI,CAAC,CAAC;aAFtBV,SAAS;YAACU;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMZ;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;IAClB;AACF;AAEO,eAAekB,gBAAgBzC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEyC,kBAAkB,EAAEtC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMU,eAAegC,4BAA4BtC;IACjD,MAAMuC,gCAAgC,MAAMC,0BAA0B,MAAM,CAAC;QAC3E,oBAAoBvD,wBAAwBoD;IAC9C;IACA,MAAM/C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKL;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMsC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMpB,SAAS,MAAM9B,mBAAmB;YACtC,QAAQkD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA/C,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMwB,SAAS,MAAMC,yBACnBpD,MACA6B,aAAa,YAAY,EACzBpB;IAGF,IAAI4C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAajB,gBACjBgB,aACAhD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0B6D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DzD,aAAa,wBAAwB8D;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAASvB,MAAM,OAAO,CAACuB,OAC/B,GAAG,CAAC,CAACA,OACGpB,gBACLoB,MACApD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EAC
nBI;QAGNhB,aAAa,qBAAqB+D;QAGlC,MAAME,aAAaC,WAAW;YAACL;eAAeE;SAAe;QAC7D/D,aAAa,iBAAiBiE;QAG9BN,cAAcQ,iBAAiBF,YAAYrD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B2D;IAC1C;IAEA,IAAIS,cAAcnD;IAClB,IAAI0C,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BrD,kBACA0C,aACA3C,AAAW,cAAXA;QAEFoD,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAapB,KAAK,SAAS,CAACoB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwB5D,OAO7C;IACC,MAAM,EAAE6D,SAAS,EAAE5D,OAAO,EAAE6D,aAAa,EAAErE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAMW,eAAeoD;IACrB,MAAM,EAAEzD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM+D,wBAAwB,MAAMC,uBAClCjE,QAAQ,eAAe,IAAI,IAC3B6D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK5D;YACL,QAAQ;QACV;IACF;IAGF4D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMrE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASgB;QAAa;QACxC;YACE,MAAM;YACN,SAASuD;QACX;KACD;IAED,IAAIzE,kBAAkB;QACpB,MAAM6B,SAAS,MAAM9B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAI2B;IACf;IAEA,MAAMwB,SAAS,MAAMC,yBACnBpD,MACA6B,aAAa,YAAY,EACzBpB;IAEF,OAAO;QACL,aAAa0C,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/inspect.mjs","sources":["webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["import type {\n AIDataExtractionResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n Rect,\n ReferenceImage,\n ServiceExtractOption,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { generateElementByPosition } from '@midscene/shared/extractor/dom-util';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport type { LocateResultElement } from '@midscene/shared/types';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n orderSensitiveJudgePrompt,\n systemPromptToJudgeOrderSensitive,\n} from './prompt/order-sensitive-judge';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n 
const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement(options: {\n context: UIContext;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: {\n elements: LocateResultElement[];\n errors?: string[];\n };\n rect?: Rect;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. 
Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements = 'elements' in res.content ? res.content.elements : [];\n let errors: string[] | undefined =\n 'errors' in res.content ? 
res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n\n const element: LocateResultElement =\n generateElementByPosition(rectCenter);\n errors = [];\n\n if (element) {\n matchedElements = [element];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements as LocateResultElement[],\n errors: errors as string[],\n },\n rawResponse,\n usage: res.usage,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await 
promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<T>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: 
TMultimodalPrompt;\n context: UIContext;\n pageDescription?: string;\n extractOption?: ServiceExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const systemPrompt = systemPromptToExtract();\n const { screenshotBase64 } = context;\n\n const extractDataPromptText = await extractDataQueryPrompt(\n options.pageDescription || '',\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n usage: result.usage,\n };\n}\n\nexport async function AiJudgeOrderSensitive(\n description: string,\n callAIFn: typeof callAIWithObjectResponse<{ isOrderSensitive: boolean }>,\n modelConfig: IModelConfig,\n): Promise<{\n isOrderSensitive: boolean;\n usage?: AIUsageInfo;\n}> {\n const systemPrompt = systemPromptToJudgeOrderSensitive();\n const userPrompt = orderSensitiveJudgePrompt(description);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userPrompt,\n },\n ];\n\n const result = await callAIFn(\n msgs,\n AIActionType.INSPECT_ELEMENT, // Reuse existing action type for now\n modelConfig,\n );\n\n return {\n isOrderSensitive: result.content.isOrderSensitive ?? 
false,\n usage: result.usage,\n };\n}\n"],"names":["debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","_options_searchConfig_rect","_options_searchConfig_rect1","paddedResult","paddingToMatchBlockByBase64","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","generateElementByPosition","e","msg","Error","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiJudgeOrderSensitive","description","systemPromptToJudgeOrderSensitive","userPrompt","orderSensitiveJudgePrompt"],"mappings":";;;;;;;;;;AAsDA,MAAMA,eAAeC,SAAS;AAC9B,MAAMC,eAAeD,SAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,mBACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5
CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAAgBC,OASrC;IASC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7BM,OACEL,0BACA;IAEF,MAAMM,wBAAwB,MAAMC,kBAAkB,MAAM,CAAC;QAC3D,0BAA0BnB,wBAAwBY;IACpD;IACA,MAAMQ,eAAeC,4BAA4BN;IAEjD,IAAIO,eAAeN;IACnB,IAAIO,aAAaZ,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIa,cAAcb,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIc,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAId,QAAQ,YAAY,EAAE;YAWXiB,4BACCC;QAXdX,OACEP,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFO,OACEP,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFY,eAAeZ,QAAQ,YAAY,CAAC,WAAW;QAC/Ca,aAAa,QAAAI,CAAAA,6BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,2BAA2B,KAAK;QAC7CH,cAAc,QAAAI,CAAAA,8BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,4BAA2B,MAAM;QAC/CH,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIT,AAAW,cAAXA,QAAsB;QAC/B,MAAMc,eAAe,MAAMC,4BAA4BR;QACvDC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC;IAEA,MAAMxB,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAON,0BAAuC;QAChD,MAAMmB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAMC,MAAM,MAAMnB,SAASR,MAAM4B,aAAa,eAAe,EAAEnB;IAE/D,MAAMoB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBAAkB,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IAC3E,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAK1DQ,6BAAAA,uBACAC,6BAAAA;YALFL,UAAUM,gBACRV,IAAI,OAAO,CAAC,IAAI,EAChBT,YACAC,aAAAA,QACAgB,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,
8BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/BhB,oBACAC,qBACAX;YAGFlB,aAAa,WAAWuC;YAExB,MAAMO,aAAa;gBACjB,GAAGP,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YAEA,MAAMQ,UACJC,0BAA0BF;YAC5BL,SAAS,EAAE;YAEX,IAAIM,SACFP,kBAAkB;gBAACO;aAAQ;QAE/B;IACF,EAAE,OAAOE,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACR,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAES,IAAI,CAAC,CAAC;aAFtBT,SAAS;YAACS;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMX;QACN,aAAa;YACX,UAAUC;YACV,QAAQC;QACV;QACAJ;QACA,OAAOF,IAAI,KAAK;IAClB;AACF;AAEO,eAAeiB,gBAAgBvC,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEuC,kBAAkB,EAAEpC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMS,eAAe+B,4BAA4BpC;IACjD,MAAMqC,gCAAgC,MAAMC,0BAA0B,MAAM,CAAC;QAC3E,oBAAoBrD,wBAAwBkD;IAC9C;IACA,MAAM7C,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKJ;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMoC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMnB,SAAS,MAAM7B,mBAAmB;YACtC,QAAQgD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACA7C,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAMuB,SAAS,MAAMC,yBACnBlD,MACA4B,aAAa,YAAY,EACzBnB;IAGF,IAAI0C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAahB,gBACjBe,aACA9C,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0B2D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DvD,aAAa,wBAAwB4D;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAStB,MAAM,OAAO,CAACsB,OAC/B,GAAG,CAAC,CAACA,OACGnB,gBACLmB,MACAlD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqB6D;QAGlC,MAAME,aAAaC,WAAW;YAACL;
eAAeE;SAAe;QAC7D7D,aAAa,iBAAiB+D;QAG9BN,cAAcQ,iBAAiBF,YAAYnD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2ByD;IAC1C;IAEA,IAAIS,cAAcjD;IAClB,IAAIwC,aAAa;QACf,MAAMU,gBAAgB,MAAMC,WAC1BnD,kBACAwC,aACAzC,AAAW,cAAXA;QAEFkD,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAanB,KAAK,SAAS,CAACmB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAAwB1D,OAO7C;IACC,MAAM,EAAE2D,SAAS,EAAE1D,OAAO,EAAE2D,aAAa,EAAEnE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAMU,eAAemD;IACrB,MAAM,EAAEvD,gBAAgB,EAAE,GAAGL;IAE7B,MAAM6D,wBAAwB,MAAMC,uBAClC/D,QAAQ,eAAe,IAAI,IAC3B2D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK1D;YACL,QAAQ;QACV;IACF;IAGF0D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMnE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,IAAIvE,kBAAkB;QACpB,MAAM4B,SAAS,MAAM7B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAI0B;IACf;IAEA,MAAMuB,SAAS,MAAMC,yBACnBlD,MACA4B,aAAa,YAAY,EACzBnB;IAEF,OAAO;QACL,aAAawC,OAAO,OAAO;QAC3B,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAeqB,sBACpBC,WAAmB,EACnB/D,QAAwE,EACxEC,WAAyB;IAKzB,MAAMM,eAAeyD;IACrB,MAAMC,aAAaC,0BAA0BH;IAE7C,MAAMvE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASe;QAAa;QACxC;YACE,MAAM;YACN,SAAS0D;QACX;KACD;IAED,MAAMxB,SAAS,MAAMzC,SACnBR,MACA4B,aAAa,eAAe,EAC5BnB;IAGF,OAAO;QACL,kBAAkBwC,OAAO,OAAO,CAAC,gBAAgB,IAAI;QACrD,OAAOA,OAAO,KAAK;IACrB;AACF"}
|
|
@@ -4,68 +4,44 @@ function systemPromptToLocateElement(vlMode) {
|
|
|
4
4
|
const bboxComment = bboxDescription(vlMode);
|
|
5
5
|
return `
|
|
6
6
|
## Role:
|
|
7
|
-
You are an
|
|
7
|
+
You are an AI assistant that helps identify UI elements.
|
|
8
8
|
|
|
9
9
|
## Objective:
|
|
10
|
-
- Identify elements in screenshots
|
|
11
|
-
-
|
|
12
|
-
- Determine whether the user's description is order-sensitive (e.g., contains phrases like 'the third item in the list', 'the last button', etc.).
|
|
10
|
+
- Identify elements in screenshots that match the user's description.
|
|
11
|
+
- Provide the coordinates of the element that matches the user's description.
|
|
13
12
|
|
|
14
13
|
## Output Format:
|
|
15
14
|
\`\`\`json
|
|
16
15
|
{
|
|
17
16
|
"bbox": [number, number, number, number], // ${bboxComment}
|
|
18
|
-
"errors"?: string[]
|
|
19
|
-
"isOrderSensitive": boolean // Whether the targetElementDescription is order-sensitive (true/false)
|
|
17
|
+
"errors"?: string[]
|
|
20
18
|
}
|
|
21
19
|
\`\`\`
|
|
22
20
|
|
|
23
21
|
Fields:
|
|
24
|
-
* \`bbox\` is the bounding box of the element that matches the user's description
|
|
25
|
-
* \`isOrderSensitive\` is a boolean indicating whether the user's description is order-sensitive (true/false)
|
|
22
|
+
* \`bbox\` is the bounding box of the element that matches the user's description
|
|
26
23
|
* \`errors\` is an optional array of error messages (if any)
|
|
27
24
|
|
|
28
|
-
|
|
29
|
-
- "the third item in the list"
|
|
30
|
-
- "the last button"
|
|
31
|
-
- "the first input box"
|
|
32
|
-
- "the second row"
|
|
33
|
-
|
|
34
|
-
Not order-sensitive means the description is like:
|
|
35
|
-
- "confirm button"
|
|
36
|
-
- "search box"
|
|
37
|
-
- "password input"
|
|
38
|
-
|
|
39
|
-
For example, when an element is found and the description is order-sensitive:
|
|
25
|
+
For example, when an element is found:
|
|
40
26
|
\`\`\`json
|
|
41
27
|
{
|
|
42
28
|
"bbox": [100, 100, 200, 200],
|
|
43
|
-
"isOrderSensitive": true,
|
|
44
29
|
"errors": []
|
|
45
30
|
}
|
|
46
31
|
\`\`\`
|
|
47
32
|
|
|
48
|
-
When no element is found
|
|
33
|
+
When no element is found:
|
|
49
34
|
\`\`\`json
|
|
50
35
|
{
|
|
51
36
|
"bbox": [],
|
|
52
|
-
"isOrderSensitive": false,
|
|
53
37
|
"errors": ["I can see ..., but {some element} is not found"]
|
|
54
38
|
}
|
|
55
39
|
\`\`\`
|
|
56
40
|
`;
|
|
57
41
|
}
|
|
58
42
|
const findElementPrompt = new PromptTemplate({
|
|
59
|
-
template:
|
|
60
|
-
Here is the item user want to find:
|
|
61
|
-
=====================================
|
|
62
|
-
{targetElementDescription}
|
|
63
|
-
=====================================
|
|
64
|
-
|
|
65
|
-
{pageDescription}
|
|
66
|
-
`,
|
|
43
|
+
template: "Find: {targetElementDescription}",
|
|
67
44
|
inputVariables: [
|
|
68
|
-
"pageDescription",
|
|
69
45
|
"targetElementDescription"
|
|
70
46
|
]
|
|
71
47
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import { PromptTemplate } from '@langchain/core/prompts';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an
|
|
1
|
+
{"version":3,"file":"ai-model/prompt/llm-locator.mjs","sources":["webpack://@midscene/core/./src/ai-model/prompt/llm-locator.ts"],"sourcesContent":["import { PromptTemplate } from '@langchain/core/prompts';\nimport type { TVlModeTypes } from '@midscene/shared/env';\nimport { bboxDescription } from './common';\nexport function systemPromptToLocateElement(vlMode: TVlModeTypes | undefined) {\n const bboxComment = bboxDescription(vlMode);\n return `\n## Role:\nYou are an AI assistant that helps identify UI elements.\n\n## Objective:\n- Identify elements in screenshots that match the user's description.\n- Provide the coordinates of the element that matches the user's description.\n\n## Output Format:\n\\`\\`\\`json\n{\n \"bbox\": [number, number, number, number], // ${bboxComment}\n \"errors\"?: string[]\n}\n\\`\\`\\`\n\nFields:\n* \\`bbox\\` is the bounding box of the element that matches the user's description\n* \\`errors\\` is an optional array of error messages (if any)\n\nFor example, when an element is found:\n\\`\\`\\`json\n{\n \"bbox\": [100, 100, 200, 200],\n \"errors\": []\n}\n\\`\\`\\`\n\nWhen no element is found:\n\\`\\`\\`json\n{\n \"bbox\": [],\n \"errors\": [\"I can see ..., but {some element} is not found\"]\n}\n\\`\\`\\`\n`;\n}\n\nexport const findElementPrompt = new PromptTemplate({\n template: 'Find: {targetElementDescription}',\n inputVariables: ['targetElementDescription'],\n});\n"],"names":["systemPromptToLocateElement","vlMode","bboxComment","bboxDescription","findElementPrompt","PromptTemplate"],"mappings":";;AAGO,SAASA,4BAA4BC,MAAgC;IAC1E,MAAMC,cAAcC,gBAAgBF;IACpC,OAAO,CAAC;;;;;;;;;;;gDAWsC,EAAEC,YAAY;;;;;;;;;;;;;;;;;;;;;;;;AAwB9D,CAAC;AACD;AAEO,MAAME,oBAAoB,IAAIC,eAAe;IAClD,UAAU;IACV,gBAAgB;QAAC;KAA2B;AAC9C"}
|