@midscene/core 0.26.7-beta-20250818035341.0 → 0.26.7-beta-20250820105545.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/action-executor.mjs +0 -8
- package/dist/es/ai-model/action-executor.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +73 -52
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +3 -3
- package/dist/es/ai-model/inspect.mjs +29 -66
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +27 -24
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs +1 -25
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +50 -23
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/playwright-generator.mjs +9 -3
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +2 -2
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +9 -3
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +75 -118
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +5 -5
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/index.mjs +3 -2
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +14 -97
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/insight/utils.mjs +1 -3
- package/dist/es/insight/utils.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +5 -6
- package/dist/es/utils.mjs.map +1 -1
- package/dist/lib/ai-model/action-executor.js +0 -8
- package/dist/lib/ai-model/action-executor.js.map +1 -1
- package/dist/lib/ai-model/common.js +97 -55
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +16 -4
- package/dist/lib/ai-model/inspect.js +29 -69
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +26 -23
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -29
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +52 -25
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/playwright-generator.js +9 -3
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +2 -2
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +9 -3
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +78 -124
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +5 -5
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/index.js +20 -7
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +10 -93
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/insight/utils.js +1 -3
- package/dist/lib/insight/utils.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +4 -5
- package/dist/lib/utils.js.map +1 -1
- package/dist/types/ai-model/common.d.ts +162 -8
- package/dist/types/ai-model/index.d.ts +2 -1
- package/dist/types/ai-model/inspect.d.ts +3 -8
- package/dist/types/ai-model/llm-planning.d.ts +1 -1
- package/dist/types/ai-model/prompt/assertion.d.ts +0 -3
- package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -2
- package/dist/types/ai-model/prompt/util.d.ts +2 -1
- package/dist/types/ai-model/service-caller/index.d.ts +6 -6
- package/dist/types/ai-model/ui-tars-planning.d.ts +3 -1
- package/dist/types/index.d.ts +3 -1
- package/dist/types/insight/index.d.ts +1 -5
- package/dist/types/types.d.ts +11 -12
- package/dist/types/yaml.d.ts +7 -6
- package/package.json +4 -3
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import { getVersion } from "../utils.mjs";
|
|
2
|
-
import { MIDSCENE_MODEL_NAME, getAIConfig, uiTarsModelVersion, vlLocateMode } from "@midscene/shared/env";
|
|
3
2
|
import { assert } from "@midscene/shared/utils";
|
|
4
3
|
function _define_property(obj, key, value) {
|
|
5
4
|
if (key in obj) Object.defineProperty(obj, key, {
|
|
@@ -108,15 +107,8 @@ class Executor {
|
|
|
108
107
|
return null;
|
|
109
108
|
}
|
|
110
109
|
dump() {
|
|
111
|
-
let modelDescription = '';
|
|
112
|
-
if (vlLocateMode()) {
|
|
113
|
-
const uiTarsModelVer = uiTarsModelVersion();
|
|
114
|
-
modelDescription = uiTarsModelVer ? `UI-TARS=${uiTarsModelVer}` : `${vlLocateMode()} mode`;
|
|
115
|
-
}
|
|
116
110
|
const dumpData = {
|
|
117
111
|
sdkVersion: getVersion(),
|
|
118
|
-
model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',
|
|
119
|
-
model_description: modelDescription,
|
|
120
112
|
logTime: Date.now(),
|
|
121
113
|
name: this.name,
|
|
122
114
|
tasks: this.tasks
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/action-executor.mjs","sources":["webpack://@midscene/core/./src/ai-model/action-executor.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport {\n MIDSCENE_MODEL_NAME,\n getAIConfig,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<{ output: any; thought?: string } | undefined> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error = e;\n task.errorMessage =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n const { thought, output } = this.tasks[outputIndex];\n return {\n thought,\n output,\n };\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n let modelDescription = '';\n\n if (vlLocateMode()) {\n const uiTarsModelVer = uiTarsModelVersion();\n if (uiTarsModelVer) {\n modelDescription = `UI-TARS=${uiTarsModelVer}`;\n } else {\n modelDescription = `${vlLocateMode()} mode`;\n }\n }\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n model_name: getAIConfig(MIDSCENE_MODEL_NAME) || '',\n model_description: modelDescription,\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n"],"names":["Executor","task","_this_latestErrorTask","_this_latestErrorTask1","assert","Array","item","console","nextPendingIndex","taskIndex","successfullyCompleted","previousFindOutput","Date","e","executor","param","returnValue","executorContext","Object","i","outputIndex","Math","thought","output","errorTaskIndex","modelDescription","vlLocateMode","uiTarsModelVer","uiTarsModelVersion","dumpData","getVersion","getAIConfig","MIDSCENE_MODEL_NAME","name","options"],"mappings":";;;;;;;;;;;;;AAkBO,MAAMA;IAyBH,kBAAkBC,IAAwB,EAAiB;QACjE,OAAO;YACL,QAAQ;YACR,GAAGA,IAAI;QACT;IACF;IAEA,MAAM,OAAOA,IAA+C,EAAiB;YAGhBC,uBAAkCC;QAF7FC,OACE,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACX,CAAC,sDAAsD,EAAE,QAAAF,CAAAA,wBAAAA,IAAI,CAAC,eAAe,EAAC,IAArBA,KAAAA,IAAAA,sBAAwB,KAAK,CAAC,EAAE,EAAE,QAAAC,CAAAA,yBAAAA,IAAI,CAAC,eAAe,EAAC,IAArBA,KAAAA,IAAAA,uBAAwB,UAAU,EAAE;QAEjI,IAAIE,MAAM,OAAO,CAACJ,OAChB,IAAI,CAAC,KAAK,CAAC,IAAI,IAAIA,KAAK,GAAG,CAAC,CAACK,OAAS,IAAI,CAAC,iBAAiB,CAACA;aAE7D,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAACL;QAEzC,IAAI,AAAgB,cAAhB,IAAI,CAAC,MAAM,EACb,IAAI,CAAC,MAAM,GAAG;IAElB;IAEA,MAAM,QAAgE;QACpE,IAAI,AAAgB,WAAhB,IAAI,CAAC,MAAM,IAAe,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAChDM,QAAQ,IAAI,CACV;QAIJH,OAAO,AAAgB,cAAhB,IAAI,CAAC,MAAM,EAAgB;QAClCA,OAAO,AAAgB,gBAAhB,IAAI,CAAC,MAAM,EAAkB;QACpCA,OAAO,AAAgB,YAAhB,IAAI,CAAC,MAAM,EAAc;QAEhC,MAAMI,mBAAmB,IAAI,CAAC,KAAK,CAAC,SAAS,CAC3C,CAACP,OAASA,AAAgB,cAAhBA,KAAK,MAAM;QAEvB,IAAIO,mBAAmB,GAErB;QAGF,IAAI,CAAC,MAAM,GAAG;QACd,IAAIC,YAAYD;QAChB,IAAIE,wBAAwB;QAE5B,IAAIC;QAEJ,MAAOF,YAAY,IAAI,CAAC,KAAK,CAAC,MAAM,CAAE;YACpC,MAAMR,OAAO,IAAI,CAAC,KAAK,CAACQ,UAAU;YAClCL,OACEH,AAAgB,cAAhBA,KAAK,MAAM,EACX,CAAC,wCAAwC,EAAEA,KAAK,MAAM,EAAE;YAE1DA,KAAK,MAAM,GAAG;gBACZ,OAAOW,KAAK,GAAG;YACjB;YACA,IAAI;gBACFX,KAAK,MAAM,GAAG;gBACd,IAAI;oBACF,IAAI,IAAI,CAAC,WAAW,EAClB,MAAM,IAAI,CAAC,WAAW,CAACA;gBAE3B,EAAE,OAAOY,GAAG;oBACVN,QAAQ,KAAK,CAAC,wBAAwBM;gBACxC;gBACAT,OACE;oBAAC;oBAAW;oBAAU;iBAAW,CAAC,OAAO,CAACH,KAAK,IAAI,KAAK,GACxD,CAAC,uBAAuB,EAAEA,KAAK,IAAI,EAAE;gBAGvC,MAAM,EAAEa,QAAQ,EAAEC,KAAK,EAAE,GAAGd;gBAC5BG,OAAOU,UAAU,CAAC,oCAAoC,EAAEb,KAAK,IAAI,EAAE;gBAEnE,IAAIe;gBACJ,MAAMC,kBAAmC;oBACvChB;oBACA,SAASU,QAAAA,qBAAAA,KAAAA,IAAAA,mBAAoB,OAAO;gBACtC;gBAEA,IAAIV,AAAc,cAAdA,KAAK,IAAI,EAAgB;oBAC3BG,OACEH,AAAiB,aAAjBA,KAAK,OAAO,IACVA,AAAiB,YAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,cAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,EACd,CAAC,6BAA6B,EAAEA,KAAK,OAAO,EAAE;oBAEhDe,cAAc,MAAMf,KAAK,QAAQ,CAACc,OAAOE;oBACzC,IAAIhB,AAAiB,aAAjBA,KAAK,OAAO,EACdU,qBACEK,QAAAA,cAAAA,KAAAA,IAAAA,YACC,MAAM;gBAEb,OAAO,IAAIf,AAAc,aAAdA,KAAK,IAAI,IAAiBA,AAAc,eAAdA,KAAK,IAAI,EAC5Ce,cAAc,MAAMf,KAAK,QAAQ,CAACc,OAAOE;qBACpC;oBACLV,QAAQ,IAAI,CACV,CAAC,uBAAuB,EAAEN,KAAK,IAAI,CAAC,iCAAiC,CAAC;oBAExEe,cAAc,MAAMf,KAAK,QAAQ,CAACc,OAAOE;gBAC3C;gBAEAC,OAAO,MAAM,CAACjB,MAAMe;gBACpBf,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGW,KAAK,GAAG;gBAC1BX,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtDQ;YACF,EAAE,OAAOI,GAAQ;gBACfH,wBAAwB;gBACxBT,KAAK,KAAK,GAAGY;gBACbZ,KAAK,YAAY,GACfY,AAAAA,CAAAA,QAAAA,IAAAA,KAAAA,IAAAA,EAAG,OAAO,AAAD,KAAM,CAAa,YAAb,OAAOA,IAAiBA,IAAI,uBAAsB;gBACnEZ,KAAK,UAAU,GAAGY,EAAE,KAAK;gBAEzBZ,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGW,KAAK,GAAG;gBAC1BX,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtD;YACF;QACF;QAGA,IAAK,IAAIkB,IAAIV,YAAY,GAAGU,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAEA,IACjD,IAAI,CAAC,KAAK,CAACA,EAAE,CAAC,MAAM,GAAG;QAGzB,IAAIT,uBACF,IAAI,CAAC,MAAM,GAAG;aAEd,IAAI,CAAC,MAAM,GAAG;QAGhB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAErB,MAAMU,cAAcC,KAAK,GAAG,CAACZ,WAAW,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG;YAC5D,MAAM,EAAEa,OAAO,EAAEC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAACH,YAAY;YACnD,OAAO;gBACLE;gBACAC;YACF;QACF;IACF;IAEA,iBAA0B;QACxB,OAAO,AAAgB,YAAhB,IAAI,CAAC,MAAM;IACpB;IAEA,kBAAwC;QACtC,IAAI,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACb,OAAO;QAET,MAAMC,iBAAiB,IAAI,CAAC,KAAK,CAAC,SAAS,CACzC,CAACvB,OAASA,AAAgB,aAAhBA,KAAK,MAAM;QAEvB,IAAIuB,kBAAkB,GACpB,OAAO,IAAI,CAAC,KAAK,CAACA,eAAe;QAEnC,OAAO;IACT;IAEA,OAAsB;QACpB,IAAIC,mBAAmB;QAEvB,IAAIC,gBAAgB;YAClB,MAAMC,iBAAiBC;YAErBH,mBADEE,iBACiB,CAAC,QAAQ,EAAEA,gBAAgB,GAE3B,GAAGD,eAAe,KAAK,CAAC;QAE/C;QACA,MAAMG,WAA0B;YAC9B,YAAYC;YACZ,YAAYC,YAAYC,wBAAwB;YAChD,mBAAmBP;YACnB,SAASb,KAAK,GAAG;YACjB,MAAM,IAAI,CAAC,IAAI;YACf,OAAO,IAAI,CAAC,KAAK;QACnB;QACA,OAAOiB;IACT;IArMA,YACEI,IAAY,EACZC,OAEC,CACD;QAdF;QAEA;QAGA;QAEA;QAQE,IAAI,CAAC,MAAM,GACTA,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK,AAAD,KAAKA,QAAQ,KAAK,CAAC,MAAM,GAAG,IAAI,YAAY;QAC3D,IAAI,CAAC,IAAI,GAAGD;QACZ,IAAI,CAAC,KAAK,GAAIC,AAAAA,CAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK,AAAD,KAAK,EAAC,EAAG,GAAG,CAAC,CAAC5B,OACvC,IAAI,CAAC,iBAAiB,CAACA;QAEzB,IAAI,CAAC,WAAW,GAAG4B,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,WAAW;IACzC;AAyLF"}
|
|
1
|
+
{"version":3,"file":"ai-model/action-executor.mjs","sources":["webpack://@midscene/core/./src/ai-model/action-executor.ts"],"sourcesContent":["import type {\n ExecutionDump,\n ExecutionTask,\n ExecutionTaskApply,\n ExecutionTaskInsightLocateOutput,\n ExecutionTaskProgressOptions,\n ExecutionTaskReturn,\n ExecutorContext,\n} from '@/types';\nimport { getVersion } from '@/utils';\nimport { assert } from '@midscene/shared/utils';\n\nexport class Executor {\n name: string;\n\n tasks: ExecutionTask[];\n\n // status of executor\n status: 'init' | 'pending' | 'running' | 'completed' | 'error';\n\n onTaskStart?: ExecutionTaskProgressOptions['onTaskStart'];\n\n constructor(\n name: string,\n options?: ExecutionTaskProgressOptions & {\n tasks?: ExecutionTaskApply[];\n },\n ) {\n this.status =\n options?.tasks && options.tasks.length > 0 ? 'pending' : 'init';\n this.name = name;\n this.tasks = (options?.tasks || []).map((item) =>\n this.markTaskAsPending(item),\n );\n this.onTaskStart = options?.onTaskStart;\n }\n\n private markTaskAsPending(task: ExecutionTaskApply): ExecutionTask {\n return {\n status: 'pending',\n ...task,\n };\n }\n\n async append(task: ExecutionTaskApply[] | ExecutionTaskApply): Promise<void> {\n assert(\n this.status !== 'error',\n `executor is in error state, cannot append task\\nerror=${this.latestErrorTask()?.error}\\n${this.latestErrorTask()?.errorStack}`,\n );\n if (Array.isArray(task)) {\n this.tasks.push(...task.map((item) => this.markTaskAsPending(item)));\n } else {\n this.tasks.push(this.markTaskAsPending(task));\n }\n if (this.status !== 'running') {\n this.status = 'pending';\n }\n }\n\n async flush(): Promise<{ output: any; thought?: string } | undefined> {\n if (this.status === 'init' && this.tasks.length > 0) {\n console.warn(\n 'illegal state for executor, status is init but tasks are not empty',\n );\n }\n\n assert(this.status !== 'running', 'executor is already running');\n assert(this.status !== 'completed', 'executor is already completed');\n assert(this.status !== 'error', 'executor is in error state');\n\n const nextPendingIndex = this.tasks.findIndex(\n (task) => task.status === 'pending',\n );\n if (nextPendingIndex < 0) {\n // all tasks are completed\n return;\n }\n\n this.status = 'running';\n let taskIndex = nextPendingIndex;\n let successfullyCompleted = true;\n\n let previousFindOutput: ExecutionTaskInsightLocateOutput | undefined;\n\n while (taskIndex < this.tasks.length) {\n const task = this.tasks[taskIndex];\n assert(\n task.status === 'pending',\n `task status should be pending, but got: ${task.status}`,\n );\n task.timing = {\n start: Date.now(),\n };\n try {\n task.status = 'running';\n try {\n if (this.onTaskStart) {\n await this.onTaskStart(task);\n }\n } catch (e) {\n console.error('error in onTaskStart', e);\n }\n assert(\n ['Insight', 'Action', 'Planning'].indexOf(task.type) >= 0,\n `unsupported task type: ${task.type}`,\n );\n\n const { executor, param } = task;\n assert(executor, `executor is required for task type: ${task.type}`);\n\n let returnValue;\n const executorContext: ExecutorContext = {\n task,\n element: previousFindOutput?.element,\n };\n\n if (task.type === 'Insight') {\n assert(\n task.subType === 'Locate' ||\n task.subType === 'Query' ||\n task.subType === 'Assert' ||\n task.subType === 'Boolean' ||\n task.subType === 'Number' ||\n task.subType === 'String',\n `unsupported insight subType: ${task.subType}`,\n );\n returnValue = await task.executor(param, executorContext);\n if (task.subType === 'Locate') {\n previousFindOutput = (\n returnValue as ExecutionTaskReturn<ExecutionTaskInsightLocateOutput>\n )?.output;\n }\n } else if (task.type === 'Action' || task.type === 'Planning') {\n returnValue = await task.executor(param, executorContext);\n } else {\n console.warn(\n `unsupported task type: ${task.type}, will try to execute it directly`,\n );\n returnValue = await task.executor(param, executorContext);\n }\n\n Object.assign(task, returnValue);\n task.status = 'finished';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n taskIndex++;\n } catch (e: any) {\n successfullyCompleted = false;\n task.error = e;\n task.errorMessage =\n e?.message || (typeof e === 'string' ? e : 'error-without-message');\n task.errorStack = e.stack;\n\n task.status = 'failed';\n task.timing.end = Date.now();\n task.timing.cost = task.timing.end - task.timing.start;\n break;\n }\n }\n\n // set all remaining tasks as cancelled\n for (let i = taskIndex + 1; i < this.tasks.length; i++) {\n this.tasks[i].status = 'cancelled';\n }\n\n if (successfullyCompleted) {\n this.status = 'completed';\n } else {\n this.status = 'error';\n }\n\n if (this.tasks.length) {\n // return the last output\n const outputIndex = Math.min(taskIndex, this.tasks.length - 1);\n const { thought, output } = this.tasks[outputIndex];\n return {\n thought,\n output,\n };\n }\n }\n\n isInErrorState(): boolean {\n return this.status === 'error';\n }\n\n latestErrorTask(): ExecutionTask | null {\n if (this.status !== 'error') {\n return null;\n }\n const errorTaskIndex = this.tasks.findIndex(\n (task) => task.status === 'failed',\n );\n if (errorTaskIndex >= 0) {\n return this.tasks[errorTaskIndex];\n }\n return null;\n }\n\n dump(): ExecutionDump {\n const dumpData: ExecutionDump = {\n sdkVersion: getVersion(),\n logTime: Date.now(),\n name: this.name,\n tasks: this.tasks,\n };\n return dumpData;\n }\n}\n"],"names":["Executor","task","_this_latestErrorTask","_this_latestErrorTask1","assert","Array","item","console","nextPendingIndex","taskIndex","successfullyCompleted","previousFindOutput","Date","e","executor","param","returnValue","executorContext","Object","i","outputIndex","Math","thought","output","errorTaskIndex","dumpData","getVersion","name","options"],"mappings":";;;;;;;;;;;;AAYO,MAAMA;IAyBH,kBAAkBC,IAAwB,EAAiB;QACjE,OAAO;YACL,QAAQ;YACR,GAAGA,IAAI;QACT;IACF;IAEA,MAAM,OAAOA,IAA+C,EAAiB;YAGhBC,uBAAkCC;QAF7FC,OACE,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACX,CAAC,sDAAsD,EAAE,QAAAF,CAAAA,wBAAAA,IAAI,CAAC,eAAe,EAAC,IAArBA,KAAAA,IAAAA,sBAAwB,KAAK,CAAC,EAAE,EAAE,QAAAC,CAAAA,yBAAAA,IAAI,CAAC,eAAe,EAAC,IAArBA,KAAAA,IAAAA,uBAAwB,UAAU,EAAE;QAEjI,IAAIE,MAAM,OAAO,CAACJ,OAChB,IAAI,CAAC,KAAK,CAAC,IAAI,IAAIA,KAAK,GAAG,CAAC,CAACK,OAAS,IAAI,CAAC,iBAAiB,CAACA;aAE7D,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAACL;QAEzC,IAAI,AAAgB,cAAhB,IAAI,CAAC,MAAM,EACb,IAAI,CAAC,MAAM,GAAG;IAElB;IAEA,MAAM,QAAgE;QACpE,IAAI,AAAgB,WAAhB,IAAI,CAAC,MAAM,IAAe,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,GAChDM,QAAQ,IAAI,CACV;QAIJH,OAAO,AAAgB,cAAhB,IAAI,CAAC,MAAM,EAAgB;QAClCA,OAAO,AAAgB,gBAAhB,IAAI,CAAC,MAAM,EAAkB;QACpCA,OAAO,AAAgB,YAAhB,IAAI,CAAC,MAAM,EAAc;QAEhC,MAAMI,mBAAmB,IAAI,CAAC,KAAK,CAAC,SAAS,CAC3C,CAACP,OAASA,AAAgB,cAAhBA,KAAK,MAAM;QAEvB,IAAIO,mBAAmB,GAErB;QAGF,IAAI,CAAC,MAAM,GAAG;QACd,IAAIC,YAAYD;QAChB,IAAIE,wBAAwB;QAE5B,IAAIC;QAEJ,MAAOF,YAAY,IAAI,CAAC,KAAK,CAAC,MAAM,CAAE;YACpC,MAAMR,OAAO,IAAI,CAAC,KAAK,CAACQ,UAAU;YAClCL,OACEH,AAAgB,cAAhBA,KAAK,MAAM,EACX,CAAC,wCAAwC,EAAEA,KAAK,MAAM,EAAE;YAE1DA,KAAK,MAAM,GAAG;gBACZ,OAAOW,KAAK,GAAG;YACjB;YACA,IAAI;gBACFX,KAAK,MAAM,GAAG;gBACd,IAAI;oBACF,IAAI,IAAI,CAAC,WAAW,EAClB,MAAM,IAAI,CAAC,WAAW,CAACA;gBAE3B,EAAE,OAAOY,GAAG;oBACVN,QAAQ,KAAK,CAAC,wBAAwBM;gBACxC;gBACAT,OACE;oBAAC;oBAAW;oBAAU;iBAAW,CAAC,OAAO,CAACH,KAAK,IAAI,KAAK,GACxD,CAAC,uBAAuB,EAAEA,KAAK,IAAI,EAAE;gBAGvC,MAAM,EAAEa,QAAQ,EAAEC,KAAK,EAAE,GAAGd;gBAC5BG,OAAOU,UAAU,CAAC,oCAAoC,EAAEb,KAAK,IAAI,EAAE;gBAEnE,IAAIe;gBACJ,MAAMC,kBAAmC;oBACvChB;oBACA,SAASU,QAAAA,qBAAAA,KAAAA,IAAAA,mBAAoB,OAAO;gBACtC;gBAEA,IAAIV,AAAc,cAAdA,KAAK,IAAI,EAAgB;oBAC3BG,OACEH,AAAiB,aAAjBA,KAAK,OAAO,IACVA,AAAiB,YAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,cAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,IACZA,AAAiB,aAAjBA,KAAK,OAAO,EACd,CAAC,6BAA6B,EAAEA,KAAK,OAAO,EAAE;oBAEhDe,cAAc,MAAMf,KAAK,QAAQ,CAACc,OAAOE;oBACzC,IAAIhB,AAAiB,aAAjBA,KAAK,OAAO,EACdU,qBACEK,QAAAA,cAAAA,KAAAA,IAAAA,YACC,MAAM;gBAEb,OAAO,IAAIf,AAAc,aAAdA,KAAK,IAAI,IAAiBA,AAAc,eAAdA,KAAK,IAAI,EAC5Ce,cAAc,MAAMf,KAAK,QAAQ,CAACc,OAAOE;qBACpC;oBACLV,QAAQ,IAAI,CACV,CAAC,uBAAuB,EAAEN,KAAK,IAAI,CAAC,iCAAiC,CAAC;oBAExEe,cAAc,MAAMf,KAAK,QAAQ,CAACc,OAAOE;gBAC3C;gBAEAC,OAAO,MAAM,CAACjB,MAAMe;gBACpBf,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGW,KAAK,GAAG;gBAC1BX,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtDQ;YACF,EAAE,OAAOI,GAAQ;gBACfH,wBAAwB;gBACxBT,KAAK,KAAK,GAAGY;gBACbZ,KAAK,YAAY,GACfY,AAAAA,CAAAA,QAAAA,IAAAA,KAAAA,IAAAA,EAAG,OAAO,AAAD,KAAM,CAAa,YAAb,OAAOA,IAAiBA,IAAI,uBAAsB;gBACnEZ,KAAK,UAAU,GAAGY,EAAE,KAAK;gBAEzBZ,KAAK,MAAM,GAAG;gBACdA,KAAK,MAAM,CAAC,GAAG,GAAGW,KAAK,GAAG;gBAC1BX,KAAK,MAAM,CAAC,IAAI,GAAGA,KAAK,MAAM,CAAC,GAAG,GAAGA,KAAK,MAAM,CAAC,KAAK;gBACtD;YACF;QACF;QAGA,IAAK,IAAIkB,IAAIV,YAAY,GAAGU,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAEA,IACjD,IAAI,CAAC,KAAK,CAACA,EAAE,CAAC,MAAM,GAAG;QAGzB,IAAIT,uBACF,IAAI,CAAC,MAAM,GAAG;aAEd,IAAI,CAAC,MAAM,GAAG;QAGhB,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAErB,MAAMU,cAAcC,KAAK,GAAG,CAACZ,WAAW,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG;YAC5D,MAAM,EAAEa,OAAO,EAAEC,MAAM,EAAE,GAAG,IAAI,CAAC,KAAK,CAACH,YAAY;YACnD,OAAO;gBACLE;gBACAC;YACF;QACF;IACF;IAEA,iBAA0B;QACxB,OAAO,AAAgB,YAAhB,IAAI,CAAC,MAAM;IACpB;IAEA,kBAAwC;QACtC,IAAI,AAAgB,YAAhB,IAAI,CAAC,MAAM,EACb,OAAO;QAET,MAAMC,iBAAiB,IAAI,CAAC,KAAK,CAAC,SAAS,CACzC,CAACvB,OAASA,AAAgB,aAAhBA,KAAK,MAAM;QAEvB,IAAIuB,kBAAkB,GACpB,OAAO,IAAI,CAAC,KAAK,CAACA,eAAe;QAEnC,OAAO;IACT;IAEA,OAAsB;QACpB,MAAMC,WAA0B;YAC9B,YAAYC;YACZ,SAASd,KAAK,GAAG;YACjB,MAAM,IAAI,CAAC,IAAI;YACf,OAAO,IAAI,CAAC,KAAK;QACnB;QACA,OAAOa;IACT;IAzLA,YACEE,IAAY,EACZC,OAEC,CACD;QAdF;QAEA;QAGA;QAEA;QAQE,IAAI,CAAC,MAAM,GACTA,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK,AAAD,KAAKA,QAAQ,KAAK,CAAC,MAAM,GAAG,IAAI,YAAY;QAC3D,IAAI,CAAC,IAAI,GAAGD;QACZ,IAAI,CAAC,KAAK,GAAIC,AAAAA,CAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,KAAK,AAAD,KAAK,EAAC,EAAG,GAAG,CAAC,CAACtB,OACvC,IAAI,CAAC,iBAAiB,CAACA;QAEzB,IAAI,CAAC,WAAW,GAAGsB,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,WAAW;IACzC;AA6KF"}
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { assert } from "@midscene/shared/utils";
|
|
2
|
-
import { callToGetJSONObject
|
|
2
|
+
import { callToGetJSONObject } from "./service-caller/index.mjs";
|
|
3
3
|
import { NodeType } from "@midscene/shared/constants";
|
|
4
|
-
import { vlLocateMode } from "@midscene/shared/env";
|
|
4
|
+
import { getModelName, vlLocateMode } from "@midscene/shared/env";
|
|
5
5
|
import { treeToList } from "@midscene/shared/extractor";
|
|
6
6
|
import { compositeElementInfoImg } from "@midscene/shared/img";
|
|
7
7
|
import { getDebug } from "@midscene/shared/logger";
|
|
8
|
+
import { z } from "zod";
|
|
8
9
|
var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
|
|
9
10
|
AIActionType[AIActionType["ASSERT"] = 0] = "ASSERT";
|
|
10
11
|
AIActionType[AIActionType["INSPECT_ELEMENT"] = 1] = "INSPECT_ELEMENT";
|
|
@@ -13,8 +14,9 @@ var common_AIActionType = /*#__PURE__*/ function(AIActionType) {
|
|
|
13
14
|
AIActionType[AIActionType["DESCRIBE_ELEMENT"] = 4] = "DESCRIBE_ELEMENT";
|
|
14
15
|
return AIActionType;
|
|
15
16
|
}({});
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
const actionSpaceTypePrefix = 'action_space_';
|
|
18
|
+
async function callAiFn(msgs, AIActionTypeValue, modelPreferences) {
|
|
19
|
+
const jsonObject = await callToGetJSONObject(msgs, AIActionTypeValue, modelPreferences);
|
|
18
20
|
return {
|
|
19
21
|
content: jsonObject.content,
|
|
20
22
|
usage: jsonObject.usage
|
|
@@ -22,12 +24,12 @@ async function callAiFn(msgs, AIActionTypeValue) {
|
|
|
22
24
|
}
|
|
23
25
|
const defaultBboxSize = 20;
|
|
24
26
|
const debugInspectUtils = getDebug('ai:common');
|
|
25
|
-
function fillBboxParam(locate, width, height) {
|
|
27
|
+
function fillBboxParam(locate, width, height, modelPreferences) {
|
|
26
28
|
if (locate.bbox_2d && !(null == locate ? void 0 : locate.bbox)) {
|
|
27
29
|
locate.bbox = locate.bbox_2d;
|
|
28
30
|
delete locate.bbox_2d;
|
|
29
31
|
}
|
|
30
|
-
if (null == locate ? void 0 : locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height);
|
|
32
|
+
if (null == locate ? void 0 : locate.bbox) locate.bbox = adaptBbox(locate.bbox, width, height, modelPreferences);
|
|
31
33
|
return locate;
|
|
32
34
|
}
|
|
33
35
|
function adaptQwenBbox(bbox) {
|
|
@@ -89,9 +91,9 @@ function adaptDoubaoBbox(bbox, width, height) {
|
|
|
89
91
|
const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;
|
|
90
92
|
throw new Error(msg);
|
|
91
93
|
}
|
|
92
|
-
function adaptBbox(bbox, width, height) {
|
|
93
|
-
if ('doubao-vision' === vlLocateMode() || 'vlm-ui-tars' === vlLocateMode()) return adaptDoubaoBbox(bbox, width, height);
|
|
94
|
-
if ('gemini' === vlLocateMode()) return adaptGeminiBbox(bbox, width, height);
|
|
94
|
+
function adaptBbox(bbox, width, height, modelPreferences) {
|
|
95
|
+
if ('doubao-vision' === vlLocateMode(modelPreferences) || 'vlm-ui-tars' === vlLocateMode(modelPreferences)) return adaptDoubaoBbox(bbox, width, height);
|
|
96
|
+
if ('gemini' === vlLocateMode(modelPreferences)) return adaptGeminiBbox(bbox, width, height);
|
|
95
97
|
return adaptQwenBbox(bbox);
|
|
96
98
|
}
|
|
97
99
|
function adaptGeminiBbox(bbox, width, height) {
|
|
@@ -106,9 +108,9 @@ function adaptGeminiBbox(bbox, width, height) {
|
|
|
106
108
|
bottom
|
|
107
109
|
];
|
|
108
110
|
}
|
|
109
|
-
function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0) {
|
|
111
|
+
function adaptBboxToRect(bbox, width, height, modelPreferences, offsetX = 0, offsetY = 0) {
|
|
110
112
|
debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);
|
|
111
|
-
const [left, top, right, bottom] = adaptBbox(bbox, width, height);
|
|
113
|
+
const [left, top, right, bottom] = adaptBbox(bbox, width, height, modelPreferences);
|
|
112
114
|
const rect = {
|
|
113
115
|
left: left + offsetX,
|
|
114
116
|
top: top + offsetY,
|
|
@@ -119,10 +121,10 @@ function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0) {
|
|
|
119
121
|
return rect;
|
|
120
122
|
}
|
|
121
123
|
let warned = false;
|
|
122
|
-
function warnGPT4oSizeLimit(size) {
|
|
124
|
+
function warnGPT4oSizeLimit(size, modelPreferences) {
|
|
123
125
|
var _getModelName;
|
|
124
126
|
if (warned) return;
|
|
125
|
-
if (null == (_getModelName = getModelName()) ? void 0 : _getModelName.toLowerCase().includes('gpt-4o')) {
|
|
127
|
+
if (null == (_getModelName = getModelName(modelPreferences)) ? void 0 : _getModelName.toLowerCase().includes('gpt-4o')) {
|
|
126
128
|
const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;
|
|
127
129
|
if (Math.max(size.width, size.height) > 2000 || Math.min(size.width, size.height) > 768) {
|
|
128
130
|
console.warn(warningMsg);
|
|
@@ -145,8 +147,8 @@ function mergeRects(rects) {
|
|
|
145
147
|
height: maxBottom - minTop
|
|
146
148
|
};
|
|
147
149
|
}
|
|
148
|
-
function expandSearchArea(rect, screenSize) {
|
|
149
|
-
const minEdgeSize = 'doubao-vision' === vlLocateMode() ? 500 : 300;
|
|
150
|
+
function expandSearchArea(rect, screenSize, modelPreferences) {
|
|
151
|
+
const minEdgeSize = 'doubao-vision' === vlLocateMode(modelPreferences) ? 500 : 300;
|
|
150
152
|
const defaultPadding = 160;
|
|
151
153
|
const paddingSizeHorizontal = rect.width < minEdgeSize ? Math.ceil((minEdgeSize - rect.width) / 2) : defaultPadding;
|
|
152
154
|
const paddingSizeVertical = rect.height < minEdgeSize ? Math.ceil((minEdgeSize - rect.height) / 2) : defaultPadding;
|
|
@@ -169,51 +171,70 @@ async function markupImageForLLM(screenshotBase64, tree, size) {
|
|
|
169
171
|
});
|
|
170
172
|
return imagePayload;
|
|
171
173
|
}
|
|
172
|
-
function buildYamlFlowFromPlans(plans, sleep) {
|
|
174
|
+
function buildYamlFlowFromPlans(plans, actionSpace, sleep) {
|
|
173
175
|
const flow = [];
|
|
174
176
|
for (const plan of plans){
|
|
175
177
|
var _plan_locate;
|
|
176
|
-
const
|
|
178
|
+
const verb = plan.type;
|
|
179
|
+
const action = actionSpace.find((action)=>action.name === verb);
|
|
180
|
+
if (!action) {
|
|
181
|
+
console.warn(`Cannot convert action ${verb} to yaml flow. Will ignore it.`);
|
|
182
|
+
continue;
|
|
183
|
+
}
|
|
177
184
|
const locate = null == (_plan_locate = plan.locate) ? void 0 : _plan_locate.prompt;
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
else if ('Input' === type) {
|
|
185
|
-
const param = plan.param;
|
|
186
|
-
flow.push({
|
|
187
|
-
aiInput: param.value,
|
|
188
|
-
locate
|
|
189
|
-
});
|
|
190
|
-
} else if ('KeyboardPress' === type) {
|
|
191
|
-
const param = plan.param;
|
|
192
|
-
flow.push({
|
|
193
|
-
aiKeyboardPress: param.value,
|
|
194
|
-
locate
|
|
195
|
-
});
|
|
196
|
-
} else if ('Scroll' === type) {
|
|
197
|
-
const param = plan.param;
|
|
198
|
-
flow.push({
|
|
199
|
-
aiScroll: null,
|
|
200
|
-
locate,
|
|
201
|
-
direction: param.direction,
|
|
202
|
-
scrollType: param.scrollType,
|
|
203
|
-
distance: param.distance
|
|
204
|
-
});
|
|
205
|
-
} else if ('Sleep' === type) {
|
|
206
|
-
const param = plan.param;
|
|
207
|
-
flow.push({
|
|
208
|
-
sleep: param.timeMs
|
|
209
|
-
});
|
|
210
|
-
} else 'AndroidBackButton' === type || 'AndroidHomeButton' === type || 'AndroidRecentAppsButton' === type || 'AndroidLongPress' === type || 'AndroidPull' === type || 'Error' === type || 'Assert' === type || 'AssertWithoutThrow' === type || 'Finished' === type || console.warn(`Cannot convert action ${type} to yaml flow. This should be a bug of Midscene.`);
|
|
185
|
+
const flowKey = action.interfaceAlias || `${actionSpaceTypePrefix}${verb}`;
|
|
186
|
+
const flowItem = {
|
|
187
|
+
[flowKey]: locate || '',
|
|
188
|
+
...plan.param || {}
|
|
189
|
+
};
|
|
190
|
+
flow.push(flowItem);
|
|
211
191
|
}
|
|
212
192
|
if (sleep) flow.push({
|
|
213
|
-
sleep
|
|
193
|
+
sleep
|
|
214
194
|
});
|
|
215
195
|
return flow;
|
|
216
196
|
}
|
|
217
|
-
|
|
197
|
+
const PointSchema = z.object({
|
|
198
|
+
left: z.number(),
|
|
199
|
+
top: z.number()
|
|
200
|
+
});
|
|
201
|
+
const SizeSchema = z.object({
|
|
202
|
+
width: z.number(),
|
|
203
|
+
height: z.number(),
|
|
204
|
+
dpr: z.number().optional()
|
|
205
|
+
});
|
|
206
|
+
const RectSchema = PointSchema.and(SizeSchema).and(z.object({
|
|
207
|
+
zoom: z.number().optional()
|
|
208
|
+
}));
|
|
209
|
+
const MidsceneLocation = z.object({
|
|
210
|
+
midscene_location_field_flag: z.literal(true),
|
|
211
|
+
prompt: z.string(),
|
|
212
|
+
center: z.tuple([
|
|
213
|
+
z.number(),
|
|
214
|
+
z.number()
|
|
215
|
+
]),
|
|
216
|
+
rect: RectSchema
|
|
217
|
+
}).passthrough();
|
|
218
|
+
const ifMidsceneLocatorField = (field)=>{
|
|
219
|
+
var _actualField__def, _actualField__def1;
|
|
220
|
+
let actualField = field;
|
|
221
|
+
if ((null == (_actualField__def = actualField._def) ? void 0 : _actualField__def.typeName) === 'ZodOptional') actualField = actualField._def.innerType;
|
|
222
|
+
if ((null == (_actualField__def1 = actualField._def) ? void 0 : _actualField__def1.typeName) === 'ZodObject') {
|
|
223
|
+
const shape = actualField._def.shape();
|
|
224
|
+
return 'midscene_location_field_flag' in shape;
|
|
225
|
+
}
|
|
226
|
+
return false;
|
|
227
|
+
};
|
|
228
|
+
const findAllMidsceneLocatorField = (zodType)=>{
|
|
229
|
+
var _zodObject__def;
|
|
230
|
+
if (!zodType) return [];
|
|
231
|
+
const zodObject = zodType;
|
|
232
|
+
if ((null == (_zodObject__def = zodObject._def) ? void 0 : _zodObject__def.typeName) === 'ZodObject' && zodObject.shape) {
|
|
233
|
+
const keys = Object.keys(zodObject.shape);
|
|
234
|
+
return keys.filter((key)=>ifMidsceneLocatorField(zodObject.shape[key]));
|
|
235
|
+
}
|
|
236
|
+
return [];
|
|
237
|
+
};
|
|
238
|
+
export { common_AIActionType as AIActionType, MidsceneLocation, PointSchema, RectSchema, SizeSchema, actionSpaceTypePrefix, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwenBbox, buildYamlFlowFromPlans, callAiFn, expandSearchArea, fillBboxParam, findAllMidsceneLocatorField, ifMidsceneLocatorField, markupImageForLLM, mergeRects, warnGPT4oSizeLimit };
|
|
218
239
|
|
|
219
240
|
//# sourceMappingURL=common.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/common.mjs","sources":["webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n PlanningActionParamInputOrKeyPress,\n PlanningActionParamSleep,\n Rect,\n ScrollParam,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n call,\n callToGetJSONObject,\n getModelName,\n} from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(msgs, AIActionTypeValue);\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n if (vlLocateMode() === 'doubao-vision' || vlLocateMode() === 'vlm-ui-tars') {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode() === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(bbox, width, height);\n const rect = {\n left: left + offsetX,\n top: top + offsetY,\n width: right - left,\n height: bottom - top,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size) {\n if (warned) return;\n if (getModelName()?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(rect: Rect, screenSize: Size) {\n const minEdgeSize = vlLocateMode() === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const type = plan.type;\n const locate = plan.locate?.prompt!; // TODO: check if locate is null\n\n if (type === 'Tap') {\n flow.push({\n aiTap: locate!,\n });\n } else if (type === 'Hover') {\n flow.push({\n aiHover: locate!,\n });\n } else if (type === 'Input') {\n const param = plan.param as PlanningActionParamInputOrKeyPress;\n flow.push({\n aiInput: param.value,\n locate,\n });\n } else if (type === 'KeyboardPress') {\n const param = plan.param as PlanningActionParamInputOrKeyPress;\n flow.push({\n aiKeyboardPress: param.value,\n locate,\n });\n } else if (type === 'Scroll') {\n const param = plan.param as ScrollParam;\n flow.push({\n aiScroll: null,\n locate,\n direction: param.direction,\n scrollType: param.scrollType,\n distance: param.distance,\n });\n } else if (type === 'Sleep') {\n const param = plan.param as PlanningActionParamSleep;\n flow.push({\n sleep: param.timeMs,\n });\n } else if (\n type === 'AndroidBackButton' ||\n type === 'AndroidHomeButton' ||\n type === 'AndroidRecentAppsButton' ||\n type === 'AndroidLongPress' ||\n type === 'AndroidPull'\n ) {\n // not implemented in yaml yet\n } else if (\n type === 'Error' ||\n type === 'Assert' ||\n type === 'AssertWithoutThrow' ||\n type === 'Finished'\n ) {\n // do nothing\n } else {\n console.warn(\n `Cannot convert action ${type} to yaml flow. This should be a bug of Midscene.`,\n );\n }\n }\n\n if (sleep) {\n flow.push({\n sleep: sleep,\n });\n }\n\n return flow;\n}\n"],"names":["AIActionType","callAiFn","msgs","AIActionTypeValue","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","sleep","flow","plan","_plan_locate","type","param"],"mappings":";;;;;;;AAoCO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,eAAeC,SACpBC,IAAY,EACZC,iBAA+B;IAE/B,MAAMC,aAAa,MAAMC,oBAAuBH,MAAMC;IAEtD,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc;IAGd,IAAKF,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC;IAG9C,OAAOF;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,OACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACEc,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,IAAIkB,AAAmB,oBAAnBA,kBAAsCA,AAAmB,kBAAnBA,gBACxC,OAAOT,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmB,aAAnBA,gBACF,OAAOC,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdyB,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UAAUE,MAAMJ,OAAOC;IAC1D,MAAM2B,OAAO;QACX,MAAMP,OAAOK;QACb,KAAKJ,MAAMK;QACX,OAAOJ,QAAQF;QACf,QAAQG,SAASF;IACnB;IACA1B,kBAAkB,4BAA4BgC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU;QAEvCC;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,cAAa,IAAbA,KAAAA,IAAAA,cAAgB,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpD,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,wFAAwF,CAAC;QAEhN,IACEtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBAAiBd,IAAU,EAAEe,UAAgB;IAC3D,MAAMC,cAAczB,AAAmB,oBAAnBA,iBAAqC,MAAM;IAC/D,MAAM0B,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGnB,KAAK,GAAG,CACnBmB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGnB,KAAK,GAAG,CACpBmB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQH,MAAO;YAETI;QADf,MAAMC,OAAOF,KAAK,IAAI;QACtB,MAAM/D,SAAS,QAAAgE,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,MAAM;QAElC,IAAIC,AAAS,UAATA,MACFH,KAAK,IAAI,CAAC;YACR,OAAO9D;QACT;aACK,IAAIiE,AAAS,YAATA,MACTH,KAAK,IAAI,CAAC;YACR,SAAS9D;QACX;aACK,IAAIiE,AAAS,YAATA,MAAkB;YAC3B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,SAASI,MAAM,KAAK;gBACpBlE;YACF;QACF,OAAO,IAAIiE,AAAS,oBAATA,MAA0B;YACnC,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,iBAAiBI,MAAM,KAAK;gBAC5BlE;YACF;QACF,OAAO,IAAIiE,AAAS,aAATA,MAAmB;YAC5B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,UAAU;gBACV9D;gBACA,WAAWkE,MAAM,SAAS;gBAC1B,YAAYA,MAAM,UAAU;gBAC5B,UAAUA,MAAM,QAAQ;YAC1B;QACF,OAAO,IAAID,AAAS,YAATA,MAAkB;YAC3B,MAAMC,QAAQH,KAAK,KAAK;YACxBD,KAAK,IAAI,CAAC;gBACR,OAAOI,MAAM,MAAM;YACrB;QACF,OACW,wBAATD,QACAA,AAAS,wBAATA,QACAA,AAAS,8BAATA,QACAA,AAAS,uBAATA,QACAA,AAAS,kBAATA,QAIAA,AAAS,YAATA,QACAA,AAAS,aAATA,QACAA,AAAS,yBAATA,QACAA,AAAS,eAATA,QAIA9B,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE8B,KAAK,gDAAgD,CAAC;IAGrF;IAEA,IAAIJ,OACFC,KAAK,IAAI,CAAC;QACR,OAAOD;IACT;IAGF,OAAOC;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/common.mjs","sources":["webpack://@midscene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n MidsceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@midscene/shared/utils';\n\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport { callToGetJSONObject } from './service-caller/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@midscene/shared/constants';\nimport {\n type IModelPreferences,\n getModelName,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { treeToList } from '@midscene/shared/extractor';\nimport { compositeElementInfoImg } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n}\n\nexport const actionSpaceTypePrefix = 'action_space_';\n\nexport async function callAiFn<T>(\n msgs: AIArgs,\n AIActionTypeValue: AIActionType,\n modelPreferences: IModelPreferences,\n): Promise<{ content: T; usage?: AIUsageInfo }> {\n const jsonObject = await callToGetJSONObject<T>(\n msgs,\n AIActionTypeValue,\n modelPreferences,\n );\n\n return {\n content: jsonObject.content,\n usage: jsonObject.usage,\n };\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n modelPreferences: IModelPreferences,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(locate.bbox, width, height, modelPreferences);\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n modelPreferences: IModelPreferences,\n): [number, number, number, number] {\n if (\n vlLocateMode(modelPreferences) === 'doubao-vision' ||\n vlLocateMode(modelPreferences) === 'vlm-ui-tars'\n ) {\n return adaptDoubaoBbox(bbox, width, height);\n }\n\n if (vlLocateMode(modelPreferences) === 'gemini') {\n return adaptGeminiBbox(bbox, width, height);\n }\n\n return adaptQwenBbox(bbox);\n}\n\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n modelPreferences: IModelPreferences,\n offsetX = 0,\n offsetY = 0,\n): Rect {\n debugInspectUtils('adaptBboxToRect', bbox, width, height, offsetX, offsetY);\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n modelPreferences,\n );\n const rect = {\n left: left + offsetX,\n top: top + offsetY,\n width: right - left,\n height: bottom - top,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(\n size: Size,\n modelPreferences: IModelPreferences,\n) {\n if (warned) return;\n if (getModelName(modelPreferences)?.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your page to a smaller resolution. Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n modelPreferences: IModelPreferences,\n) {\n const minEdgeSize =\n vlLocateMode(modelPreferences) === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n rect.left = Math.max(0, rect.left - paddingSizeHorizontal);\n rect.width = Math.min(\n rect.width + paddingSizeHorizontal * 2,\n screenSize.width - rect.left,\n );\n rect.top = Math.max(0, rect.top - paddingSizeVertical);\n rect.height = Math.min(\n rect.height + paddingSizeVertical * 2,\n screenSize.height - rect.top,\n );\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: DeviceAction<any>[],\n sleep?: number,\n): MidsceneYamlFlowItem[] {\n const flow: MidsceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const locate = plan.locate?.prompt;\n const flowKey = action.interfaceAlias || `${actionSpaceTypePrefix}${verb}`;\n\n const flowItem: MidsceneYamlFlowItem = {\n [flowKey]: locate || '',\n ...(plan.param || {}),\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\nexport const MidsceneLocation = z\n .object({\n midscene_location_field_flag: z.literal(true),\n prompt: z.string(),\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\nexport type MidsceneLocationType = z.infer<typeof MidsceneLocation>;\n\nexport const ifMidsceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject with midscene_location_field_flag\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n return 'midscene_location_field_flag' in shape;\n }\n\n return false;\n};\n\nexport const findAllMidsceneLocatorField = (\n zodType?: z.ZodType<any>,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => ifMidsceneLocatorField(zodObject.shape[key]));\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n"],"names":["AIActionType","actionSpaceTypePrefix","callAiFn","msgs","AIActionTypeValue","modelPreferences","jsonObject","callToGetJSONObject","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","vlLocateMode","adaptGeminiBbox","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rect","warned","warnGPT4oSizeLimit","size","_getModelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","_plan_locate","verb","action","flowKey","flowItem","PointSchema","z","SizeSchema","RectSchema","MidsceneLocation","ifMidsceneLocatorField","field","_actualField__def","_actualField__def1","actualField","shape","findAllMidsceneLocatorField","zodType","_zodObject__def","zodObject","keys","Object","key"],"mappings":";;;;;;;;AAmCO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;WAAZA;;AAQL,MAAMC,wBAAwB;AAE9B,eAAeC,SACpBC,IAAY,EACZC,iBAA+B,EAC/BC,gBAAmC;IAEnC,MAAMC,aAAa,MAAMC,oBACvBJ,MACAC,mBACAC;IAGF,OAAO;QACL,SAASC,WAAW,OAAO;QAC3B,OAAOA,WAAW,KAAK;IACzB;AACF;AAEA,MAAME,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdT,gBAAmC;IAGnC,IAAKO,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGG,UAAUH,OAAO,IAAI,EAAEC,OAAOC,QAAQT;IAGtD,OAAOO;AACT;AAEO,SAASI,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;QACN,YAAnB,OAAOS,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGT;KAC1B;IACD,OAAOa;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCJ,KAAa,EACbC,MAAc;IAEdU,OACEX,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOG,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;YAC5CQ,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIZ,QAAS;YAC3CS,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIX,SAAU;SAC7C;QAEH,MAAM,IAAIM,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAIH,IACEc,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;QAEhEc,KAAK,GAAG,CACNT,OACAS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS,QAAQL,kBAAkB;QAE/Dc,KAAK,GAAG,CACNR,QACAQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIS,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;QACpCQ,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGf,QAAS;QACnCS,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGd,SAAU;KACrC;IAGH,MAAMI,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdT,gBAAmC;IAEnC,IACE2B,AAAmC,oBAAnCA,aAAa3B,qBACb2B,AAAmC,kBAAnCA,aAAa3B,mBAEb,OAAOkB,gBAAgBN,MAAMJ,OAAOC;IAGtC,IAAIkB,AAAmC,aAAnCA,aAAa3B,mBACf,OAAO4B,gBAAgBhB,MAAMJ,OAAOC;IAGtC,OAAOE,cAAcC;AACvB;AAEO,SAASgB,gBACdhB,IAAc,EACdJ,KAAa,EACbC,MAAc;IAEd,MAAMoB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC5C,MAAMsB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC5C,MAAMsB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGJ,QAAS;IAC7C,MAAMwB,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGH,SAAU;IAC/C,OAAO;QAACoB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdJ,KAAa,EACbC,MAAc,EACdT,gBAAmC,EACnCkC,UAAU,CAAC,EACXC,UAAU,CAAC;IAEX/B,kBAAkB,mBAAmBQ,MAAMJ,OAAOC,QAAQyB,SAASC;IACnE,MAAM,CAACN,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UACjCE,MACAJ,OACAC,QACAT;IAEF,MAAMoC,OAAO;QACX,MAAMP,OAAOK;QACb,KAAKJ,MAAMK;QACX,OAAOJ,QAAQF;QACf,QAAQG,SAASF;IACnB;IACA1B,kBAAkB,4BAA4BgC;IAC9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBACdC,IAAU,EACVvC,gBAAmC;QAG/BwC;IADJ,IAAIH,QAAQ;IACZ,IAAI,QAAAG,CAAAA,gBAAAA,aAAaxC,iBAAgB,IAA7BwC,KAAAA,IAAAA,cAAgC,WAAW,GAAG,QAAQ,CAAC,WAAW;QACpE,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,wFAAwF,CAAC;QAEhN,IACEtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpCtB,KAAK,GAAG,CAACsB,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAU5B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAAS9B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAW/B,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYhC,KAAK,GAAG,IAAI2B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdd,IAAU,EACVe,UAAgB,EAChBnD,gBAAmC;IAEnC,MAAMoD,cACJzB,AAAmC,oBAAnCA,aAAa3B,oBAAwC,MAAM;IAC7D,MAAMqD,iBAAiB;IAEvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVnC,KAAK,IAAI,CAAEmC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IACNjB,KAAK,IAAI,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,IAAI,GAAGkB;IACpClB,KAAK,KAAK,GAAGnB,KAAK,GAAG,CACnBmB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA,uBACbH,WAAW,KAAK,GAAGf,KAAK,IAAI;IAE9BA,KAAK,GAAG,GAAGnB,KAAK,GAAG,CAAC,GAAGmB,KAAK,GAAG,GAAGmB;IAClCnB,KAAK,MAAM,GAAGnB,KAAK,GAAG,CACpBmB,KAAK,MAAM,GAAGmB,AAAsB,IAAtBA,qBACdJ,WAAW,MAAM,GAAGf,KAAK,GAAG;IAE9B,OAAOA;AACT;AAEO,eAAeoB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCnB,IAAU;IAEV,MAAMoB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtBtB;IACF;IACA,OAAOyB;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;YAWTK;QAVf,MAAMC,OAAOF,KAAK,IAAI;QAEtB,MAAMG,SAASN,YAAY,IAAI,CAAC,CAACM,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXhC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAE+B,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAMlE,SAAS,QAAAiE,CAAAA,eAAAA,KAAK,MAAM,AAAD,IAAVA,KAAAA,IAAAA,aAAa,MAAM;QAClC,MAAMG,UAAUD,OAAO,cAAc,IAAI,GAAG9E,wBAAwB6E,MAAM;QAE1E,MAAMG,WAAiC;YACrC,CAACD,QAAQ,EAAEpE,UAAU;YACrB,GAAIgE,KAAK,KAAK,IAAI,CAAC,CAAC;QACtB;QAEAD,KAAK,IAAI,CAACM;IACZ;IAEA,IAAIP,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMO,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAGK,MAAMG,mBAAmBH,EAAAA,MACvB,CAAC;IACN,8BAA8BA,EAAE,OAAO,CAAC;IACxC,QAAQA,EAAE,MAAM;IAChB,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAIP,MAAME,yBAAyB,CAACC;QAGjCC,mBAKAC;IANJ,IAAIC,cAAcH;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,eACjCE,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAID,AAAAA,SAAAA,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,QAAQ,AAAD,MAAM,aAAa;QAC9C,MAAME,QAAQD,YAAY,IAAI,CAAC,KAAK;QACpC,OAAO,kCAAkCC;IAC3C;IAEA,OAAO;AACT;AAEO,MAAMC,8BAA8B,CACzCC;QAQIC;IANJ,IAAI,CAACD,SACH,OAAO,EAAE;IAIX,MAAME,YAAYF;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,kBAAAA,UAAU,IAAI,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,QAAQ,AAAD,MAAM,eAAeC,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE,MAAQZ,uBAAuBS,UAAU,KAAK,CAACG,IAAI;IACzE;IAGA,OAAO,EAAE;AACX"}
|
|
@@ -3,8 +3,8 @@ import { systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
|
3
3
|
import { describeUserPage, elementByPositionWithElementInfo } from "./prompt/util.mjs";
|
|
4
4
|
import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/playwright-generator.mjs";
|
|
5
5
|
import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
|
|
6
|
-
import {
|
|
6
|
+
import { AiExtractElementInfo, AiLocateElement, AiLocateSection } from "./inspect.mjs";
|
|
7
7
|
import { plan } from "./llm-planning.mjs";
|
|
8
|
-
import { AIActionType, adaptBboxToRect, callAiFn } from "./common.mjs";
|
|
8
|
+
import { AIActionType, MidsceneLocation, PointSchema, RectSchema, SizeSchema, actionSpaceTypePrefix, adaptBboxToRect, callAiFn } from "./common.mjs";
|
|
9
9
|
import { resizeImageForUiTars, vlmPlanning } from "./ui-tars-planning.mjs";
|
|
10
|
-
export { AIActionType,
|
|
10
|
+
export { AIActionType, AiExtractElementInfo, AiLocateElement, AiLocateSection, MidsceneLocation, PointSchema, RectSchema, SizeSchema, actionSpaceTypePrefix, adaptBboxToRect, call as callAi, callAiFn, callAiFnWithStringResponse, callToGetJSONObject, describeUserPage, elementByPositionWithElementInfo, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, plan, resizeImageForUiTars, systemPromptToLocateElement, vlmPlanning };
|
|
@@ -1,9 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getIsUseQwenVl, vlLocateMode } from "@midscene/shared/env";
|
|
2
2
|
import { cropByRect, paddingToMatchBlockByBase64, preProcessImageUrl } from "@midscene/shared/img";
|
|
3
3
|
import { getDebug } from "@midscene/shared/logger";
|
|
4
4
|
import { assert } from "@midscene/shared/utils";
|
|
5
5
|
import { AIActionType, adaptBboxToRect, callAiFn, expandSearchArea, markupImageForLLM, mergeRects } from "./common.mjs";
|
|
6
|
-
import { systemPromptToAssert } from "./prompt/assertion.mjs";
|
|
7
6
|
import { extractDataQueryPrompt, systemPromptToExtract } from "./prompt/extraction.mjs";
|
|
8
7
|
import { findElementPrompt, systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
9
8
|
import { sectionLocatorInstruction, systemPromptToLocateSection } from "./prompt/llm-section-locator.mjs";
|
|
@@ -58,20 +57,23 @@ const promptsToChatParam = async (multimodalPrompt)=>{
|
|
|
58
57
|
async function AiLocateElement(options) {
|
|
59
58
|
const { context, targetElementDescription, callAI } = options;
|
|
60
59
|
const { screenshotBase64 } = context;
|
|
61
|
-
const
|
|
60
|
+
const modelPreferences = {
|
|
61
|
+
intent: 'grounding'
|
|
62
|
+
};
|
|
63
|
+
const { description, elementById, insertElementByPosition } = await describeUserPage(context, modelPreferences);
|
|
62
64
|
assert(targetElementDescription, "cannot find the target element description");
|
|
63
65
|
const userInstructionPrompt = await findElementPrompt.format({
|
|
64
66
|
pageDescription: description,
|
|
65
67
|
targetElementDescription: extraTextFromUserPrompt(targetElementDescription)
|
|
66
68
|
});
|
|
67
|
-
const systemPrompt = systemPromptToLocateElement(vlLocateMode());
|
|
69
|
+
const systemPrompt = systemPromptToLocateElement(vlLocateMode(modelPreferences));
|
|
68
70
|
let imagePayload = screenshotBase64;
|
|
69
71
|
if (options.searchConfig) {
|
|
70
72
|
assert(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
|
|
71
73
|
assert(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
|
|
72
74
|
imagePayload = options.searchConfig.imageBase64;
|
|
73
|
-
} else if ('qwen-vl' === vlLocateMode()) imagePayload = await paddingToMatchBlockByBase64(imagePayload);
|
|
74
|
-
else if (!vlLocateMode()) imagePayload = await markupImageForLLM(screenshotBase64, context.tree, context.size);
|
|
75
|
+
} else if ('qwen-vl' === vlLocateMode(modelPreferences)) imagePayload = await paddingToMatchBlockByBase64(imagePayload);
|
|
76
|
+
else if (!vlLocateMode(modelPreferences)) imagePayload = await markupImageForLLM(screenshotBase64, context.tree, context.size);
|
|
75
77
|
const msgs = [
|
|
76
78
|
{
|
|
77
79
|
role: 'system',
|
|
@@ -102,7 +104,9 @@ async function AiLocateElement(options) {
|
|
|
102
104
|
msgs.push(...addOns);
|
|
103
105
|
}
|
|
104
106
|
const callAIFn = callAI || callToGetJSONObject;
|
|
105
|
-
const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT
|
|
107
|
+
const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, {
|
|
108
|
+
intent: 'grounding'
|
|
109
|
+
});
|
|
106
110
|
const rawResponse = JSON.stringify(res.content);
|
|
107
111
|
let resRect;
|
|
108
112
|
let matchedElements = 'elements' in res.content ? res.content.elements : [];
|
|
@@ -110,7 +114,7 @@ async function AiLocateElement(options) {
|
|
|
110
114
|
try {
|
|
111
115
|
if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
|
|
112
116
|
var _options_searchConfig_rect, _options_searchConfig, _options_searchConfig_rect1, _options_searchConfig1, _options_searchConfig_rect2, _options_searchConfig2, _options_searchConfig_rect3, _options_searchConfig3;
|
|
113
|
-
resRect = adaptBboxToRect(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top);
|
|
117
|
+
resRect = adaptBboxToRect(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, modelPreferences, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top);
|
|
114
118
|
debugInspect('resRect', resRect);
|
|
115
119
|
const rectCenter = {
|
|
116
120
|
x: resRect.left + resRect.width / 2,
|
|
@@ -151,7 +155,10 @@ async function AiLocateElement(options) {
|
|
|
151
155
|
async function AiLocateSection(options) {
|
|
152
156
|
const { context, sectionDescription } = options;
|
|
153
157
|
const { screenshotBase64 } = context;
|
|
154
|
-
const
|
|
158
|
+
const modelPreferences = {
|
|
159
|
+
intent: 'grounding'
|
|
160
|
+
};
|
|
161
|
+
const systemPrompt = systemPromptToLocateSection(vlLocateMode(modelPreferences));
|
|
155
162
|
const sectionLocatorInstructionText = await sectionLocatorInstruction.format({
|
|
156
163
|
sectionDescription: extraTextFromUserPrompt(sectionDescription)
|
|
157
164
|
});
|
|
@@ -184,26 +191,30 @@ async function AiLocateSection(options) {
|
|
|
184
191
|
});
|
|
185
192
|
msgs.push(...addOns);
|
|
186
193
|
}
|
|
187
|
-
const result = await callAiFn(msgs, AIActionType.EXTRACT_DATA
|
|
194
|
+
const result = await callAiFn(msgs, AIActionType.EXTRACT_DATA, {
|
|
195
|
+
intent: 'grounding'
|
|
196
|
+
});
|
|
188
197
|
let sectionRect;
|
|
189
198
|
const sectionBbox = result.content.bbox;
|
|
190
199
|
if (sectionBbox) {
|
|
191
|
-
const targetRect = adaptBboxToRect(sectionBbox, context.size.width, context.size.height);
|
|
200
|
+
const targetRect = adaptBboxToRect(sectionBbox, context.size.width, context.size.height, modelPreferences);
|
|
192
201
|
debugSection('original targetRect %j', targetRect);
|
|
193
202
|
const referenceBboxList = result.content.references_bbox || [];
|
|
194
203
|
debugSection('referenceBboxList %j', referenceBboxList);
|
|
195
|
-
const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.size.width, context.size.height));
|
|
204
|
+
const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.size.width, context.size.height, modelPreferences));
|
|
196
205
|
debugSection('referenceRects %j', referenceRects);
|
|
197
206
|
const mergedRect = mergeRects([
|
|
198
207
|
targetRect,
|
|
199
208
|
...referenceRects
|
|
200
209
|
]);
|
|
201
210
|
debugSection('mergedRect %j', mergedRect);
|
|
202
|
-
sectionRect = expandSearchArea(mergedRect, context.size);
|
|
211
|
+
sectionRect = expandSearchArea(mergedRect, context.size, modelPreferences);
|
|
203
212
|
debugSection('expanded sectionRect %j', sectionRect);
|
|
204
213
|
}
|
|
205
214
|
let imageBase64 = screenshotBase64;
|
|
206
|
-
if (sectionRect) imageBase64 = await cropByRect(screenshotBase64, sectionRect,
|
|
215
|
+
if (sectionRect) imageBase64 = await cropByRect(screenshotBase64, sectionRect, getIsUseQwenVl({
|
|
216
|
+
intent: 'grounding'
|
|
217
|
+
}));
|
|
207
218
|
return {
|
|
208
219
|
rect: sectionRect,
|
|
209
220
|
imageBase64,
|
|
@@ -214,10 +225,10 @@ async function AiLocateSection(options) {
|
|
|
214
225
|
}
|
|
215
226
|
async function AiExtractElementInfo(options) {
|
|
216
227
|
var _options_extractOption;
|
|
217
|
-
const { dataQuery, context, extractOption, multimodalPrompt } = options;
|
|
228
|
+
const { dataQuery, context, extractOption, multimodalPrompt, modelPreferences } = options;
|
|
218
229
|
const systemPrompt = systemPromptToExtract();
|
|
219
230
|
const { screenshotBase64 } = context;
|
|
220
|
-
const { description, elementById } = await describeUserPage(context, {
|
|
231
|
+
const { description, elementById } = await describeUserPage(context, modelPreferences, {
|
|
221
232
|
truncateTextLength: 200,
|
|
222
233
|
filterNonTextContent: false,
|
|
223
234
|
visibleOnly: false,
|
|
@@ -257,61 +268,13 @@ async function AiExtractElementInfo(options) {
|
|
|
257
268
|
});
|
|
258
269
|
msgs.push(...addOns);
|
|
259
270
|
}
|
|
260
|
-
const result = await callAiFn(msgs, AIActionType.EXTRACT_DATA);
|
|
271
|
+
const result = await callAiFn(msgs, AIActionType.EXTRACT_DATA, modelPreferences);
|
|
261
272
|
return {
|
|
262
273
|
parseResult: result.content,
|
|
263
274
|
elementById,
|
|
264
275
|
usage: result.usage
|
|
265
276
|
};
|
|
266
277
|
}
|
|
267
|
-
|
|
268
|
-
const { assertion, context } = options;
|
|
269
|
-
assert(assertion, 'assertion should not be empty');
|
|
270
|
-
const { screenshotBase64 } = context;
|
|
271
|
-
const systemPrompt = systemPromptToAssert({
|
|
272
|
-
isUITars: getAIConfigInBoolean(MIDSCENE_USE_VLM_UI_TARS)
|
|
273
|
-
});
|
|
274
|
-
const assertionText = extraTextFromUserPrompt(assertion);
|
|
275
|
-
const msgs = [
|
|
276
|
-
{
|
|
277
|
-
role: 'system',
|
|
278
|
-
content: systemPrompt
|
|
279
|
-
},
|
|
280
|
-
{
|
|
281
|
-
role: 'user',
|
|
282
|
-
content: [
|
|
283
|
-
{
|
|
284
|
-
type: 'image_url',
|
|
285
|
-
image_url: {
|
|
286
|
-
url: screenshotBase64,
|
|
287
|
-
detail: 'high'
|
|
288
|
-
}
|
|
289
|
-
},
|
|
290
|
-
{
|
|
291
|
-
type: 'text',
|
|
292
|
-
text: `
|
|
293
|
-
Here is the assertion. Please tell whether it is truthy according to the screenshot.
|
|
294
|
-
=====================================
|
|
295
|
-
${assertionText}
|
|
296
|
-
=====================================
|
|
297
|
-
`
|
|
298
|
-
}
|
|
299
|
-
]
|
|
300
|
-
}
|
|
301
|
-
];
|
|
302
|
-
if ('string' != typeof assertion) {
|
|
303
|
-
const addOns = await promptsToChatParam({
|
|
304
|
-
images: assertion.images,
|
|
305
|
-
convertHttpImage2Base64: assertion.convertHttpImage2Base64
|
|
306
|
-
});
|
|
307
|
-
msgs.push(...addOns);
|
|
308
|
-
}
|
|
309
|
-
const { content: assertResult, usage } = await callAiFn(msgs, AIActionType.ASSERT);
|
|
310
|
-
return {
|
|
311
|
-
content: assertResult,
|
|
312
|
-
usage
|
|
313
|
-
};
|
|
314
|
-
}
|
|
315
|
-
export { AiAssert, AiExtractElementInfo, AiLocateElement, AiLocateSection };
|
|
278
|
+
export { AiExtractElementInfo, AiLocateElement, AiLocateSection };
|
|
316
279
|
|
|
317
280
|
//# sourceMappingURL=inspect.mjs.map
|