@midscene/core 1.2.3-beta-20260127070952.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/task-builder.mjs +2 -1
- package/dist/es/agent/task-builder.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +29 -17
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +3 -2
- package/dist/es/agent/utils.mjs.map +1 -1
- package/dist/es/ai-model/auto-glm/planning.mjs +15 -9
- package/dist/es/ai-model/auto-glm/planning.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +2 -2
- package/dist/es/ai-model/inspect.mjs +44 -4
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +8 -2
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +20 -2
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +25 -24
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/service/index.mjs +44 -10
- package/dist/es/service/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/task-builder.js +2 -1
- package/dist/lib/agent/task-builder.js.map +1 -1
- package/dist/lib/agent/tasks.js +28 -16
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +2 -1
- package/dist/lib/agent/utils.js.map +1 -1
- package/dist/lib/ai-model/auto-glm/planning.js +14 -8
- package/dist/lib/ai-model/auto-glm/planning.js.map +1 -1
- package/dist/lib/ai-model/index.js +3 -0
- package/dist/lib/ai-model/inspect.js +43 -3
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +7 -1
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +25 -4
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +24 -23
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/service/index.js +43 -9
- package/dist/lib/service/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/ai-model/index.d.ts +1 -1
- package/dist/types/ai-model/inspect.d.ts +3 -2
- package/dist/types/ai-model/service-caller/index.d.ts +5 -0
- package/dist/types/device/device-options.d.ts +68 -0
- package/package.json +2 -2
|
@@ -65,20 +65,29 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
65
65
|
},
|
|
66
66
|
...conversationHistory.snapshot()
|
|
67
67
|
], modelConfig);
|
|
68
|
-
|
|
68
|
+
let convertedText;
|
|
69
|
+
let parsed;
|
|
70
|
+
try {
|
|
71
|
+
convertedText = convertBboxToCoordinates(res.content);
|
|
72
|
+
const { size } = context;
|
|
73
|
+
const parseResult = (0, action_parser_namespaceObject.actionParser)({
|
|
74
|
+
prediction: convertedText,
|
|
75
|
+
factor: [
|
|
76
|
+
1000,
|
|
77
|
+
1000
|
|
78
|
+
],
|
|
79
|
+
screenContext: {
|
|
80
|
+
width: size.width,
|
|
81
|
+
height: size.height
|
|
82
|
+
},
|
|
83
|
+
modelVer: uiTarsModelVersion
|
|
84
|
+
});
|
|
85
|
+
parsed = parseResult.parsed;
|
|
86
|
+
} catch (parseError) {
|
|
87
|
+
const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
|
|
88
|
+
throw new index_js_namespaceObject.AIResponseParseError(`Parse error: ${errorMessage}`, JSON.stringify(res.content, void 0, 2), res.usage);
|
|
89
|
+
}
|
|
69
90
|
const { size } = context;
|
|
70
|
-
const { parsed } = (0, action_parser_namespaceObject.actionParser)({
|
|
71
|
-
prediction: convertedText,
|
|
72
|
-
factor: [
|
|
73
|
-
1000,
|
|
74
|
-
1000
|
|
75
|
-
],
|
|
76
|
-
screenContext: {
|
|
77
|
-
width: size.width,
|
|
78
|
-
height: size.height
|
|
79
|
-
},
|
|
80
|
-
modelVer: uiTarsModelVersion
|
|
81
|
-
});
|
|
82
91
|
debug('ui-tars modelVer', uiTarsModelVersion, ', parsed', JSON.stringify(parsed));
|
|
83
92
|
const transformActions = [];
|
|
84
93
|
const unhandledActions = [];
|
|
@@ -219,17 +228,9 @@ async function uiTarsPlanning(userInstruction, options) {
|
|
|
219
228
|
}
|
|
220
229
|
const errorMessage = [
|
|
221
230
|
'No actions found in UI-TARS response.',
|
|
222
|
-
...errorDetails
|
|
223
|
-
`\nRaw response: ${res.content}`
|
|
231
|
+
...errorDetails
|
|
224
232
|
].join('\n');
|
|
225
|
-
throw new
|
|
226
|
-
cause: {
|
|
227
|
-
prediction: res.content,
|
|
228
|
-
parsed,
|
|
229
|
-
unhandledActions,
|
|
230
|
-
convertedText
|
|
231
|
-
}
|
|
232
|
-
});
|
|
233
|
+
throw new index_js_namespaceObject.AIResponseParseError(errorMessage, JSON.stringify(res.content, void 0, 2), res.usage);
|
|
233
234
|
}
|
|
234
235
|
debug('transformActions', JSON.stringify(transformActions, null, 2));
|
|
235
236
|
const log = (0, ui_tars_planning_js_namespaceObject.getSummary)(res.content);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/ui-tars-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { callAIWithStringResponse } from './service-caller/index';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelConfig,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate: locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n `\\nRaw response: ${res.content}`,\n ].join('\\n');\n\n throw new Error(errorMessage, {\n cause: {\n prediction: res.content,\n parsed,\n unhandledActions,\n convertedText,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","convertBboxToCoordinates","size","parsed","actionParser","JSON","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","console","errorDetails","types","a","errorMessage","Error","log","getSummary","undefined","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACoBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,mBAAmBP,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDE;IAEF,MAAMS,gBAAgBC,yBAAyBH,IAAI,OAAO;IAE1D,MAAM,EAAEI,IAAI,EAAE,GAAGZ;IACjB,MAAM,EAAEa,MAAM,EAAE,GAAGC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;QAC9B,YAAYJ;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOE,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUT;IACZ;IAEAf,MACE,oBACAe,oBACA,YACAY,KAAK,SAAS,CAACF;IAGjB,MAAMG,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBL,OAAO,OAAO,CAAC,CAACM;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM3B,QAAQ8B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAEvD,MAAMW,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAM5B,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BoB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM3B,QAAQ8B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAEvD,MAAMW,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAM5B,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BoB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM3B,QAAQ8B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAEvD,MAAMW,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAM5B,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BoB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEP;YAC5D,MAAMa,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEP;YACxDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAM5B,YACJ;4BAAE,GAAGiC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCZ,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQO,OAAO,OAAO,IAAI;wBAC1B,MAAM5B,YACJ;4BAAE,GAAGkC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCb,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASO,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMO,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAbES,QAAQ,IAAI,CACV;aAaC,IAAIR,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACA/B,MAAM,0BAA0BgC,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMa,eAAyB,EAAE;QAGjC,IAAIhB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvBgB,aAAa,IAAI,CAAC;YAGlB,IACErB,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtBqB,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIZ,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMa,QAAQb,iBAAiB,GAAG,CAAC,CAACc,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAME,eAAe;YACnB;eACGH;YACH,CAAC,gBAAgB,EAAErB,IAAI,OAAO,EAAE;SACjC,CAAC,IAAI,CAAC;QAEP,MAAM,IAAIyB,MAAMD,cAAc;YAC5B,OAAO;gBACL,YAAYxB,IAAI,OAAO;gBACvBK;gBACAI;gBACAP;YACF;QACF;IACF;IAEAtB,MAAM,oBAAoB2B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IACjE,MAAMkB,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAW3B,IAAI,OAAO;IAElCT,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASmC;IACX;IAEA,OAAO;QACL,SAASlB;QACTkB;QACA,OAAO1B,IAAI,KAAK;QAChB,aAAaO,KAAK,SAAS,CAACP,IAAI,OAAO,EAAE4B,QAAW;QACpD,wBAAwBlB;IAC1B;AACF;AAOA,SAASP,yBAAyB0B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIvD,KAAK,KAAK,CAAEkD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAIxD,KAAK,KAAK,CAAEoD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASjB,SAAS+B,QAAgB,EAAEzC,IAAuC;IACzE,MAAM,CAACsC,GAAGC,EAAE,GAAGpC,KAAK,KAAK,CAACsC;IAC1B,OAAO;QAACH,IAAItC,KAAK,KAAK;QAAEuC,IAAIvC,KAAK,MAAM;KAAC;AAC1C"}
|
|
1
|
+
{"version":3,"file":"ai-model/ui-tars-planning.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/ai-model/ui-tars-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@midscene/shared/env';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport {\n AIResponseParseError,\n callAIWithStringResponse,\n} from './service-caller/index';\n\ntype ActionType =\n | 'click'\n | 'left_double'\n | 'right_single'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig, actionContext } = options;\n const { uiTarsModelVersion } = modelConfig;\n\n let instruction = userInstruction;\n if (actionContext) {\n instruction = `<high_priority_knowledge>${actionContext}</high_priority_knowledge>\\n<user_instruction>${userInstruction}</user_instruction>`;\n }\n\n const systemPrompt = getUiTarsPlanningPrompt() + instruction;\n\n const screenshotBase64 = context.screenshot.base64;\n\n conversationHistory.append({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n modelConfig,\n );\n\n let convertedText: string;\n let parsed: ReturnType<typeof actionParser>['parsed'];\n\n try {\n convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const parseResult = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n parsed = parseResult.parsed;\n } catch (parseError) {\n // Throw AIResponseParseError with usage and rawResponse preserved\n const errorMessage =\n parseError instanceof Error ? parseError.message : String(parseError);\n throw new AIResponseParseError(\n `Parse error: ${errorMessage}`,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n const { size } = context;\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n const unhandledActions: Array<{ type: string; thought: string }> = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'Tap',\n param: {\n locate: locate,\n },\n });\n } else if (actionType === 'left_double') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'DoubleClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'right_single') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n\n const locate = {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n };\n\n transformActions.push({\n type: 'RightClick',\n param: {\n locate: locate,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys.join('+'),\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (actionType) {\n // Track unhandled action types\n unhandledActions.push({\n type: actionType,\n thought: action.thought || '',\n });\n debug('Unhandled action type:', actionType, 'thought:', action.thought);\n }\n });\n\n if (transformActions.length === 0) {\n const errorDetails: string[] = [];\n\n // Check if parsing failed\n if (parsed.length === 0) {\n errorDetails.push('Action parser returned no actions');\n\n // Check if response has Thought but no Action\n if (\n res.content.includes('Thought:') &&\n !res.content.includes('Action:')\n ) {\n errorDetails.push(\n 'Response contains \"Thought:\" but missing \"Action:\" line',\n );\n } else {\n errorDetails.push('Response may be malformed or empty');\n }\n }\n\n // Check if we have unhandled action types\n if (unhandledActions.length > 0) {\n const types = unhandledActions.map((a) => a.type).join(', ');\n errorDetails.push(`Unhandled action types: ${types}`);\n }\n\n const errorMessage = [\n 'No actions found in UI-TARS response.',\n ...errorDetails,\n ].join('\\n');\n\n // Throw AIResponseParseError with usage and rawResponse preserved\n throw new AIResponseParseError(\n errorMessage,\n JSON.stringify(res.content, undefined, 2),\n res.usage,\n );\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n shouldContinuePlanning: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface LeftDoubleAction extends BaseAction {\n action_type: 'left_double';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface RightSingleAction extends BaseAction {\n action_type: 'right_single';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | LeftDoubleAction\n | RightSingleAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction;\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","actionContext","uiTarsModelVersion","instruction","systemPrompt","getUiTarsPlanningPrompt","screenshotBase64","res","callAIWithStringResponse","convertedText","parsed","convertBboxToCoordinates","size","parseResult","actionParser","parseError","errorMessage","Error","String","AIResponseParseError","JSON","undefined","transformActions","unhandledActions","shouldContinue","action","actionType","assert","getPoint","locate","startPoint","endPoint","keys","transformHotkeyInput","console","errorDetails","types","a","log","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACuBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAKC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAEC,aAAa,EAAE,GAAGJ;IACrE,MAAM,EAAEK,kBAAkB,EAAE,GAAGF;IAE/B,IAAIG,cAAcP;IAClB,IAAIK,eACFE,cAAc,CAAC,yBAAyB,EAAEF,cAAc,8CAA8C,EAAEL,gBAAgB,mBAAmB,CAAC;IAG9I,MAAMQ,eAAeC,AAAAA,IAAAA,oCAAAA,uBAAAA,AAAAA,MAA4BF;IAEjD,MAAMG,mBAAmBP,QAAQ,UAAU,CAAC,MAAM;IAElDD,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKQ;gBACP;YACF;SACD;IACH;IAEA,MAAMC,MAAM,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAChB;QACE;YACE,MAAM;YACN,SAASJ;QACX;WACGN,oBAAoB,QAAQ;KAChC,EACDE;IAGF,IAAIS;IACJ,IAAIC;IAEJ,IAAI;QACFD,gBAAgBE,yBAAyBJ,IAAI,OAAO;QAEpD,MAAM,EAAEK,IAAI,EAAE,GAAGb;QACjB,MAAMc,cAAcC,AAAAA,IAAAA,8BAAAA,YAAAA,AAAAA,EAAa;YAC/B,YAAYL;YACZ,QAAQ;gBAAC;gBAAM;aAAK;YACpB,eAAe;gBACb,OAAOG,KAAK,KAAK;gBACjB,QAAQA,KAAK,MAAM;YACrB;YACA,UAAUV;QACZ;QACAQ,SAASG,YAAY,MAAM;IAC7B,EAAE,OAAOE,YAAY;QAEnB,MAAMC,eACJD,sBAAsBE,QAAQF,WAAW,OAAO,GAAGG,OAAOH;QAC5D,MAAM,IAAII,yBAAAA,oBAAoBA,CAC5B,CAAC,aAAa,EAAEH,cAAc,EAC9BI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEA,MAAM,EAAEK,IAAI,EAAE,GAAGb;IAEjBZ,MACE,oBACAe,oBACA,YACAkB,KAAK,SAAS,CAACV;IAGjB,MAAMY,mBAAqC,EAAE;IAC7C,MAAMC,mBAA6D,EAAE;IACrE,IAAIC,iBAAiB;IACrBd,OAAO,OAAO,CAAC,CAACe;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;YACF;QACF,OAAO,IAAIH,AAAe,kBAAfA,YAA8B;YACvCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,mBAAfA,YAA+B;YACxCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMlC,QAAQqC,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAEvD,MAAMiB,SAAS;gBACb,QAAQJ,OAAO,OAAO,IAAI;gBAC1B,MAAMnC,YACJ;oBAAE,GAAGC,KAAK,CAAC,EAAE;oBAAE,GAAGA,KAAK,CAAC,EAAE;gBAAC,GAC3BqB,KAAK,KAAK,EACVA,KAAK,MAAM;YAEf;YAEAU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQO;gBACV;gBACA,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YAAuB;YAChCC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMK,aAAaF,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEb;YAC5D,MAAMmB,WAAWH,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEb;YACxDU,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQG,OAAO,OAAO,IAAI;wBAC1B,MAAMnC,YACJ;4BAAE,GAAGwC,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrClB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQa,OAAO,OAAO,IAAI;wBAC1B,MAAMnC,YACJ;4BAAE,GAAGyC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCnB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASa,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOG,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWG,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASG,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMO,OAAOC,AAAAA,IAAAA,mCAAAA,oBAAAA,AAAAA,EAAqBR,OAAO,aAAa,CAAC,GAAG;YAE1DH,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASU,KAAK,IAAI,CAAC;gBACrB;gBACA,SAASP,OAAO,OAAO,IAAI;YAC7B;QACF,OAbES,QAAQ,IAAI,CACV;aAaC,IAAIR,AAAe,WAAfA,YACTJ,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASG,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,YAAY;YAErBH,iBAAiB,IAAI,CAAC;gBACpB,MAAMG;gBACN,SAASD,OAAO,OAAO,IAAI;YAC7B;YACAtC,MAAM,0BAA0BuC,YAAY,YAAYD,OAAO,OAAO;QACxE;IACF;IAEA,IAAIH,AAA4B,MAA5BA,iBAAiB,MAAM,EAAQ;QACjC,MAAMa,eAAyB,EAAE;QAGjC,IAAIzB,AAAkB,MAAlBA,OAAO,MAAM,EAAQ;YACvByB,aAAa,IAAI,CAAC;YAGlB,IACE5B,IAAI,OAAO,CAAC,QAAQ,CAAC,eACrB,CAACA,IAAI,OAAO,CAAC,QAAQ,CAAC,YAEtB4B,aAAa,IAAI,CACf;iBAGFA,aAAa,IAAI,CAAC;QAEtB;QAGA,IAAIZ,iBAAiB,MAAM,GAAG,GAAG;YAC/B,MAAMa,QAAQb,iBAAiB,GAAG,CAAC,CAACc,IAAMA,EAAE,IAAI,EAAE,IAAI,CAAC;YACvDF,aAAa,IAAI,CAAC,CAAC,wBAAwB,EAAEC,OAAO;QACtD;QAEA,MAAMpB,eAAe;YACnB;eACGmB;SACJ,CAAC,IAAI,CAAC;QAGP,MAAM,IAAIhB,yBAAAA,oBAAoBA,CAC5BH,cACAI,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW,IACvCd,IAAI,KAAK;IAEb;IAEApB,MAAM,oBAAoBiC,KAAK,SAAS,CAACE,kBAAkB,MAAM;IACjE,MAAMgB,MAAMC,AAAAA,IAAAA,oCAAAA,UAAAA,AAAAA,EAAWhC,IAAI,OAAO;IAElCT,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAASwC;IACX;IAEA,OAAO;QACL,SAAShB;QACTgB;QACA,OAAO/B,IAAI,KAAK;QAChB,aAAaa,KAAK,SAAS,CAACb,IAAI,OAAO,EAAEc,QAAW;QACpD,wBAAwBG;IAC1B;AACF;AAOA,SAASb,yBAAyB6B,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAI3D,KAAK,KAAK,CAAEsD,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAI5D,KAAK,KAAK,CAAEwD,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASd,SAAS4B,QAAgB,EAAE5C,IAAuC;IACzE,MAAM,CAACyC,GAAGC,EAAE,GAAGlC,KAAK,KAAK,CAACoC;IAC1B,OAAO;QAACH,IAAIzC,KAAK,KAAK;QAAE0C,IAAI1C,KAAK,MAAM;KAAC;AAC1C"}
|
|
@@ -142,19 +142,53 @@ class Service {
|
|
|
142
142
|
(0, utils_namespaceObject.assert)('object' == typeof dataDemand || 'string' == typeof dataDemand, `dataDemand should be object or string, but get ${typeof dataDemand}`);
|
|
143
143
|
const context = await this.contextRetrieverFn();
|
|
144
144
|
const startTime = Date.now();
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
145
|
+
let parseResult;
|
|
146
|
+
let rawResponse;
|
|
147
|
+
let usage;
|
|
148
|
+
let reasoning_content;
|
|
149
|
+
try {
|
|
150
|
+
const result = await (0, index_js_namespaceObject.AiExtractElementInfo)({
|
|
151
|
+
context,
|
|
152
|
+
dataQuery: dataDemand,
|
|
153
|
+
multimodalPrompt,
|
|
154
|
+
extractOption: opt,
|
|
155
|
+
modelConfig,
|
|
156
|
+
pageDescription
|
|
157
|
+
});
|
|
158
|
+
parseResult = result.parseResult;
|
|
159
|
+
rawResponse = result.rawResponse;
|
|
160
|
+
usage = result.usage;
|
|
161
|
+
reasoning_content = result.reasoning_content;
|
|
162
|
+
} catch (error) {
|
|
163
|
+
if (error instanceof index_js_namespaceObject.AIResponseParseError) {
|
|
164
|
+
const timeCost = Date.now() - startTime;
|
|
165
|
+
const taskInfo = {
|
|
166
|
+
...this.taskInfo ? this.taskInfo : {},
|
|
167
|
+
durationMs: timeCost,
|
|
168
|
+
rawResponse: error.rawResponse,
|
|
169
|
+
usage: error.usage
|
|
170
|
+
};
|
|
171
|
+
const dump = (0, external_utils_js_namespaceObject.createServiceDump)({
|
|
172
|
+
type: 'extract',
|
|
173
|
+
userQuery: {
|
|
174
|
+
dataDemand
|
|
175
|
+
},
|
|
176
|
+
matchedElement: [],
|
|
177
|
+
data: null,
|
|
178
|
+
taskInfo,
|
|
179
|
+
error: error.message
|
|
180
|
+
});
|
|
181
|
+
throw new external_types_js_namespaceObject.ServiceError(error.message, dump);
|
|
182
|
+
}
|
|
183
|
+
throw error;
|
|
184
|
+
}
|
|
153
185
|
const timeCost = Date.now() - startTime;
|
|
154
186
|
const taskInfo = {
|
|
155
187
|
...this.taskInfo ? this.taskInfo : {},
|
|
156
188
|
durationMs: timeCost,
|
|
157
|
-
rawResponse
|
|
189
|
+
rawResponse,
|
|
190
|
+
formatResponse: JSON.stringify(parseResult),
|
|
191
|
+
usage,
|
|
158
192
|
reasoning_content
|
|
159
193
|
};
|
|
160
194
|
let errorLog;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"service/index.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/service/index.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callAIWithObjectResponse,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport { type AIArgs, expandSearchArea } from '@/common';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n DetailedLocateParam,\n LocateResultWithDump,\n PartialServiceDumpFromSDK,\n Rect,\n ServiceExtractOption,\n ServiceExtractParam,\n ServiceExtractResult,\n ServiceTaskInfo,\n UIContext,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_FORCE_DEEP_THINK,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../common';\nimport { createServiceDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface ServiceOptions {\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n aiVendorFn?: typeof callAIWithObjectResponse;\n}\n\nconst debug = getDebug('ai:service');\nexport default class Service {\n contextRetrieverFn: () => Promise<UIContext> | UIContext;\n\n aiVendorFn: Exclude<ServiceOptions['aiVendorFn'], undefined> =\n callAIWithObjectResponse;\n\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n\n constructor(\n context: UIContext | (() => Promise<UIContext> | UIContext),\n opt?: ServiceOptions,\n ) {\n assert(context, 'context is required for Service');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n // just for unit test, aiVendorFn is callAIWithObjectResponse by default\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n ): Promise<LocateResultWithDump> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = globalConfigManager.getEnvConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n const { modelFamily } = modelConfig;\n\n if (searchAreaPrompt && !modelFamily) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config',\n );\n searchAreaPrompt = undefined;\n }\n\n if (searchAreaPrompt && isAutoGLM(modelFamily)) {\n console.warn('The \"deepThink\" feature is not supported with AutoGLM.');\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn());\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, rawResponse, usage, reasoning_content } =\n await AiLocateElement({\n callAIFn: this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `failed to locate element: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements = parseResult.elements || [];\n\n const dump = createServiceDump({\n ...dumpData,\n matchedElement: elements,\n });\n\n if (errorLog) {\n throw new ServiceError(errorLog, dump);\n }\n\n if (elements.length > 1) {\n throw new ServiceError(\n `locate: multiple elements found, length = ${elements.length}`,\n dump,\n );\n }\n\n if (elements.length === 1) {\n return {\n element: {\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n description: elements[0]!.description,\n },\n rect,\n dump,\n };\n }\n\n return {\n element: null,\n rect,\n dump,\n };\n }\n\n async extract<T>(\n dataDemand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n pageDescription?: string,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ServiceExtractResult<T>> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const context = await this.contextRetrieverFn();\n\n const startTime = Date.now();\n\n const { parseResult, usage, reasoning_content } =\n await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n pageDescription,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n const dump = createServiceDump({\n ...dumpData,\n data,\n });\n\n if (errorLog && !data) {\n throw new ServiceError(errorLog, dump);\n }\n\n return {\n data,\n thought,\n usage,\n reasoning_content,\n dump,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for service.describe');\n const context = await this.contextRetrieverFn();\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n assert(screenshotBase64, 'screenshot is required for service.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { modelFamily } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(\n targetRect,\n context.size,\n modelFamily,\n );\n debug('describe: set searchArea', searchArea);\n const croppedResult = await cropByRect(\n imagePayload,\n searchArea,\n modelFamily === 'qwen2.5-vl',\n );\n imagePayload = croppedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn = this\n .aiVendorFn as typeof callAIWithObjectResponse<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, modelConfig);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","Service","query","opt","modelConfig","queryPrompt","assert","globalDeepThinkSwitch","globalConfigManager","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelFamily","console","undefined","isAutoGLM","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","rawResponse","usage","reasoning_content","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","dump","createServiceDump","ServiceError","dataDemand","pageDescription","multimodalPrompt","AiExtractElementInfo","data","thought","target","size","screenshotBase64","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","croppedResult","cropByRect","msgs","callAIFn","res","content","callAIWithObjectResponse","Promise"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACyCA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACR,MAAMC;IA4BnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACM;QAC/B,MAAMC,cAAc,AAAiB,YAAjB,OAAOH,QAAqBA,QAAQA,MAAM,MAAM;QACpEI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOD,aAAa;QAEpBC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,AAAiB,YAAjB,OAAOJ,OAAoB;QAElC,MAAMK,wBAAwBC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CACrEC,oBAAAA,yBAAyBA;QAE3B,IAAIF,uBACFR,MAAM,yBAAyBQ;QAEjC,IAAIG;QACJ,IAAIR,MAAM,SAAS,IAAIK,uBACrBG,mBAAmBR,MAAM,MAAM;QAGjC,MAAM,EAAES,WAAW,EAAE,GAAGP;QAExB,IAAIM,oBAAoB,CAACC,aAAa;YACpCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBG;QACrB;QAEA,IAAIH,oBAAoBI,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUH,cAAc;YAC9CC,QAAQ,IAAI,CAAC;YACbF,mBAAmBG;QACrB;QAEA,MAAME,UAAUZ,KAAK,WAAY,MAAM,IAAI,CAAC,kBAAkB;QAE9D,IAAIa;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIT,kBAAkB;YACpBS,qBAAqB,MAAMC,AAAAA,IAAAA,2BAAAA,eAAAA,AAAAA,EAAgB;gBACzCL;gBACA,oBAAoBL;gBACpBN;YACF;YACAE,IAAAA,sBAAAA,MAAAA,AAAAA,EACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAET,iBAAiB,CAAC,EAChDS,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEC,IAAI,EAAEC,WAAW,EAAEC,KAAK,EAAEC,iBAAiB,EAAE,GAChE,MAAMC,AAAAA,IAAAA,yBAAAA,eAAAA,AAAAA,EAAgB;YACpB,UAAU,IAAI,CAAC,UAAU;YACzBb;YACA,0BAA0BV;YAC1B,cAAcc;YACdf;QACF;QAEF,MAAMyB,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAV;YACAC;YACAC;YACAS;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,4BAA4B,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG3E,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS5B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNM;YACA,WAAW,CAAC,CAACd;YACb,OAAOgB;QACT;QAEA,MAAME,WAAWX,YAAY,QAAQ,IAAI,EAAE;QAE3C,MAAMY,OAAOC,AAAAA,IAAAA,kCAAAA,iBAAAA,AAAAA,EAAkB;YAC7B,GAAGH,QAAQ;YACX,gBAAgBC;QAClB;QAEA,IAAIF,UACF,MAAM,IAAIK,kCAAAA,YAAYA,CAACL,UAAUG;QAGnC,IAAID,SAAS,MAAM,GAAG,GACpB,MAAM,IAAIG,kCAAAA,YAAYA,CACpB,CAAC,0CAA0C,EAAEH,SAAS,MAAM,EAAE,EAC9DC;QAIJ,IAAID,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,aAAaA,QAAQ,CAAC,EAAE,CAAE,WAAW;YACvC;YACAV;YACAW;QACF;QAGF,OAAO;YACL,SAAS;YACTX;YACAW;QACF;IACF;IAEA,MAAM,QACJG,UAA+B,EAC/BlC,WAAyB,EACzBD,GAA0B,EAC1BoC,eAAwB,EACxBC,gBAAoC,EACF;QAClClC,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAsB,YAAtB,OAAOgC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMvB,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAE7C,MAAMM,YAAYC,KAAK,GAAG;QAE1B,MAAM,EAAEC,WAAW,EAAEG,KAAK,EAAEC,iBAAiB,EAAE,GAC7C,MAAMc,AAAAA,IAAAA,yBAAAA,oBAAAA,AAAAA,EAAwB;YAC5B1B;YACA,WAAWuB;YACXE;YACA,eAAerC;YACfC;YACAmC;QACF;QAEF,MAAMV,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACR;YAC5BI;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,qBAAqB,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTK;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNR;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGpB,eAAe,CAAC;QAG1C,MAAMY,OAAOC,AAAAA,IAAAA,kCAAAA,iBAAAA,AAAAA,EAAkB;YAC7B,GAAGH,QAAQ;YACXS;QACF;QAEA,IAAIV,YAAY,CAACU,MACf,MAAM,IAAIL,kCAAAA,YAAYA,CAACL,UAAUG;QAGnC,OAAO;YACLO;YACAC;YACAjB;YACAC;YACAQ;QACF;IACF;IAEA,MAAM,SACJS,MAA+B,EAC/BxC,WAAyB,EACzBD,GAEC,EACwD;QACzDG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOsC,QAAQ;QACf,MAAM7B,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAC7C,MAAM,EAAE8B,IAAI,EAAE,GAAG9B;QACjB,MAAM+B,mBAAmB/B,QAAQ,UAAU,CAAC,MAAM;QAClDT,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOwC,kBAAkB;QAEzB,MAAM,EAAEnC,WAAW,EAAE,GAAGP;QACxB,MAAM2C,eAAeC,AAAAA,IAAAA,4BAAAA,2BAAAA,AAAAA;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,uBAAAA,AAAAA,EAAwB;YAC/C,gBAAgBR;YAChBD;YACA,sBAAsB;gBACpB;oBACE,MAAMK;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAI/C,KAAK,WAAW;YAClB,MAAMa,aAAauC,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EACjBL,YACAnC,QAAQ,IAAI,EACZJ;YAEFZ,MAAM,4BAA4BiB;YAClC,MAAMwC,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BJ,cACArC,YACAL,AAAgB,iBAAhBA;YAEF0C,eAAeG,cAAc,WAAW;QAC1C;QAEA,MAAME,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WAAW,IAAI,CAClB,UAAU;QAEb,MAAMC,MAAM,MAAMD,SAASD,MAAMtD;QAEjC,MAAM,EAAEyD,OAAO,EAAE,GAAGD;QACpBtD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACuD,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1DvD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOuD,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAlTA,YACE9C,OAA2D,EAC3DZ,GAAoB,CACpB;QAVF;QAEA,qCACE2D,yBAAAA,wBAAwBA;QAE1B;QAMExD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMgD,QAAQ,OAAO,CAAChD;QAIlD,IAAI,AAA2B,WAApBZ,KAAK,YACd,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,KAAK,UACd,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAiSF"}
|
|
1
|
+
{"version":3,"file":"service/index.js","sources":["webpack/runtime/define_property_getters","webpack/runtime/has_own_property","webpack/runtime/make_namespace_object","../../../src/service/index.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import { isAutoGLM, isUITars } from '@/ai-model/auto-glm/util';\nimport {\n AIResponseParseError,\n AiExtractElementInfo,\n AiLocateElement,\n callAIWithObjectResponse,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport { type AIArgs, expandSearchArea } from '@/common';\nimport type {\n AIDescribeElementResponse,\n AIUsageInfo,\n DetailedLocateParam,\n LocateResultWithDump,\n PartialServiceDumpFromSDK,\n Rect,\n ServiceExtractOption,\n ServiceExtractParam,\n ServiceExtractResult,\n ServiceTaskInfo,\n UIContext,\n} from '@/types';\nimport { ServiceError } from '@/types';\nimport {\n type IModelConfig,\n MIDSCENE_FORCE_DEEP_THINK,\n globalConfigManager,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../common';\nimport { createServiceDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\ninterface ServiceOptions {\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n aiVendorFn?: typeof callAIWithObjectResponse;\n}\n\nconst debug = getDebug('ai:service');\nexport default class Service {\n contextRetrieverFn: () => Promise<UIContext> | UIContext;\n\n aiVendorFn: Exclude<ServiceOptions['aiVendorFn'], undefined> =\n callAIWithObjectResponse;\n\n taskInfo?: Omit<ServiceTaskInfo, 'durationMs'>;\n\n constructor(\n context: UIContext | (() => Promise<UIContext> | UIContext),\n opt?: ServiceOptions,\n ) {\n assert(context, 'context is required for Service');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n // just for unit test, aiVendorFn is callAIWithObjectResponse by default\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt: LocateOpts,\n modelConfig: IModelConfig,\n ): Promise<LocateResultWithDump> {\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = globalConfigManager.getEnvConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n\n const { modelFamily } = modelConfig;\n\n if (searchAreaPrompt && !modelFamily) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/model-config',\n );\n searchAreaPrompt = undefined;\n }\n\n if (searchAreaPrompt && isAutoGLM(modelFamily)) {\n console.warn('The \"deepThink\" feature is not supported with AutoGLM.');\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn());\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n modelConfig,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const { parseResult, rect, rawResponse, usage, reasoning_content } =\n await AiLocateElement({\n callAIFn: this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n modelConfig,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `failed to locate element: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements = parseResult.elements || [];\n\n const dump = createServiceDump({\n ...dumpData,\n matchedElement: elements,\n });\n\n if (errorLog) {\n throw new ServiceError(errorLog, dump);\n }\n\n if (elements.length > 1) {\n throw new ServiceError(\n `locate: multiple elements found, length = ${elements.length}`,\n dump,\n );\n }\n\n if (elements.length === 1) {\n return {\n element: {\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n description: elements[0]!.description,\n },\n rect,\n dump,\n };\n }\n\n return {\n element: null,\n rect,\n dump,\n };\n }\n\n async extract<T>(\n dataDemand: ServiceExtractParam,\n modelConfig: IModelConfig,\n opt?: ServiceExtractOption,\n pageDescription?: string,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<ServiceExtractResult<T>> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const context = await this.contextRetrieverFn();\n\n const startTime = Date.now();\n\n let parseResult: Awaited<\n ReturnType<typeof AiExtractElementInfo<T>>\n >['parseResult'];\n let rawResponse: string;\n let usage: Awaited<ReturnType<typeof AiExtractElementInfo<T>>>['usage'];\n let reasoning_content: string | undefined;\n\n try {\n const result = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelConfig,\n pageDescription,\n });\n parseResult = result.parseResult;\n rawResponse = result.rawResponse;\n usage = result.usage;\n reasoning_content = result.reasoning_content;\n } catch (error) {\n if (error instanceof AIResponseParseError) {\n // Create dump with usage and rawResponse from the error\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: error.rawResponse,\n usage: error.usage,\n };\n const dump = createServiceDump({\n type: 'extract',\n userQuery: { dataDemand },\n matchedElement: [],\n data: null,\n taskInfo,\n error: error.message,\n });\n throw new ServiceError(error.message, dump);\n }\n throw error;\n }\n\n const timeCost = Date.now() - startTime;\n const taskInfo: ServiceTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse,\n formatResponse: JSON.stringify(parseResult),\n usage,\n reasoning_content,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialServiceDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n const dump = createServiceDump({\n ...dumpData,\n data,\n });\n\n if (errorLog && !data) {\n throw new ServiceError(errorLog, dump);\n }\n\n return {\n data,\n thought,\n usage,\n reasoning_content,\n dump,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n modelConfig: IModelConfig,\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for service.describe');\n const context = await this.contextRetrieverFn();\n const { size } = context;\n const screenshotBase64 = context.screenshot.base64;\n assert(screenshotBase64, 'screenshot is required for service.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const { modelFamily } = modelConfig;\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(\n targetRect,\n context.size,\n modelFamily,\n );\n debug('describe: set searchArea', searchArea);\n const croppedResult = await cropByRect(\n imagePayload,\n searchArea,\n modelFamily === 'qwen2.5-vl',\n );\n imagePayload = croppedResult.imageBase64;\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn = this\n .aiVendorFn as typeof callAIWithObjectResponse<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, modelConfig);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","Service","query","opt","modelConfig","queryPrompt","assert","globalDeepThinkSwitch","globalConfigManager","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelFamily","console","undefined","isAutoGLM","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","rawResponse","usage","reasoning_content","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","dump","createServiceDump","ServiceError","dataDemand","pageDescription","multimodalPrompt","result","AiExtractElementInfo","error","AIResponseParseError","data","thought","target","size","screenshotBase64","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","croppedResult","cropByRect","msgs","callAIFn","res","content","callAIWithObjectResponse","Promise"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;;;;;;;;AC0CA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AACR,MAAMC;IA4BnB,MAAM,OACJC,KAA0B,EAC1BC,GAAe,EACfC,WAAyB,EACM;QAC/B,MAAMC,cAAc,AAAiB,YAAjB,OAAOH,QAAqBA,QAAQA,MAAM,MAAM;QACpEI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOD,aAAa;QAEpBC,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,AAAiB,YAAjB,OAAOJ,OAAoB;QAElC,MAAMK,wBAAwBC,oBAAAA,mBAAAA,CAAAA,qBAAyC,CACrEC,oBAAAA,yBAAyBA;QAE3B,IAAIF,uBACFR,MAAM,yBAAyBQ;QAEjC,IAAIG;QACJ,IAAIR,MAAM,SAAS,IAAIK,uBACrBG,mBAAmBR,MAAM,MAAM;QAGjC,MAAM,EAAES,WAAW,EAAE,GAAGP;QAExB,IAAIM,oBAAoB,CAACC,aAAa;YACpCC,QAAQ,IAAI,CACV;YAEFF,mBAAmBG;QACrB;QAEA,IAAIH,oBAAoBI,AAAAA,IAAAA,wBAAAA,SAAAA,AAAAA,EAAUH,cAAc;YAC9CC,QAAQ,IAAI,CAAC;YACbF,mBAAmBG;QACrB;QAEA,MAAME,UAAUZ,KAAK,WAAY,MAAM,IAAI,CAAC,kBAAkB;QAE9D,IAAIa;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIT,kBAAkB;YACpBS,qBAAqB,MAAMC,AAAAA,IAAAA,2BAAAA,eAAAA,AAAAA,EAAgB;gBACzCL;gBACA,oBAAoBL;gBACpBN;YACF;YACAE,IAAAA,sBAAAA,MAAAA,AAAAA,EACEa,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAET,iBAAiB,CAAC,EAChDS,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEC,IAAI,EAAEC,WAAW,EAAEC,KAAK,EAAEC,iBAAiB,EAAE,GAChE,MAAMC,AAAAA,IAAAA,yBAAAA,eAAAA,AAAAA,EAAgB;YACpB,UAAU,IAAI,CAAC,UAAU;YACzBb;YACA,0BAA0BV;YAC1B,cAAcc;YACdf;QACF;QAEF,MAAMyB,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAV;YACAC;YACAC;YACAS;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,4BAA4B,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG3E,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS5B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAamB;YACb,MAAM;YACNM;YACA,WAAW,CAAC,CAACd;YACb,OAAOgB;QACT;QAEA,MAAME,WAAWX,YAAY,QAAQ,IAAI,EAAE;QAE3C,MAAMY,OAAOC,AAAAA,IAAAA,kCAAAA,iBAAAA,AAAAA,EAAkB;YAC7B,GAAGH,QAAQ;YACX,gBAAgBC;QAClB;QAEA,IAAIF,UACF,MAAM,IAAIK,kCAAAA,YAAYA,CAACL,UAAUG;QAGnC,IAAID,SAAS,MAAM,GAAG,GACpB,MAAM,IAAIG,kCAAAA,YAAYA,CACpB,CAAC,0CAA0C,EAAEH,SAAS,MAAM,EAAE,EAC9DC;QAIJ,IAAID,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,aAAaA,QAAQ,CAAC,EAAE,CAAE,WAAW;YACvC;YACAV;YACAW;QACF;QAGF,OAAO;YACL,SAAS;YACTX;YACAW;QACF;IACF;IAEA,MAAM,QACJG,UAA+B,EAC/BlC,WAAyB,EACzBD,GAA0B,EAC1BoC,eAAwB,EACxBC,gBAAoC,EACF;QAClClC,IAAAA,sBAAAA,MAAAA,AAAAA,EACE,AAAsB,YAAtB,OAAOgC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMvB,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAE7C,MAAMM,YAAYC,KAAK,GAAG;QAE1B,IAAIC;QAGJ,IAAIE;QACJ,IAAIC;QACJ,IAAIC;QAEJ,IAAI;YACF,MAAMc,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,oBAAAA,AAAAA,EAAwB;gBAC3C3B;gBACA,WAAWuB;gBACXE;gBACA,eAAerC;gBACfC;gBACAmC;YACF;YACAhB,cAAckB,OAAO,WAAW;YAChChB,cAAcgB,OAAO,WAAW;YAChCf,QAAQe,OAAO,KAAK;YACpBd,oBAAoBc,OAAO,iBAAiB;QAC9C,EAAE,OAAOE,OAAO;YACd,IAAIA,iBAAiBC,yBAAAA,oBAAoBA,EAAE;gBAEzC,MAAMf,WAAWP,KAAK,GAAG,KAAKD;gBAC9B,MAAMS,WAA4B;oBAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;oBACtC,YAAYD;oBACZ,aAAac,MAAM,WAAW;oBAC9B,OAAOA,MAAM,KAAK;gBACpB;gBACA,MAAMR,OAAOC,AAAAA,IAAAA,kCAAAA,iBAAAA,AAAAA,EAAkB;oBAC7B,MAAM;oBACN,WAAW;wBAAEE;oBAAW;oBACxB,gBAAgB,EAAE;oBAClB,MAAM;oBACNR;oBACA,OAAOa,MAAM,OAAO;gBACtB;gBACA,MAAM,IAAIN,kCAAAA,YAAYA,CAACM,MAAM,OAAO,EAAER;YACxC;YACA,MAAMQ;QACR;QAEA,MAAMd,WAAWP,KAAK,GAAG,KAAKD;QAC9B,MAAMS,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZJ;YACA,gBAAgBM,KAAK,SAAS,CAACR;YAC/BG;YACAC;QACF;QAEA,IAAIK;QACJ,IAAIT,YAAY,MAAM,EAAE,QACtBS,WAAW,CAAC,qBAAqB,EAAET,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMU,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTK;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNR;YACA,OAAOE;QACT;QAEA,MAAM,EAAEa,IAAI,EAAEC,OAAO,EAAE,GAAGvB,eAAe,CAAC;QAE1C,MAAMY,OAAOC,AAAAA,IAAAA,kCAAAA,iBAAAA,AAAAA,EAAkB;YAC7B,GAAGH,QAAQ;YACXY;QACF;QAEA,IAAIb,YAAY,CAACa,MACf,MAAM,IAAIR,kCAAAA,YAAYA,CAACL,UAAUG;QAGnC,OAAO;YACLU;YACAC;YACApB;YACAC;YACAQ;QACF;IACF;IAEA,MAAM,SACJY,MAA+B,EAC/B3C,WAAyB,EACzBD,GAEC,EACwD;QACzDG,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOyC,QAAQ;QACf,MAAMhC,UAAU,MAAM,IAAI,CAAC,kBAAkB;QAC7C,MAAM,EAAEiC,IAAI,EAAE,GAAGjC;QACjB,MAAMkC,mBAAmBlC,QAAQ,UAAU,CAAC,MAAM;QAClDT,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO2C,kBAAkB;QAEzB,MAAM,EAAEtC,WAAW,EAAE,GAAGP;QACxB,MAAM8C,eAAeC,AAAAA,IAAAA,4BAAAA,2BAAAA,AAAAA;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,uBAAAA,AAAAA,EAAwB;YAC/C,gBAAgBR;YAChBD;YACA,sBAAsB;gBACpB;oBACE,MAAMK;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIlD,KAAK,WAAW;YAClB,MAAMa,aAAa0C,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EACjBL,YACAtC,QAAQ,IAAI,EACZJ;YAEFZ,MAAM,4BAA4BiB;YAClC,MAAM2C,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1BJ,cACAxC,YACAL,AAAgB,iBAAhBA;YAEF6C,eAAeG,cAAc,WAAW;QAC1C;QAEA,MAAME,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WAAW,IAAI,CAClB,UAAU;QAEb,MAAMC,MAAM,MAAMD,SAASD,MAAMzD;QAEjC,MAAM,EAAE4D,OAAO,EAAE,GAAGD;QACpBzD,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAAC0D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D1D,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO0D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IApVA,YACEjD,OAA2D,EAC3DZ,GAAoB,CACpB;QAVF;QAEA,qCACE8D,yBAAAA,wBAAwBA;QAE1B;QAME3D,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOS,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMmD,QAAQ,OAAO,CAACnD;QAIlD,IAAI,AAA2B,WAApBZ,KAAK,YACd,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,KAAK,UACd,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAmUF"}
|