@midscene/core 0.26.7-beta-20250821121810.0 → 0.26.7-beta-20250821124744.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/ui-tars-planning.mjs +34 -47
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/insight/index.mjs +4 -4
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/ai-model/ui-tars-planning.js +34 -47
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/insight/index.js +4 -4
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/package.json +3 -3
|
@@ -39,33 +39,23 @@ async function vlmPlanning(options) {
|
|
|
39
39
|
},
|
|
40
40
|
modelVer: modelVer || void 0
|
|
41
41
|
});
|
|
42
|
-
debug('modelVer', modelVer, 'parsed', JSON.stringify(parsed));
|
|
42
|
+
debug('ui-tars modelVer', modelVer, ', parsed', JSON.stringify(parsed));
|
|
43
43
|
const transformActions = [];
|
|
44
44
|
parsed.forEach((action)=>{
|
|
45
45
|
if ('click' === action.action_type) {
|
|
46
46
|
assert(action.action_inputs.start_box, 'start_box is required');
|
|
47
47
|
const point = getPoint(action.action_inputs.start_box, size);
|
|
48
|
-
transformActions.push({
|
|
49
|
-
type: 'Locate',
|
|
50
|
-
param: {},
|
|
51
|
-
locate: {
|
|
52
|
-
prompt: action.thought || '',
|
|
53
|
-
bbox: pointToBbox({
|
|
54
|
-
x: point[0],
|
|
55
|
-
y: point[1]
|
|
56
|
-
}, size.width, size.height)
|
|
57
|
-
}
|
|
58
|
-
});
|
|
59
48
|
transformActions.push({
|
|
60
49
|
type: 'Tap',
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
50
|
+
param: {
|
|
51
|
+
locate: {
|
|
52
|
+
prompt: action.thought || '',
|
|
53
|
+
bbox: pointToBbox({
|
|
54
|
+
x: point[0],
|
|
55
|
+
y: point[1]
|
|
56
|
+
}, size.width, size.height)
|
|
57
|
+
}
|
|
58
|
+
}
|
|
69
59
|
});
|
|
70
60
|
} else if ('drag' === action.action_type) {
|
|
71
61
|
assert(action.action_inputs.start_box, 'start_box is required');
|
|
@@ -73,18 +63,23 @@ async function vlmPlanning(options) {
|
|
|
73
63
|
const startPoint = getPoint(action.action_inputs.start_box, size);
|
|
74
64
|
const endPoint = getPoint(action.action_inputs.end_box, size);
|
|
75
65
|
transformActions.push({
|
|
76
|
-
type: '
|
|
66
|
+
type: 'DragAndDrop',
|
|
77
67
|
param: {
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
68
|
+
from: {
|
|
69
|
+
prompt: action.thought || '',
|
|
70
|
+
bbox: pointToBbox({
|
|
71
|
+
x: startPoint[0],
|
|
72
|
+
y: startPoint[1]
|
|
73
|
+
}, size.width, size.height)
|
|
81
74
|
},
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
75
|
+
to: {
|
|
76
|
+
prompt: action.thought || '',
|
|
77
|
+
bbox: pointToBbox({
|
|
78
|
+
x: endPoint[0],
|
|
79
|
+
y: endPoint[1]
|
|
80
|
+
}, size.width, size.height)
|
|
85
81
|
}
|
|
86
82
|
},
|
|
87
|
-
locate: null,
|
|
88
83
|
thought: action.thought || ''
|
|
89
84
|
});
|
|
90
85
|
} else if ('type' === action.action_type) transformActions.push({
|
|
@@ -92,7 +87,6 @@ async function vlmPlanning(options) {
|
|
|
92
87
|
param: {
|
|
93
88
|
value: action.action_inputs.content
|
|
94
89
|
},
|
|
95
|
-
locate: null,
|
|
96
90
|
thought: action.thought || ''
|
|
97
91
|
});
|
|
98
92
|
else if ('scroll' === action.action_type) transformActions.push({
|
|
@@ -100,13 +94,11 @@ async function vlmPlanning(options) {
|
|
|
100
94
|
param: {
|
|
101
95
|
direction: action.action_inputs.direction
|
|
102
96
|
},
|
|
103
|
-
locate: null,
|
|
104
97
|
thought: action.thought || ''
|
|
105
98
|
});
|
|
106
99
|
else if ('finished' === action.action_type) transformActions.push({
|
|
107
100
|
type: 'Finished',
|
|
108
101
|
param: {},
|
|
109
|
-
locate: null,
|
|
110
102
|
thought: action.thought || ''
|
|
111
103
|
});
|
|
112
104
|
else if ('hotkey' === action.action_type) if (action.action_inputs.key) {
|
|
@@ -114,9 +106,8 @@ async function vlmPlanning(options) {
|
|
|
114
106
|
transformActions.push({
|
|
115
107
|
type: 'KeyboardPress',
|
|
116
108
|
param: {
|
|
117
|
-
|
|
109
|
+
keyName: keys
|
|
118
110
|
},
|
|
119
|
-
locate: null,
|
|
120
111
|
thought: action.thought || ''
|
|
121
112
|
});
|
|
122
113
|
} else console.warn('No key found in action: hotkey. Will not perform action.');
|
|
@@ -125,19 +116,16 @@ async function vlmPlanning(options) {
|
|
|
125
116
|
param: {
|
|
126
117
|
timeMs: 1000
|
|
127
118
|
},
|
|
128
|
-
locate: null,
|
|
129
119
|
thought: action.thought || ''
|
|
130
120
|
});
|
|
131
121
|
else if ('androidBackButton' === action.action_type) transformActions.push({
|
|
132
122
|
type: 'AndroidBackButton',
|
|
133
123
|
param: {},
|
|
134
|
-
locate: null,
|
|
135
124
|
thought: action.thought || ''
|
|
136
125
|
});
|
|
137
126
|
else if ('androidHomeButton' === action.action_type) transformActions.push({
|
|
138
127
|
type: 'AndroidHomeButton',
|
|
139
128
|
param: {},
|
|
140
|
-
locate: null,
|
|
141
129
|
thought: action.thought || ''
|
|
142
130
|
});
|
|
143
131
|
else if ('androidRecentAppsButton' === action.action_type) transformActions.push({
|
|
@@ -145,33 +133,31 @@ async function vlmPlanning(options) {
|
|
|
145
133
|
param: {}
|
|
146
134
|
});
|
|
147
135
|
else if ('androidLongPress' === action.action_type) {
|
|
148
|
-
assert(action.action_inputs.
|
|
149
|
-
const
|
|
136
|
+
assert(action.action_inputs.start_box, 'start_box is required for androidLongPress');
|
|
137
|
+
const start = getPoint(action.action_inputs.start_box, size);
|
|
150
138
|
transformActions.push({
|
|
151
139
|
type: 'AndroidLongPress',
|
|
152
140
|
param: {
|
|
153
|
-
|
|
154
|
-
|
|
141
|
+
locate: {
|
|
142
|
+
prompt: action.thought || '',
|
|
143
|
+
bbox: pointToBbox({
|
|
144
|
+
x: start[0],
|
|
145
|
+
y: start[1]
|
|
146
|
+
}, size.width, size.height)
|
|
147
|
+
},
|
|
155
148
|
duration: 1000
|
|
156
149
|
},
|
|
157
|
-
locate: null,
|
|
158
150
|
thought: action.thought || ''
|
|
159
151
|
});
|
|
160
152
|
} else if ('androidPull' === action.action_type) {
|
|
161
153
|
const pullDirection = action.action_inputs.direction || 'down';
|
|
162
|
-
const startPoint = action.action_inputs.start_coords ? {
|
|
163
|
-
x: action.action_inputs.start_coords[0],
|
|
164
|
-
y: action.action_inputs.start_coords[1]
|
|
165
|
-
} : void 0;
|
|
166
154
|
transformActions.push({
|
|
167
155
|
type: 'AndroidPull',
|
|
168
156
|
param: {
|
|
169
157
|
direction: pullDirection,
|
|
170
|
-
startPoint,
|
|
171
158
|
distance: action.action_inputs.distance,
|
|
172
159
|
duration: action.action_inputs.duration || 500
|
|
173
160
|
},
|
|
174
|
-
locate: null,
|
|
175
161
|
thought: action.thought || ''
|
|
176
162
|
});
|
|
177
163
|
}
|
|
@@ -182,6 +168,7 @@ async function vlmPlanning(options) {
|
|
|
182
168
|
parsed
|
|
183
169
|
}
|
|
184
170
|
});
|
|
171
|
+
debug('transformActions', JSON.stringify(transformActions, null, 2));
|
|
185
172
|
return {
|
|
186
173
|
actions: transformActions,
|
|
187
174
|
actionsFromModel: parsed,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/ui-tars-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n MidsceneYamlFlowItem,\n PlanningAction,\n Size,\n} from '@/types';\nimport {\n type IModelPreferences,\n UITarsModelVersion,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { AIActionType } from './common';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { call } from './service-caller/index';\ntype ActionType =\n | 'click'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait'\n | 'androidBackButton'\n | 'androidHomeButton'\n | 'androidRecentAppsButton'\n | 'androidLongPress'\n | 'androidPull';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function vlmPlanning(options: {\n userInstruction: string;\n conversationHistory: ChatCompletionMessageParam[];\n size: { width: number; height: number };\n modelPreferences: IModelPreferences;\n}): Promise<{\n actions: PlanningAction<any>[];\n actionsFromModel: ReturnType<typeof actionParser>['parsed'];\n action_summary: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n usage?: AIUsageInfo;\n rawResponse?: string;\n}> {\n const { conversationHistory, userInstruction, size, modelPreferences } =\n options;\n const systemPrompt = getUiTarsPlanningPrompt() + userInstruction;\n\n const res = await call(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory,\n ],\n AIActionType.INSPECT_ELEMENT,\n modelPreferences,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const modelVer = uiTarsModelVersion(modelPreferences);\n\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: modelVer || undefined,\n });\n\n debug('modelVer', modelVer, 'parsed', JSON.stringify(parsed));\n\n const transformActions: PlanningAction[] = [];\n parsed.forEach((action) => {\n if (action.action_type === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'Locate',\n param: {},\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n });\n transformActions.push({\n type: 'Tap',\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n param: action.thought || '',\n });\n } else if (action.action_type === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'Drag',\n param: {\n start_box: { x: startPoint[0], y: startPoint[1] },\n end_box: { x: endPoint[0], y: endPoint[1] },\n },\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'finished') {\n transformActions.push({\n type: 'Finished',\n param: {},\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n value: keys,\n },\n locate: null,\n thought: action.thought || '',\n });\n }\n } else if (action.action_type === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidBackButton') {\n transformActions.push({\n type: 'AndroidBackButton',\n param: {},\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidHomeButton') {\n transformActions.push({\n type: 'AndroidHomeButton',\n param: {},\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidRecentAppsButton') {\n transformActions.push({\n type: 'AndroidRecentAppsButton',\n param: {},\n });\n } else if (action.action_type === 'androidLongPress') {\n assert(\n action.action_inputs.start_coords,\n 'start_coords is required for androidLongPress',\n );\n const point = action.action_inputs.start_coords;\n transformActions.push({\n type: 'AndroidLongPress',\n param: {\n x: point[0],\n y: point[1],\n duration: 1000,\n },\n locate: null,\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidPull') {\n const pullDirection = action.action_inputs.direction || 'down';\n const startPoint = action.action_inputs.start_coords\n ? {\n x: action.action_inputs.start_coords[0],\n y: action.action_inputs.start_coords[1],\n }\n : undefined;\n\n transformActions.push({\n type: 'AndroidPull',\n param: {\n direction: pullDirection as 'up' | 'down',\n startPoint,\n distance: (action.action_inputs as any).distance,\n duration: (action.action_inputs as any).duration || 500,\n },\n locate: null,\n thought: action.thought || '',\n });\n }\n });\n\n if (transformActions.length === 0) {\n throw new Error(`No actions found, response: ${res.content}`, {\n cause: {\n prediction: res.content,\n parsed,\n },\n });\n }\n\n return {\n actions: transformActions,\n actionsFromModel: parsed,\n action_summary: getSummary(res.content),\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\ninterface AndroidLongPressAction extends BaseAction {\n action_type: 'androidLongPress';\n action_inputs: {\n start_coords: [number, number]; // Coordinates for long press\n duration?: number; // Duration in milliseconds\n };\n}\n\nexport type Action =\n | ClickAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction\n | AndroidLongPressAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n modelPreferences: IModelPreferences,\n) {\n if (\n vlLocateMode(modelPreferences) === 'vlm-ui-tars' &&\n uiTarsModelVersion(modelPreferences) === UITarsModelVersion.V1_5\n ) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","vlmPlanning","options","conversationHistory","userInstruction","size","modelPreferences","systemPrompt","getUiTarsPlanningPrompt","res","call","AIActionType","convertedText","convertBboxToCoordinates","modelVer","uiTarsModelVersion","parsed","actionParser","undefined","JSON","transformActions","action","assert","getPoint","startPoint","endPoint","keys","transformHotkeyInput","console","pullDirection","Error","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","resizeImageForUiTars","imageBase64","vlLocateMode","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;;;;;;;AAmCA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,YAAYC,OAKjC;IAQC,MAAM,EAAEC,mBAAmB,EAAEC,eAAe,EAAEC,IAAI,EAAEC,gBAAgB,EAAE,GACpEJ;IACF,MAAMK,eAAeC,4BAA4BJ;IAEjD,MAAMK,MAAM,MAAMC,KAChB;QACE;YACE,MAAM;YACN,SAASH;QACX;WACGJ;KACJ,EACDQ,aAAa,eAAe,EAC5BL;IAEF,MAAMM,gBAAgBC,yBAAyBJ,IAAI,OAAO;IAE1D,MAAMK,WAAWC,mBAAmBT;IAEpC,MAAM,EAAEU,MAAM,EAAE,GAAGC,aAAa;QAC9B,YAAYL;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOP,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUS,YAAYI;IACxB;IAEAzB,MAAM,YAAYqB,UAAU,UAAUK,KAAK,SAAS,CAACH;IAErD,MAAMI,mBAAqC,EAAE;IAC7CJ,OAAO,OAAO,CAAC,CAACK;QACd,IAAIA,AAAuB,YAAvBA,OAAO,WAAW,EAAc;YAClCC,OAAOD,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxB,QAAQ0B,SAASF,OAAO,aAAa,CAAC,SAAS,EAAEhB;YACvDe,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,QAAQ;oBACN,QAAQC,OAAO,OAAO,IAAI;oBAC1B,MAAMzB,YACJ;wBAAE,GAAGC,KAAK,CAAC,EAAE;wBAAE,GAAGA,KAAK,CAAC,EAAE;oBAAC,GAC3BQ,KAAK,KAAK,EACVA,KAAK,MAAM;gBAEf;YACF;YACAe,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,QAAQ;oBACN,QAAQC,OAAO,OAAO,IAAI;oBAC1B,MAAMzB,YACJ;wBAAE,GAAGC,KAAK,CAAC,EAAE;wBAAE,GAAGA,KAAK,CAAC,EAAE;oBAAC,GAC3BQ,KAAK,KAAK,EACVA,KAAK,MAAM;gBAEf;gBACA,OAAOgB,OAAO,OAAO,IAAI;YAC3B;QACF,OAAO,IAAIA,AAAuB,WAAvBA,OAAO,WAAW,EAAa;YACxCC,OAAOD,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCC,OAAOD,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMG,aAAaD,SAASF,OAAO,aAAa,CAAC,SAAS,EAAEhB;YAC5D,MAAMoB,WAAWF,SAASF,OAAO,aAAa,CAAC,OAAO,EAAEhB;YACxDe,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,WAAW;wBAAE,GAAGI,UAAU,CAAC,EAAE;wBAAE,GAAGA,UAAU,CAAC,EAAE;oBAAC;oBAChD,SAAS;wBAAE,GAAGC,QAAQ,CAAC,EAAE;wBAAE,GAAGA,QAAQ,CAAC,EAAE;oBAAC;gBAC5C;gBACA,QAAQ;gBACR,SAASJ,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIA,AAAuB,WAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOC,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,QAAQ;YACR,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,aAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWC,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,QAAQ;YACR,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,eAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;YACR,QAAQ;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,aAAvBA,OAAO,WAAW,EAC3B,IAAKA,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMK,OAAOC,qBAAqBN,OAAO,aAAa,CAAC,GAAG;YAE1DD,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,OAAOM;gBACT;gBACA,QAAQ;gBACR,SAASL,OAAO,OAAO,IAAI;YAC7B;QACF,OAdEO,QAAQ,IAAI,CACV;aAcC,IAAIP,AAAuB,WAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,QAAQ;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,wBAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;YACR,QAAQ;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,wBAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;YACR,QAAQ;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,8BAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;QACV;aACK,IAAIC,AAAuB,uBAAvBA,OAAO,WAAW,EAAyB;YACpDC,OACED,OAAO,aAAa,CAAC,YAAY,EACjC;YAEF,MAAMxB,QAAQwB,OAAO,aAAa,CAAC,YAAY;YAC/CD,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,GAAGvB,KAAK,CAAC,EAAE;oBACX,GAAGA,KAAK,CAAC,EAAE;oBACX,UAAU;gBACZ;gBACA,QAAQ;gBACR,SAASwB,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIA,AAAuB,kBAAvBA,OAAO,WAAW,EAAoB;YAC/C,MAAMQ,gBAAgBR,OAAO,aAAa,CAAC,SAAS,IAAI;YACxD,MAAMG,aAAaH,OAAO,aAAa,CAAC,YAAY,GAChD;gBACE,GAAGA,OAAO,aAAa,CAAC,YAAY,CAAC,EAAE;gBACvC,GAAGA,OAAO,aAAa,CAAC,YAAY,CAAC,EAAE;YACzC,IACAH;YAEJE,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,WAAWS;oBACXL;oBACA,UAAWH,OAAO,aAAa,CAAS,QAAQ;oBAChD,UAAWA,OAAO,aAAa,CAAS,QAAQ,IAAI;gBACtD;gBACA,QAAQ;gBACR,SAASA,OAAO,OAAO,IAAI;YAC7B;QACF;IACF;IAEA,IAAID,AAA4B,MAA5BA,iBAAiB,MAAM,EACzB,MAAM,IAAIU,MAAM,CAAC,4BAA4B,EAAErB,IAAI,OAAO,EAAE,EAAE;QAC5D,OAAO;YACL,YAAYA,IAAI,OAAO;YACvBO;QACF;IACF;IAGF,OAAO;QACL,SAASI;QACT,kBAAkBJ;QAClB,gBAAgBe,WAAWtB,IAAI,OAAO;QACtC,OAAOA,IAAI,KAAK;QAChB,aAAaU,KAAK,SAAS,CAACV,IAAI,OAAO,EAAES,QAAW;IACtD;AACF;AAOA,SAASL,yBAAyBmB,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAI7C,KAAK,KAAK,CAAEwC,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAI9C,KAAK,KAAK,CAAE0C,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASX,SAASyB,QAAgB,EAAE3C,IAAuC;IACzE,MAAM,CAACwC,GAAGC,EAAE,GAAG3B,KAAK,KAAK,CAAC6B;IAC1B,OAAO;QAACH,IAAIxC,KAAK,KAAK;QAAEyC,IAAIzC,KAAK,MAAM;KAAC;AAC1C;AA2EO,eAAe4C,qBACpBC,WAAmB,EACnB7C,IAAU,EACVC,gBAAmC;IAEnC,IACE6C,AAAmC,kBAAnCA,aAAa7C,qBACbS,mBAAmBT,sBAAsB8C,mBAAmB,IAAI,EAChE;QACA3D,MAAM,uCAAuCY;QAC7C,MAAMgD,gBAAgBhD,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAMiD,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAevD,KAAK,IAAI,CAACsD,YAAYD;YAC3C,MAAMG,WAAWxD,KAAK,KAAK,CAACK,KAAK,KAAK,GAAGkD;YACzC,MAAME,YAAYzD,KAAK,KAAK,CAACK,KAAK,MAAM,GAAGkD;YAC3C9D,MACE,2DACA+D,UACAC;YAEF,MAAMC,eAAe,MAAMC,gBAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/ui-tars-planning.mjs","sources":["webpack://@midscene/core/./src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n MidsceneYamlFlowItem,\n PlanningAction,\n Size,\n} from '@/types';\nimport {\n type IModelPreferences,\n UITarsModelVersion,\n uiTarsModelVersion,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { resizeImgBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { transformHotkeyInput } from '@midscene/shared/us-keyboard-layout';\nimport { assert } from '@midscene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\nimport { AIActionType } from './common';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { call } from './service-caller/index';\ntype ActionType =\n | 'click'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait'\n | 'androidBackButton'\n | 'androidHomeButton'\n | 'androidRecentAppsButton'\n | 'androidLongPress'\n | 'androidPull';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function vlmPlanning(options: {\n userInstruction: string;\n conversationHistory: ChatCompletionMessageParam[];\n size: { width: number; height: number };\n modelPreferences: IModelPreferences;\n}): Promise<{\n actions: PlanningAction<any>[];\n actionsFromModel: ReturnType<typeof actionParser>['parsed'];\n action_summary: string;\n yamlFlow?: MidsceneYamlFlowItem[];\n usage?: AIUsageInfo;\n rawResponse?: string;\n}> {\n const { conversationHistory, userInstruction, size, modelPreferences } =\n options;\n const systemPrompt = getUiTarsPlanningPrompt() + userInstruction;\n\n const res = await call(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory,\n ],\n AIActionType.INSPECT_ELEMENT,\n modelPreferences,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const modelVer = uiTarsModelVersion(modelPreferences);\n\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: modelVer || undefined,\n });\n\n debug('ui-tars modelVer', modelVer, ', parsed', JSON.stringify(parsed));\n\n const transformActions: PlanningAction[] = [];\n parsed.forEach((action) => {\n if (action.action_type === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'Tap',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n });\n } else if (action.action_type === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (action.action_type === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (action.action_type === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (action.action_type === 'finished') {\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (action.action_type === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys,\n },\n thought: action.thought || '',\n });\n }\n } else if (action.action_type === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidBackButton') {\n transformActions.push({\n type: 'AndroidBackButton',\n param: {},\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidHomeButton') {\n transformActions.push({\n type: 'AndroidHomeButton',\n param: {},\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidRecentAppsButton') {\n transformActions.push({\n type: 'AndroidRecentAppsButton',\n param: {},\n });\n } else if (action.action_type === 'androidLongPress') {\n assert(\n action.action_inputs.start_box,\n 'start_box is required for androidLongPress',\n );\n const start = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'AndroidLongPress',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: start[0], y: start[1] },\n size.width,\n size.height,\n ),\n },\n duration: 1000,\n },\n thought: action.thought || '',\n });\n } else if (action.action_type === 'androidPull') {\n const pullDirection = action.action_inputs.direction || 'down';\n transformActions.push({\n type: 'AndroidPull',\n param: {\n direction: pullDirection as 'up' | 'down',\n distance: (action.action_inputs as any).distance,\n duration: (action.action_inputs as any).duration || 500,\n },\n thought: action.thought || '',\n });\n }\n });\n\n if (transformActions.length === 0) {\n throw new Error(`No actions found, response: ${res.content}`, {\n cause: {\n prediction: res.content,\n parsed,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n\n return {\n actions: transformActions,\n actionsFromModel: parsed,\n action_summary: getSummary(res.content),\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\ninterface AndroidLongPressAction extends BaseAction {\n action_type: 'androidLongPress';\n action_inputs: {\n start_coords: [number, number]; // Coordinates for long press\n duration?: number; // Duration in milliseconds\n };\n}\n\nexport type Action =\n | ClickAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | ScrollAction\n | FinishedAction\n | WaitAction\n | AndroidLongPressAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n modelPreferences: IModelPreferences,\n) {\n if (\n vlLocateMode(modelPreferences) === 'vlm-ui-tars' &&\n uiTarsModelVersion(modelPreferences) === UITarsModelVersion.V1_5\n ) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","vlmPlanning","options","conversationHistory","userInstruction","size","modelPreferences","systemPrompt","getUiTarsPlanningPrompt","res","call","AIActionType","convertedText","convertBboxToCoordinates","modelVer","uiTarsModelVersion","parsed","actionParser","undefined","JSON","transformActions","action","assert","getPoint","startPoint","endPoint","keys","transformHotkeyInput","console","start","pullDirection","Error","getSummary","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","resizeImageForUiTars","imageBase64","vlLocateMode","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;;;;;;;AAmCA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,YAAYC,OAKjC;IAQC,MAAM,EAAEC,mBAAmB,EAAEC,eAAe,EAAEC,IAAI,EAAEC,gBAAgB,EAAE,GACpEJ;IACF,MAAMK,eAAeC,4BAA4BJ;IAEjD,MAAMK,MAAM,MAAMC,KAChB;QACE;YACE,MAAM;YACN,SAASH;QACX;WACGJ;KACJ,EACDQ,aAAa,eAAe,EAC5BL;IAEF,MAAMM,gBAAgBC,yBAAyBJ,IAAI,OAAO;IAE1D,MAAMK,WAAWC,mBAAmBT;IAEpC,MAAM,EAAEU,MAAM,EAAE,GAAGC,aAAa;QAC9B,YAAYL;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOP,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUS,YAAYI;IACxB;IAEAzB,MAAM,oBAAoBqB,UAAU,YAAYK,KAAK,SAAS,CAACH;IAE/D,MAAMI,mBAAqC,EAAE;IAC7CJ,OAAO,OAAO,CAAC,CAACK;QACd,IAAIA,AAAuB,YAAvBA,OAAO,WAAW,EAAc;YAClCC,OAAOD,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAMxB,QAAQ0B,SAASF,OAAO,aAAa,CAAC,SAAS,EAAEhB;YACvDe,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQC,OAAO,OAAO,IAAI;wBAC1B,MAAMzB,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BQ,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;YACF;QACF,OAAO,IAAIgB,AAAuB,WAAvBA,OAAO,WAAW,EAAa;YACxCC,OAAOD,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCC,OAAOD,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMG,aAAaD,SAASF,OAAO,aAAa,CAAC,SAAS,EAAEhB;YAC5D,MAAMoB,WAAWF,SAASF,OAAO,aAAa,CAAC,OAAO,EAAEhB;YACxDe,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQC,OAAO,OAAO,IAAI;wBAC1B,MAAMzB,YACJ;4BAAE,GAAG4B,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCnB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQgB,OAAO,OAAO,IAAI;wBAC1B,MAAMzB,YACJ;4BAAE,GAAG6B,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCpB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASgB,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIA,AAAuB,WAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOC,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,aAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWC,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,eAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,aAAvBA,OAAO,WAAW,EAC3B,IAAKA,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMK,OAAOC,qBAAqBN,OAAO,aAAa,CAAC,GAAG;YAE1DD,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASM;gBACX;gBACA,SAASL,OAAO,OAAO,IAAI;YAC7B;QACF,OAbEO,QAAQ,IAAI,CACV;aAaC,IAAIP,AAAuB,WAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,wBAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,wBAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;YACR,SAASC,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIA,AAAuB,8BAAvBA,OAAO,WAAW,EAC3BD,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO,CAAC;QACV;aACK,IAAIC,AAAuB,uBAAvBA,OAAO,WAAW,EAAyB;YACpDC,OACED,OAAO,aAAa,CAAC,SAAS,EAC9B;YAEF,MAAMQ,QAAQN,SAASF,OAAO,aAAa,CAAC,SAAS,EAAEhB;YACvDe,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQC,OAAO,OAAO,IAAI;wBAC1B,MAAMzB,YACJ;4BAAE,GAAGiC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BxB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,UAAU;gBACZ;gBACA,SAASgB,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIA,AAAuB,kBAAvBA,OAAO,WAAW,EAAoB;YAC/C,MAAMS,gBAAgBT,OAAO,aAAa,CAAC,SAAS,IAAI;YACxDD,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,WAAWU;oBACX,UAAWT,OAAO,aAAa,CAAS,QAAQ;oBAChD,UAAWA,OAAO,aAAa,CAAS,QAAQ,IAAI;gBACtD;gBACA,SAASA,OAAO,OAAO,IAAI;YAC7B;QACF;IACF;IAEA,IAAID,AAA4B,MAA5BA,iBAAiB,MAAM,EACzB,MAAM,IAAIW,MAAM,CAAC,4BAA4B,EAAEtB,IAAI,OAAO,EAAE,EAAE;QAC5D,OAAO;YACL,YAAYA,IAAI,OAAO;YACvBO;QACF;IACF;IAGFvB,MAAM,oBAAoB0B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IAEjE,OAAO;QACL,SAASA;QACT,kBAAkBJ;QAClB,gBAAgBgB,WAAWvB,IAAI,OAAO;QACtC,OAAOA,IAAI,KAAK;QAChB,aAAaU,KAAK,SAAS,CAACV,IAAI,OAAO,EAAES,QAAW;IACtD;AACF;AAOA,SAASL,yBAAyBoB,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAI9C,KAAK,KAAK,CAAEyC,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAI/C,KAAK,KAAK,CAAE2C,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASZ,SAAS0B,QAAgB,EAAE5C,IAAuC;IACzE,MAAM,CAACyC,GAAGC,EAAE,GAAG5B,KAAK,KAAK,CAAC8B;IAC1B,OAAO;QAACH,IAAIzC,KAAK,KAAK;QAAE0C,IAAI1C,KAAK,MAAM;KAAC;AAC1C;AA2EO,eAAe6C,qBACpBC,WAAmB,EACnB9C,IAAU,EACVC,gBAAmC;IAEnC,IACE8C,AAAmC,kBAAnCA,aAAa9C,qBACbS,mBAAmBT,sBAAsB+C,mBAAmB,IAAI,EAChE;QACA5D,MAAM,uCAAuCY;QAC7C,MAAMiD,gBAAgBjD,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAMkD,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAexD,KAAK,IAAI,CAACuD,YAAYD;YAC3C,MAAMG,WAAWzD,KAAK,KAAK,CAACK,KAAK,KAAK,GAAGmD;YACzC,MAAME,YAAY1D,KAAK,KAAK,CAACK,KAAK,MAAM,GAAGmD;YAC3C/D,MACE,2DACAgE,UACAC;YAEF,MAAMC,eAAe,MAAMC,gBAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
|
@@ -168,6 +168,9 @@ class Insight {
|
|
|
168
168
|
const context = await this.contextRetrieverFn('describe');
|
|
169
169
|
const { screenshotBase64, size } = context;
|
|
170
170
|
assert(screenshotBase64, 'screenshot is required for insight.describe');
|
|
171
|
+
const modelPreferences = {
|
|
172
|
+
intent: 'grounding'
|
|
173
|
+
};
|
|
171
174
|
const systemPrompt = elementDescriberInstruction();
|
|
172
175
|
const defaultRectSize = 30;
|
|
173
176
|
const targetRect = Array.isArray(target) ? {
|
|
@@ -187,9 +190,6 @@ class Insight {
|
|
|
187
190
|
borderThickness: 3
|
|
188
191
|
});
|
|
189
192
|
if (null == opt ? void 0 : opt.deepThink) {
|
|
190
|
-
const modelPreferences = {
|
|
191
|
-
intent: 'grounding'
|
|
192
|
-
};
|
|
193
193
|
const searchArea = expandSearchArea(targetRect, context.size, modelPreferences);
|
|
194
194
|
debug('describe: set searchArea', searchArea);
|
|
195
195
|
imagePayload = await cropByRect(imagePayload, searchArea, getIsUseQwenVl(modelPreferences));
|
|
@@ -213,7 +213,7 @@ class Insight {
|
|
|
213
213
|
}
|
|
214
214
|
];
|
|
215
215
|
const callAIFn = this.aiVendorFn || callToGetJSONObject;
|
|
216
|
-
const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);
|
|
216
|
+
const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT, modelPreferences);
|
|
217
217
|
const { content } = res;
|
|
218
218
|
assert(!content.error, `describe failed: ${content.error}`);
|
|
219
219
|
assert(content.description, 'failed to describe the element');
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"insight/index.mjs","sources":["webpack://@midscene/core/./src/insight/index.ts"],"sourcesContent":["import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n type IModelPreferences,\n MIDSCENE_FORCE_DEEP_THINK,\n getAIConfigInBoolean,\n getIsUseQwenVl,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../ai-model/common';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n if (searchAreaPrompt && !vlLocateMode(modelPreferences)) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<{\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n }> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const modelPreferences: IModelPreferences = {\n intent: 'VQA',\n };\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelPreferences,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data && !opt?.doNotThrowError) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n thought,\n usage,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const modelPreferences: IModelPreferences = { intent: 'grounding' };\n const searchArea = expandSearchArea(\n targetRect,\n context.size,\n modelPreferences,\n );\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getIsUseQwenVl(modelPreferences),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(msgs, AIActionType.DESCRIBE_ELEMENT);\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Insight","query","opt","_parseResult_errors","callAI","queryPrompt","assert","dumpSubscriber","undefined","globalDeepThinkSwitch","getAIConfigInBoolean","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelPreferences","vlLocateMode","console","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","elementById","rawResponse","usage","isOrderSensitive","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","item","element","emitInsightDump","Error","dataDemand","multimodalPrompt","AiExtractElementInfo","data","thought","target","screenshotBase64","size","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","cropByRect","getIsUseQwenVl","msgs","callAIFn","callToGetJSONObject","res","AIActionType","content","callAiFn","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;AAoDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAmCnB,MAAM,OACJC,KAA0B,EAC1BC,GAAgB,EACO;YAkFnBC;QAjFJ,MAAM,EAAEC,MAAM,EAAE,GAAGF,OAAO,CAAC;QAC3B,MAAMG,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QACpB,MAAME,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzBF,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMQ,wBAAwBC,qBAC5BC;QAEF,IAAIF,uBACFX,MAAM,yBAAyBW;QAEjC,IAAIG;QACJ,IAAIX,MAAM,SAAS,IAAIQ,uBACrBG,mBAAmBX,MAAM,MAAM;QAEjC,MAAMY,mBAAsC;YAC1C,QAAQ;QACV;QAEA,IAAID,oBAAoB,CAACE,aAAaD,mBAAmB;YACvDE,QAAQ,IAAI,CACV;YAEFH,mBAAmBJ;QACrB;QAEA,MAAMQ,UAAUd,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,OAAO,AAAD,KAAM,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE/D,IAAIe;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIR,kBAAkB;YACpBQ,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBJ;YACtB;YACAN,OACEc,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAER,iBAAiB,CAAC,EAChDQ,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EACJC,WAAW,EACXC,IAAI,EACJC,WAAW,EACXC,WAAW,EACXC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,gBAAgB;YACxB,QAAQ1B,UAAU,IAAI,CAAC,UAAU;YACjCY;YACA,0BAA0BX;YAC1B,cAAce;QAChB;QAEA,MAAMW,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACT;YAC/BI;YACAX;YACAC;YACAC;QACF;QAEA,IAAIe;QACJ,IAAI,QAAA/B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B+B,WAAW,CAAC,6BAA6B,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG5E,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS9B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAaoB;YACb,MAAM;YACNO;YACA,WAAW,CAAC,CAACf;YACb,OAAOiB;QACT;QAEA,MAAME,WAA0B,EAAE;QACjCZ,CAAAA,YAAY,QAAQ,IAAI,EAAC,EAAG,OAAO,CAAC,CAACa;YACpC,IAAI,QAAQA,MAAM;gBAChB,MAAMC,UAAUZ,YAAYW,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,EAAE;gBAEpC,IAAI,CAACC,SAAS,YACZvB,QAAQ,IAAI,CACV,CAAC,+BAA+B,EAAEsB,KAAK,EAAE,CAAC,0CAA0C,CAAC;gBAIzFD,SAAS,IAAI,CAACE;YAChB;QACF;QAEAC,gBACE;YACE,GAAGJ,QAAQ;YACX,gBAAgBC;QAClB,GACA7B;QAGF,IAAI2B,UACF,MAAM,IAAIM,MAAMN;QAGlB5B,OACE8B,SAAS,MAAM,IAAI,GACnB,CAAC,0CAA0C,EAAEA,SAAS,MAAM,EAAE;QAGhE,IAAIA,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,IAAIA,QAAQ,CAAC,EAAE,CAAE,EAAE;gBACnB,SAASA,QAAQ,CAAC,EAAE,CAAE,OAAO;gBAC7B,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM,IAAI,EAAE;gBACjC,YAAYA,QAAQ,CAAC,EAAE,CAAE,UAAU;gBACnCP;YACF;YACAJ;QACF;QAEF,OAAO;YACL,SAAS;YACTA;QACF;IACF;IAEA,MAAM,QACJgB,UAA+B,EAC/BvC,GAA0B,EAC1BwC,gBAAoC,EAKnC;YA+BGvC;QA9BJG,OACE,AAAsB,YAAtB,OAAOmC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMlC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMK,mBAAsC;YAC1C,QAAQ;QACV;QAEA,MAAMG,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE9C,MAAMM,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEI,KAAK,EAAE,GAAG,MAAMe,qBAAwB;YAC3D3B;YACA,WAAWyB;YACXC;YACA,eAAexC;YACfW;QACF;QAEA,MAAMkB,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACT;QAC9B;QAEA,IAAIU;QACJ,IAAI,QAAA/B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B+B,WAAW,CAAC,qBAAqB,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTM;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNT;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGrB,eAAe,CAAC;QAG1Ce,gBACE;YACE,GAAGJ,QAAQ;YACXS;QACF,GACArC;QAGF,IAAI2B,YAAY,CAACU,QAAQ,CAAC1C,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,eAAe,AAAD,GAC3C,MAAM,IAAIsC,MAAMN;QAGlB,OAAO;YACLU;YACAC;YACAjB;QACF;IACF;IAEA,MAAM,SACJkB,MAA+B,EAC/B5C,GAEC,EACwD;QACzDI,OAAOwC,QAAQ;QACf,MAAM9B,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAM,EAAE+B,gBAAgB,EAAEC,IAAI,EAAE,GAAGhC;QACnCV,OAAOyC,kBAAkB;QAEzB,MAAME,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBT;YAChBC;YACA,sBAAsB;gBACpB;oBACE,MAAMI;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIlD,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,SAAS,EAAE;YAElB,MAAMW,mBAAsC;gBAAE,QAAQ;YAAY;YAClE,MAAMI,aAAawC,iBACjBL,YACApC,QAAQ,IAAI,EACZH;YAEFf,MAAM,4BAA4BmB;YAClCsC,eAAe,MAAMG,WACnBH,cACAtC,YACA0C,eAAe9C;QAEnB;QAEA,MAAM+C,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WACJ,IAAI,CAAC,UAAU,IAAIC;QAErB,MAAMC,MAAM,MAAMF,SAASD,MAAMI,aAAa,gBAAgB;QAE9D,MAAM,EAAEC,OAAO,EAAE,GAAGF;QACpBzD,OAAO,CAAC2D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D3D,OAAO2D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IAvUA,YACEjD,OAEmE,EACnEd,GAAoB,CACpB;QAfF;QAIA,qCAAoDgE;QAEpD;QAEA;QAQE5D,OAAOU,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMmD,QAAQ,OAAO,CAACnD;QAGlD,IAAI,AAA2B,WAApBd,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,UAAU,AAAD,GACvB,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,QAAQ,AAAD,GACrB,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAqTF"}
|
|
1
|
+
{"version":3,"file":"insight/index.mjs","sources":["webpack://@midscene/core/./src/insight/index.ts"],"sourcesContent":["import {\n AIActionType,\n type AIArgs,\n callAiFn,\n expandSearchArea,\n} from '@/ai-model/common';\nimport {\n AiExtractElementInfo,\n AiLocateElement,\n callToGetJSONObject,\n} from '@/ai-model/index';\nimport { AiLocateSection } from '@/ai-model/inspect';\nimport { elementDescriberInstruction } from '@/ai-model/prompt/describe';\nimport type {\n AIDescribeElementResponse,\n AIElementResponse,\n AIUsageInfo,\n BaseElement,\n DetailedLocateParam,\n DumpSubscriber,\n InsightAction,\n InsightExtractOption,\n InsightExtractParam,\n InsightOptions,\n InsightTaskInfo,\n LocateResult,\n PartialInsightDumpFromSDK,\n Rect,\n UIContext,\n} from '@/types';\nimport {\n type IModelPreferences,\n MIDSCENE_FORCE_DEEP_THINK,\n getAIConfigInBoolean,\n getIsUseQwenVl,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport { compositeElementInfoImg, cropByRect } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type { TMultimodalPrompt } from '../ai-model/common';\nimport { emitInsightDump } from './utils';\n\nexport interface LocateOpts {\n context?: UIContext<BaseElement>;\n callAI?: typeof callAiFn<AIElementResponse>;\n}\n\nexport type AnyValue<T> = {\n [K in keyof T]: unknown extends T[K] ? any : T[K];\n};\n\nconst debug = getDebug('ai:insight');\nexport default class Insight<\n ElementType extends BaseElement = BaseElement,\n ContextType extends UIContext<ElementType> = UIContext<ElementType>,\n> {\n contextRetrieverFn: (\n action: InsightAction,\n ) => Promise<ContextType> | ContextType;\n\n aiVendorFn: (...args: Array<any>) => Promise<any> = callAiFn;\n\n onceDumpUpdatedFn?: DumpSubscriber;\n\n taskInfo?: Omit<InsightTaskInfo, 'durationMs'>;\n\n constructor(\n context:\n | ContextType\n | ((action: InsightAction) => Promise<ContextType> | ContextType),\n opt?: InsightOptions,\n ) {\n assert(context, 'context is required for Insight');\n if (typeof context === 'function') {\n this.contextRetrieverFn = context;\n } else {\n this.contextRetrieverFn = () => Promise.resolve(context);\n }\n\n if (typeof opt?.aiVendorFn !== 'undefined') {\n this.aiVendorFn = opt.aiVendorFn;\n }\n if (typeof opt?.taskInfo !== 'undefined') {\n this.taskInfo = opt.taskInfo;\n }\n }\n\n async locate(\n query: DetailedLocateParam,\n opt?: LocateOpts,\n ): Promise<LocateResult> {\n const { callAI } = opt || {};\n const queryPrompt = typeof query === 'string' ? query : query.prompt;\n assert(queryPrompt, 'query is required for locate');\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n assert(typeof query === 'object', 'query should be an object for locate');\n\n const globalDeepThinkSwitch = getAIConfigInBoolean(\n MIDSCENE_FORCE_DEEP_THINK,\n );\n if (globalDeepThinkSwitch) {\n debug('globalDeepThinkSwitch', globalDeepThinkSwitch);\n }\n let searchAreaPrompt;\n if (query.deepThink || globalDeepThinkSwitch) {\n searchAreaPrompt = query.prompt;\n }\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n if (searchAreaPrompt && !vlLocateMode(modelPreferences)) {\n console.warn(\n 'The \"deepThink\" feature is not supported with multimodal LLM. Please config VL model for Midscene. https://midscenejs.com/choose-a-model',\n );\n searchAreaPrompt = undefined;\n }\n\n const context = opt?.context || (await this.contextRetrieverFn('locate'));\n\n let searchArea: Rect | undefined = undefined;\n let searchAreaRawResponse: string | undefined = undefined;\n let searchAreaUsage: AIUsageInfo | undefined = undefined;\n let searchAreaResponse:\n | Awaited<ReturnType<typeof AiLocateSection>>\n | undefined = undefined;\n if (searchAreaPrompt) {\n searchAreaResponse = await AiLocateSection({\n context,\n sectionDescription: searchAreaPrompt,\n });\n assert(\n searchAreaResponse.rect,\n `cannot find search area for \"${searchAreaPrompt}\"${\n searchAreaResponse.error ? `: ${searchAreaResponse.error}` : ''\n }`,\n );\n searchAreaRawResponse = searchAreaResponse.rawResponse;\n searchAreaUsage = searchAreaResponse.usage;\n searchArea = searchAreaResponse.rect;\n }\n\n const startTime = Date.now();\n const {\n parseResult,\n rect,\n elementById,\n rawResponse,\n usage,\n isOrderSensitive,\n } = await AiLocateElement({\n callAI: callAI || this.aiVendorFn,\n context,\n targetElementDescription: queryPrompt,\n searchConfig: searchAreaResponse,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(rawResponse),\n formatResponse: JSON.stringify(parseResult),\n usage,\n searchArea,\n searchAreaRawResponse,\n searchAreaUsage,\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI model failed to locate: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'locate',\n userQuery: {\n element: queryPrompt,\n },\n matchedElement: [],\n matchedRect: rect,\n data: null,\n taskInfo,\n deepThink: !!searchArea,\n error: errorLog,\n };\n\n const elements: BaseElement[] = [];\n (parseResult.elements || []).forEach((item) => {\n if ('id' in item) {\n const element = elementById(item?.id);\n\n if (!element) {\n console.warn(\n `locate: cannot find element id=${item.id}. Maybe an unstable response from AI model`,\n );\n return;\n }\n elements.push(element);\n }\n });\n\n emitInsightDump(\n {\n ...dumpData,\n matchedElement: elements,\n },\n dumpSubscriber,\n );\n\n if (errorLog) {\n throw new Error(errorLog);\n }\n\n assert(\n elements.length <= 1,\n `locate: multiple elements found, length = ${elements.length}`,\n );\n\n if (elements.length === 1) {\n return {\n element: {\n id: elements[0]!.id,\n indexId: elements[0]!.indexId,\n center: elements[0]!.center,\n rect: elements[0]!.rect,\n xpaths: elements[0]!.xpaths || [],\n attributes: elements[0]!.attributes,\n isOrderSensitive,\n },\n rect,\n };\n }\n return {\n element: null,\n rect,\n };\n }\n\n async extract<T>(\n dataDemand: InsightExtractParam,\n opt?: InsightExtractOption,\n multimodalPrompt?: TMultimodalPrompt,\n ): Promise<{\n data: T;\n thought?: string;\n usage?: AIUsageInfo;\n }> {\n assert(\n typeof dataDemand === 'object' || typeof dataDemand === 'string',\n `dataDemand should be object or string, but get ${typeof dataDemand}`,\n );\n const dumpSubscriber = this.onceDumpUpdatedFn;\n this.onceDumpUpdatedFn = undefined;\n\n const modelPreferences: IModelPreferences = {\n intent: 'VQA',\n };\n\n const context = await this.contextRetrieverFn('extract');\n\n const startTime = Date.now();\n const { parseResult, usage } = await AiExtractElementInfo<T>({\n context,\n dataQuery: dataDemand,\n multimodalPrompt,\n extractOption: opt,\n modelPreferences,\n });\n\n const timeCost = Date.now() - startTime;\n const taskInfo: InsightTaskInfo = {\n ...(this.taskInfo ? this.taskInfo : {}),\n durationMs: timeCost,\n rawResponse: JSON.stringify(parseResult),\n };\n\n let errorLog: string | undefined;\n if (parseResult.errors?.length) {\n errorLog = `AI response error: \\n${parseResult.errors.join('\\n')}`;\n }\n\n const dumpData: PartialInsightDumpFromSDK = {\n type: 'extract',\n userQuery: {\n dataDemand,\n },\n matchedElement: [],\n data: null,\n taskInfo,\n error: errorLog,\n };\n\n const { data, thought } = parseResult || {};\n\n // 4\n emitInsightDump(\n {\n ...dumpData,\n data,\n },\n dumpSubscriber,\n );\n\n if (errorLog && !data && !opt?.doNotThrowError) {\n throw new Error(errorLog);\n }\n\n return {\n data,\n thought,\n usage,\n };\n }\n\n async describe(\n target: Rect | [number, number],\n opt?: {\n deepThink?: boolean;\n },\n ): Promise<Pick<AIDescribeElementResponse, 'description'>> {\n assert(target, 'target is required for insight.describe');\n const context = await this.contextRetrieverFn('describe');\n const { screenshotBase64, size } = context;\n assert(screenshotBase64, 'screenshot is required for insight.describe');\n // The result of the \"describe\" function will be used for positioning, so essentially it is a form of grounding.\n const modelPreferences: IModelPreferences = { intent: 'grounding' };\n const systemPrompt = elementDescriberInstruction();\n\n // Convert [x,y] center point to Rect if needed\n const defaultRectSize = 30;\n const targetRect: Rect = Array.isArray(target)\n ? {\n left: Math.floor(target[0] - defaultRectSize / 2),\n top: Math.floor(target[1] - defaultRectSize / 2),\n width: defaultRectSize,\n height: defaultRectSize,\n }\n : target;\n\n let imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n size,\n elementsPositionInfo: [\n {\n rect: targetRect,\n },\n ],\n borderThickness: 3,\n });\n\n if (opt?.deepThink) {\n const searchArea = expandSearchArea(\n targetRect,\n context.size,\n modelPreferences,\n );\n debug('describe: set searchArea', searchArea);\n imagePayload = await cropByRect(\n imagePayload,\n searchArea,\n getIsUseQwenVl(modelPreferences),\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ],\n },\n ];\n\n const callAIFn =\n this.aiVendorFn || callToGetJSONObject<AIDescribeElementResponse>;\n\n const res = await callAIFn(\n msgs,\n AIActionType.DESCRIBE_ELEMENT,\n modelPreferences,\n );\n\n const { content } = res;\n assert(!content.error, `describe failed: ${content.error}`);\n assert(content.description, 'failed to describe the element');\n return content;\n }\n}\n"],"names":["debug","getDebug","Insight","query","opt","_parseResult_errors","callAI","queryPrompt","assert","dumpSubscriber","undefined","globalDeepThinkSwitch","getAIConfigInBoolean","MIDSCENE_FORCE_DEEP_THINK","searchAreaPrompt","modelPreferences","vlLocateMode","console","context","searchArea","searchAreaRawResponse","searchAreaUsage","searchAreaResponse","AiLocateSection","startTime","Date","parseResult","rect","elementById","rawResponse","usage","isOrderSensitive","AiLocateElement","timeCost","taskInfo","JSON","errorLog","dumpData","elements","item","element","emitInsightDump","Error","dataDemand","multimodalPrompt","AiExtractElementInfo","data","thought","target","screenshotBase64","size","systemPrompt","elementDescriberInstruction","defaultRectSize","targetRect","Array","Math","imagePayload","compositeElementInfoImg","expandSearchArea","cropByRect","getIsUseQwenVl","msgs","callAIFn","callToGetJSONObject","res","AIActionType","content","callAiFn","Promise"],"mappings":";;;;;;;;;;;;;;;;;;;AAoDA,MAAMA,QAAQC,SAAS;AACR,MAAMC;IAmCnB,MAAM,OACJC,KAA0B,EAC1BC,GAAgB,EACO;YAkFnBC;QAjFJ,MAAM,EAAEC,MAAM,EAAE,GAAGF,OAAO,CAAC;QAC3B,MAAMG,cAAc,AAAiB,YAAjB,OAAOJ,QAAqBA,QAAQA,MAAM,MAAM;QACpEK,OAAOD,aAAa;QACpB,MAAME,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzBF,OAAO,AAAiB,YAAjB,OAAOL,OAAoB;QAElC,MAAMQ,wBAAwBC,qBAC5BC;QAEF,IAAIF,uBACFX,MAAM,yBAAyBW;QAEjC,IAAIG;QACJ,IAAIX,MAAM,SAAS,IAAIQ,uBACrBG,mBAAmBX,MAAM,MAAM;QAEjC,MAAMY,mBAAsC;YAC1C,QAAQ;QACV;QAEA,IAAID,oBAAoB,CAACE,aAAaD,mBAAmB;YACvDE,QAAQ,IAAI,CACV;YAEFH,mBAAmBJ;QACrB;QAEA,MAAMQ,UAAUd,AAAAA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,OAAO,AAAD,KAAM,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE/D,IAAIe;QACJ,IAAIC;QACJ,IAAIC;QACJ,IAAIC;QAGJ,IAAIR,kBAAkB;YACpBQ,qBAAqB,MAAMC,gBAAgB;gBACzCL;gBACA,oBAAoBJ;YACtB;YACAN,OACEc,mBAAmB,IAAI,EACvB,CAAC,6BAA6B,EAAER,iBAAiB,CAAC,EAChDQ,mBAAmB,KAAK,GAAG,CAAC,EAAE,EAAEA,mBAAmB,KAAK,EAAE,GAAG,IAC7D;YAEJF,wBAAwBE,mBAAmB,WAAW;YACtDD,kBAAkBC,mBAAmB,KAAK;YAC1CH,aAAaG,mBAAmB,IAAI;QACtC;QAEA,MAAME,YAAYC,KAAK,GAAG;QAC1B,MAAM,EACJC,WAAW,EACXC,IAAI,EACJC,WAAW,EACXC,WAAW,EACXC,KAAK,EACLC,gBAAgB,EACjB,GAAG,MAAMC,gBAAgB;YACxB,QAAQ1B,UAAU,IAAI,CAAC,UAAU;YACjCY;YACA,0BAA0BX;YAC1B,cAAce;QAChB;QAEA,MAAMW,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACN;YAC5B,gBAAgBM,KAAK,SAAS,CAACT;YAC/BI;YACAX;YACAC;YACAC;QACF;QAEA,IAAIe;QACJ,IAAI,QAAA/B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B+B,WAAW,CAAC,6BAA6B,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAG5E,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACT,SAAS9B;YACX;YACA,gBAAgB,EAAE;YAClB,aAAaoB;YACb,MAAM;YACNO;YACA,WAAW,CAAC,CAACf;YACb,OAAOiB;QACT;QAEA,MAAME,WAA0B,EAAE;QACjCZ,CAAAA,YAAY,QAAQ,IAAI,EAAC,EAAG,OAAO,CAAC,CAACa;YACpC,IAAI,QAAQA,MAAM;gBAChB,MAAMC,UAAUZ,YAAYW,QAAAA,OAAAA,KAAAA,IAAAA,KAAM,EAAE;gBAEpC,IAAI,CAACC,SAAS,YACZvB,QAAQ,IAAI,CACV,CAAC,+BAA+B,EAAEsB,KAAK,EAAE,CAAC,0CAA0C,CAAC;gBAIzFD,SAAS,IAAI,CAACE;YAChB;QACF;QAEAC,gBACE;YACE,GAAGJ,QAAQ;YACX,gBAAgBC;QAClB,GACA7B;QAGF,IAAI2B,UACF,MAAM,IAAIM,MAAMN;QAGlB5B,OACE8B,SAAS,MAAM,IAAI,GACnB,CAAC,0CAA0C,EAAEA,SAAS,MAAM,EAAE;QAGhE,IAAIA,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACL,SAAS;gBACP,IAAIA,QAAQ,CAAC,EAAE,CAAE,EAAE;gBACnB,SAASA,QAAQ,CAAC,EAAE,CAAE,OAAO;gBAC7B,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM;gBAC3B,MAAMA,QAAQ,CAAC,EAAE,CAAE,IAAI;gBACvB,QAAQA,QAAQ,CAAC,EAAE,CAAE,MAAM,IAAI,EAAE;gBACjC,YAAYA,QAAQ,CAAC,EAAE,CAAE,UAAU;gBACnCP;YACF;YACAJ;QACF;QAEF,OAAO;YACL,SAAS;YACTA;QACF;IACF;IAEA,MAAM,QACJgB,UAA+B,EAC/BvC,GAA0B,EAC1BwC,gBAAoC,EAKnC;YA+BGvC;QA9BJG,OACE,AAAsB,YAAtB,OAAOmC,cAA2B,AAAsB,YAAtB,OAAOA,YACzC,CAAC,+CAA+C,EAAE,OAAOA,YAAY;QAEvE,MAAMlC,iBAAiB,IAAI,CAAC,iBAAiB;QAC7C,IAAI,CAAC,iBAAiB,GAAGC;QAEzB,MAAMK,mBAAsC;YAC1C,QAAQ;QACV;QAEA,MAAMG,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAE9C,MAAMM,YAAYC,KAAK,GAAG;QAC1B,MAAM,EAAEC,WAAW,EAAEI,KAAK,EAAE,GAAG,MAAMe,qBAAwB;YAC3D3B;YACA,WAAWyB;YACXC;YACA,eAAexC;YACfW;QACF;QAEA,MAAMkB,WAAWR,KAAK,GAAG,KAAKD;QAC9B,MAAMU,WAA4B;YAChC,GAAI,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAC;YACtC,YAAYD;YACZ,aAAaE,KAAK,SAAS,CAACT;QAC9B;QAEA,IAAIU;QACJ,IAAI,QAAA/B,CAAAA,sBAAAA,YAAY,MAAM,AAAD,IAAjBA,KAAAA,IAAAA,oBAAoB,MAAM,EAC5B+B,WAAW,CAAC,qBAAqB,EAAEV,YAAY,MAAM,CAAC,IAAI,CAAC,OAAO;QAGpE,MAAMW,WAAsC;YAC1C,MAAM;YACN,WAAW;gBACTM;YACF;YACA,gBAAgB,EAAE;YAClB,MAAM;YACNT;YACA,OAAOE;QACT;QAEA,MAAM,EAAEU,IAAI,EAAEC,OAAO,EAAE,GAAGrB,eAAe,CAAC;QAG1Ce,gBACE;YACE,GAAGJ,QAAQ;YACXS;QACF,GACArC;QAGF,IAAI2B,YAAY,CAACU,QAAQ,CAAC1C,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,eAAe,AAAD,GAC3C,MAAM,IAAIsC,MAAMN;QAGlB,OAAO;YACLU;YACAC;YACAjB;QACF;IACF;IAEA,MAAM,SACJkB,MAA+B,EAC/B5C,GAEC,EACwD;QACzDI,OAAOwC,QAAQ;QACf,MAAM9B,UAAU,MAAM,IAAI,CAAC,kBAAkB,CAAC;QAC9C,MAAM,EAAE+B,gBAAgB,EAAEC,IAAI,EAAE,GAAGhC;QACnCV,OAAOyC,kBAAkB;QAEzB,MAAMlC,mBAAsC;YAAE,QAAQ;QAAY;QAClE,MAAMoC,eAAeC;QAGrB,MAAMC,kBAAkB;QACxB,MAAMC,aAAmBC,MAAM,OAAO,CAACP,UACnC;YACE,MAAMQ,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC/C,KAAKG,KAAK,KAAK,CAACR,MAAM,CAAC,EAAE,GAAGK,kBAAkB;YAC9C,OAAOA;YACP,QAAQA;QACV,IACAL;QAEJ,IAAIS,eAAe,MAAMC,wBAAwB;YAC/C,gBAAgBT;YAChBC;YACA,sBAAsB;gBACpB;oBACE,MAAMI;gBACR;aACD;YACD,iBAAiB;QACnB;QAEA,IAAIlD,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,SAAS,EAAE;YAClB,MAAMe,aAAawC,iBACjBL,YACApC,QAAQ,IAAI,EACZH;YAEFf,MAAM,4BAA4BmB;YAClCsC,eAAe,MAAMG,WACnBH,cACAtC,YACA0C,eAAe9C;QAEnB;QAEA,MAAM+C,OAAe;YACnB;gBAAE,MAAM;gBAAU,SAASX;YAAa;YACxC;gBACE,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKM;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;SACD;QAED,MAAMM,WACJ,IAAI,CAAC,UAAU,IAAIC;QAErB,MAAMC,MAAM,MAAMF,SAChBD,MACAI,aAAa,gBAAgB,EAC7BnD;QAGF,MAAM,EAAEoD,OAAO,EAAE,GAAGF;QACpBzD,OAAO,CAAC2D,QAAQ,KAAK,EAAE,CAAC,iBAAiB,EAAEA,QAAQ,KAAK,EAAE;QAC1D3D,OAAO2D,QAAQ,WAAW,EAAE;QAC5B,OAAOA;IACT;IA1UA,YACEjD,OAEmE,EACnEd,GAAoB,CACpB;QAfF;QAIA,qCAAoDgE;QAEpD;QAEA;QAQE5D,OAAOU,SAAS;QAChB,IAAI,AAAmB,cAAnB,OAAOA,SACT,IAAI,CAAC,kBAAkB,GAAGA;aAE1B,IAAI,CAAC,kBAAkB,GAAG,IAAMmD,QAAQ,OAAO,CAACnD;QAGlD,IAAI,AAA2B,WAApBd,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,UAAU,AAAD,GACvB,IAAI,CAAC,UAAU,GAAGA,IAAI,UAAU;QAElC,IAAI,AAAyB,WAAlBA,CAAAA,QAAAA,MAAAA,KAAAA,IAAAA,IAAK,QAAQ,AAAD,GACrB,IAAI,CAAC,QAAQ,GAAGA,IAAI,QAAQ;IAEhC;AAwTF"}
|