@midscene/core 0.26.7-beta-20250818035341.0 → 0.26.7-beta-20250820105545.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/action-executor.mjs +0 -8
- package/dist/es/ai-model/action-executor.mjs.map +1 -1
- package/dist/es/ai-model/common.mjs +73 -52
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/index.mjs +3 -3
- package/dist/es/ai-model/inspect.mjs +29 -66
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +27 -24
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/assertion.mjs +1 -25
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +50 -23
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/playwright-generator.mjs +9 -3
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -1
- package/dist/es/ai-model/prompt/util.mjs +2 -2
- package/dist/es/ai-model/prompt/util.mjs.map +1 -1
- package/dist/es/ai-model/prompt/yaml-generator.mjs +9 -3
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs +75 -118
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +5 -5
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/index.mjs +3 -2
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +14 -97
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/insight/utils.mjs +1 -3
- package/dist/es/insight/utils.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +5 -6
- package/dist/es/utils.mjs.map +1 -1
- package/dist/lib/ai-model/action-executor.js +0 -8
- package/dist/lib/ai-model/action-executor.js.map +1 -1
- package/dist/lib/ai-model/common.js +97 -55
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/index.js +16 -4
- package/dist/lib/ai-model/inspect.js +29 -69
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +26 -23
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/assertion.js +2 -29
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +52 -25
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/playwright-generator.js +9 -3
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -1
- package/dist/lib/ai-model/prompt/util.js +2 -2
- package/dist/lib/ai-model/prompt/util.js.map +1 -1
- package/dist/lib/ai-model/prompt/yaml-generator.js +9 -3
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js +78 -124
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +5 -5
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/index.js +20 -7
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +10 -93
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/insight/utils.js +1 -3
- package/dist/lib/insight/utils.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +4 -5
- package/dist/lib/utils.js.map +1 -1
- package/dist/types/ai-model/common.d.ts +162 -8
- package/dist/types/ai-model/index.d.ts +2 -1
- package/dist/types/ai-model/inspect.d.ts +3 -8
- package/dist/types/ai-model/llm-planning.d.ts +1 -1
- package/dist/types/ai-model/prompt/assertion.d.ts +0 -3
- package/dist/types/ai-model/prompt/llm-planning.d.ts +2 -2
- package/dist/types/ai-model/prompt/util.d.ts +2 -1
- package/dist/types/ai-model/service-caller/index.d.ts +6 -6
- package/dist/types/ai-model/ui-tars-planning.d.ts +3 -1
- package/dist/types/index.d.ts +3 -1
- package/dist/types/insight/index.d.ts +1 -5
- package/dist/types/types.d.ts +11 -12
- package/dist/types/yaml.d.ts +7 -6
- package/package.json +4 -3
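
Reading note before the hunks below (all taken from the compiled dist/lib/ai-model output): the recurring change is that helpers which previously read the vision-language configuration from global env state (vlLocateMode, describeUserPage, adaptBboxToRect, expandSearchArea, callAiFn, getIsUseQwenVl) now receive an explicit model-preferences argument carrying an intent field, and AiAssert is dropped from this compiled inspect.js module. The following self-contained TypeScript sketch only mirrors that call-shape change; the IModelPreferences name and the 'grounding' value appear in the updated source embedded in the source map at the end of this diff, but the stub signature and body here are illustrative assumptions, not the package's real API.

    // Sketch only: local stand-ins that mirror identifiers visible in this diff.
    interface IModelPreferences {
      intent: string; // only 'grounding' is visible in these hunks
    }

    // Old shape (removed lines): no argument, configuration read from env.
    // New shape (added lines): the caller states its intent explicitly.
    function vlLocateMode(preferences: IModelPreferences): 'qwen-vl' | false {
      // Placeholder body; the real helper in @midscene/shared/env inspects env config.
      return preferences.intent === 'grounding' ? 'qwen-vl' : false;
    }

    const modelPreferences: IModelPreferences = { intent: 'grounding' };
    const mode = vlLocateMode(modelPreferences);
    // In the hunks below, the same object is also threaded into describeUserPage,
    // callAiFn, adaptBboxToRect and expandSearchArea.
    console.log(mode);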
@@ -24,17 +24,15 @@ var __webpack_require__ = {};
 24  24 |   var __webpack_exports__ = {};
 25  25 |   __webpack_require__.r(__webpack_exports__);
 26  26 |   __webpack_require__.d(__webpack_exports__, {
 27     | -
 28     | - AiExtractElementInfo: ()=>AiExtractElementInfo,
     27 | + AiLocateElement: ()=>AiLocateElement,
 29  28 |   AiLocateSection: ()=>AiLocateSection,
 30     | -
     29 | + AiExtractElementInfo: ()=>AiExtractElementInfo
 31  30 |   });
 32  31 |   const env_namespaceObject = require("@midscene/shared/env");
 33  32 |   const img_namespaceObject = require("@midscene/shared/img");
 34  33 |   const logger_namespaceObject = require("@midscene/shared/logger");
 35  34 |   const utils_namespaceObject = require("@midscene/shared/utils");
 36  35 |   const external_common_js_namespaceObject = require("./common.js");
 37     | - const assertion_js_namespaceObject = require("./prompt/assertion.js");
 38  36 |   const extraction_js_namespaceObject = require("./prompt/extraction.js");
 39  37 |   const llm_locator_js_namespaceObject = require("./prompt/llm-locator.js");
 40  38 |   const llm_section_locator_js_namespaceObject = require("./prompt/llm-section-locator.js");
@@ -89,20 +87,23 @@ const promptsToChatParam = async (multimodalPrompt)=>{
 89  87 |   async function AiLocateElement(options) {
 90  88 |   const { context, targetElementDescription, callAI } = options;
 91  89 |   const { screenshotBase64 } = context;
 92     | - const
     90 | + const modelPreferences = {
     91 | + intent: 'grounding'
     92 | + };
     93 | + const { description, elementById, insertElementByPosition } = await (0, util_js_namespaceObject.describeUserPage)(context, modelPreferences);
 93  94 |   (0, utils_namespaceObject.assert)(targetElementDescription, "cannot find the target element description");
 94  95 |   const userInstructionPrompt = await llm_locator_js_namespaceObject.findElementPrompt.format({
 95  96 |   pageDescription: description,
 96  97 |   targetElementDescription: extraTextFromUserPrompt(targetElementDescription)
 97  98 |   });
 98     | - const systemPrompt = (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)((0, env_namespaceObject.vlLocateMode)());
     99 | + const systemPrompt = (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)((0, env_namespaceObject.vlLocateMode)(modelPreferences));
 99 100 |   let imagePayload = screenshotBase64;
100 101 |   if (options.searchConfig) {
101 102 |   (0, utils_namespaceObject.assert)(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
102 103 |   (0, utils_namespaceObject.assert)(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
103 104 |   imagePayload = options.searchConfig.imageBase64;
104     | - } else if ('qwen-vl' === (0, env_namespaceObject.vlLocateMode)()) imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
105     | - else if (!(0, env_namespaceObject.vlLocateMode)()) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
    105 | + } else if ('qwen-vl' === (0, env_namespaceObject.vlLocateMode)(modelPreferences)) imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
    106 | + else if (!(0, env_namespaceObject.vlLocateMode)(modelPreferences)) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
106 107 |   const msgs = [
107 108 |   {
108 109 |   role: 'system',
@@ -133,7 +134,9 @@ async function AiLocateElement(options) {
133 134 |   msgs.push(...addOns);
134 135 |   }
135 136 |   const callAIFn = callAI || index_js_namespaceObject.callToGetJSONObject;
136     | - const res = await callAIFn(msgs, external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT
    137 | + const res = await callAIFn(msgs, external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT, {
    138 | + intent: 'grounding'
    139 | + });
137 140 |   const rawResponse = JSON.stringify(res.content);
138 141 |   let resRect;
139 142 |   let matchedElements = 'elements' in res.content ? res.content.elements : [];
@@ -141,7 +144,7 @@ async function AiLocateElement(options) {
141 144 |   try {
142 145 |   if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
143 146 |   var _options_searchConfig_rect, _options_searchConfig, _options_searchConfig_rect1, _options_searchConfig1, _options_searchConfig_rect2, _options_searchConfig2, _options_searchConfig_rect3, _options_searchConfig3;
144     | - resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top);
    147 | + resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox, (null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect.width) || context.size.width, (null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect1 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect1.height) || context.size.height, modelPreferences, null == (_options_searchConfig2 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig2.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig3 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig3.rect) ? void 0 : _options_searchConfig_rect3.top);
145 148 |   debugInspect('resRect', resRect);
146 149 |   const rectCenter = {
147 150 |   x: resRect.left + resRect.width / 2,
@@ -182,7 +185,10 @@ async function AiLocateElement(options) {
182 185 |   async function AiLocateSection(options) {
183 186 |   const { context, sectionDescription } = options;
184 187 |   const { screenshotBase64 } = context;
185     | - const
    188 | + const modelPreferences = {
    189 | + intent: 'grounding'
    190 | + };
    191 | + const systemPrompt = (0, llm_section_locator_js_namespaceObject.systemPromptToLocateSection)((0, env_namespaceObject.vlLocateMode)(modelPreferences));
186 192 |   const sectionLocatorInstructionText = await llm_section_locator_js_namespaceObject.sectionLocatorInstruction.format({
187 193 |   sectionDescription: extraTextFromUserPrompt(sectionDescription)
188 194 |   });
@@ -215,26 +221,30 @@ async function AiLocateSection(options) {
215 221 |   });
216 222 |   msgs.push(...addOns);
217 223 |   }
218     | - const result = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.EXTRACT_DATA
    224 | + const result = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.EXTRACT_DATA, {
    225 | + intent: 'grounding'
    226 | + });
219 227 |   let sectionRect;
220 228 |   const sectionBbox = result.content.bbox;
221 229 |   if (sectionBbox) {
222     | - const targetRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(sectionBbox, context.size.width, context.size.height);
    230 | + const targetRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(sectionBbox, context.size.width, context.size.height, modelPreferences);
223 231 |   debugSection('original targetRect %j', targetRect);
224 232 |   const referenceBboxList = result.content.references_bbox || [];
225 233 |   debugSection('referenceBboxList %j', referenceBboxList);
226     | - const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>(0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.size.width, context.size.height));
    234 | + const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>(0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.size.width, context.size.height, modelPreferences));
227 235 |   debugSection('referenceRects %j', referenceRects);
228 236 |   const mergedRect = (0, external_common_js_namespaceObject.mergeRects)([
229 237 |   targetRect,
230 238 |   ...referenceRects
231 239 |   ]);
232 240 |   debugSection('mergedRect %j', mergedRect);
233     | - sectionRect = (0, external_common_js_namespaceObject.expandSearchArea)(mergedRect, context.size);
    241 | + sectionRect = (0, external_common_js_namespaceObject.expandSearchArea)(mergedRect, context.size, modelPreferences);
234 242 |   debugSection('expanded sectionRect %j', sectionRect);
235 243 |   }
236 244 |   let imageBase64 = screenshotBase64;
237     | - if (sectionRect) imageBase64 = await (0, img_namespaceObject.cropByRect)(screenshotBase64, sectionRect, (0, env_namespaceObject.
    245 | + if (sectionRect) imageBase64 = await (0, img_namespaceObject.cropByRect)(screenshotBase64, sectionRect, (0, env_namespaceObject.getIsUseQwenVl)({
    246 | + intent: 'grounding'
    247 | + }));
238 248 |   return {
239 249 |   rect: sectionRect,
240 250 |   imageBase64,
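
The hunk above shows AiLocateSection's bbox-to-rect pipeline gaining the same trailing argument. A minimal sketch of the new call sequence follows, assuming stub signatures that mirror the argument order of the compiled calls; adaptBboxToRect and expandSearchArea are local stand-ins here, not imports of the real helpers, and the bbox math is placeholder only.

    interface IModelPreferences { intent: string }
    interface Rect { left: number; top: number; width: number; height: number }

    // Stand-in: placeholder math assuming an [x1, y1, x2, y2] bbox; the real helper
    // also normalizes model-specific coordinates using the preferences.
    const adaptBboxToRect = (bbox: number[], width: number, height: number,
      prefs: IModelPreferences): Rect =>
      ({ left: bbox[0], top: bbox[1], width: bbox[2] - bbox[0], height: bbox[3] - bbox[1] });

    // Stand-in: the real helper expands the search area to a minimum size.
    const expandSearchArea = (rect: Rect, size: { width: number; height: number },
      prefs: IModelPreferences): Rect => rect;

    function sectionRectFrom(bbox: number[], size: { width: number; height: number }): Rect {
      const modelPreferences: IModelPreferences = { intent: 'grounding' };
      const target = adaptBboxToRect(bbox, size.width, size.height, modelPreferences);
      return expandSearchArea(target, size, modelPreferences);
    }

    console.log(sectionRectFrom([10, 10, 120, 80], { width: 1280, height: 720 }));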
@@ -245,10 +255,10 @@ async function AiLocateSection(options) {
245 255 |   }
246 256 |   async function AiExtractElementInfo(options) {
247 257 |   var _options_extractOption;
248     | - const { dataQuery, context, extractOption, multimodalPrompt } = options;
    258 | + const { dataQuery, context, extractOption, multimodalPrompt, modelPreferences } = options;
249 259 |   const systemPrompt = (0, extraction_js_namespaceObject.systemPromptToExtract)();
250 260 |   const { screenshotBase64 } = context;
251     | - const { description, elementById } = await (0, util_js_namespaceObject.describeUserPage)(context, {
    261 | + const { description, elementById } = await (0, util_js_namespaceObject.describeUserPage)(context, modelPreferences, {
252 262 |   truncateTextLength: 200,
253 263 |   filterNonTextContent: false,
254 264 |   visibleOnly: false,
@@ -288,67 +298,17 @@ async function AiExtractElementInfo(options) {
288 298 |   });
289 299 |   msgs.push(...addOns);
290 300 |   }
291     | - const result = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.EXTRACT_DATA);
    301 | + const result = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.EXTRACT_DATA, modelPreferences);
292 302 |   return {
293 303 |   parseResult: result.content,
294 304 |   elementById,
295 305 |   usage: result.usage
296 306 |   };
297 307 |   }
298     | - async function AiAssert(options) {
299     | - const { assertion, context } = options;
300     | - (0, utils_namespaceObject.assert)(assertion, 'assertion should not be empty');
301     | - const { screenshotBase64 } = context;
302     | - const systemPrompt = (0, assertion_js_namespaceObject.systemPromptToAssert)({
303     | - isUITars: (0, env_namespaceObject.getAIConfigInBoolean)(env_namespaceObject.MIDSCENE_USE_VLM_UI_TARS)
304     | - });
305     | - const assertionText = extraTextFromUserPrompt(assertion);
306     | - const msgs = [
307     | - {
308     | - role: 'system',
309     | - content: systemPrompt
310     | - },
311     | - {
312     | - role: 'user',
313     | - content: [
314     | - {
315     | - type: 'image_url',
316     | - image_url: {
317     | - url: screenshotBase64,
318     | - detail: 'high'
319     | - }
320     | - },
321     | - {
322     | - type: 'text',
323     | - text: `
324     | - Here is the assertion. Please tell whether it is truthy according to the screenshot.
325     | - =====================================
326     | - ${assertionText}
327     | - =====================================
328     | - `
329     | - }
330     | - ]
331     | - }
332     | - ];
333     | - if ('string' != typeof assertion) {
334     | - const addOns = await promptsToChatParam({
335     | - images: assertion.images,
336     | - convertHttpImage2Base64: assertion.convertHttpImage2Base64
337     | - });
338     | - msgs.push(...addOns);
339     | - }
340     | - const { content: assertResult, usage } = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.ASSERT);
341     | - return {
342     | - content: assertResult,
343     | - usage
344     | - };
345     | - }
346     | - exports.AiAssert = __webpack_exports__.AiAssert;
347 308 |   exports.AiExtractElementInfo = __webpack_exports__.AiExtractElementInfo;
348 309 |   exports.AiLocateElement = __webpack_exports__.AiLocateElement;
349 310 |   exports.AiLocateSection = __webpack_exports__.AiLocateSection;
350 311 |   for(var __webpack_i__ in __webpack_exports__)if (-1 === [
351     | - "AiAssert",
352 312 |   "AiExtractElementInfo",
353 313 |   "AiLocateElement",
354 314 |   "AiLocateSection"
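
Per the hunk above and the updated source embedded in the map below, AiExtractElementInfo now expects the caller to supply the preferences object through its options (it is destructured alongside dataQuery and forwarded to describeUserPage and callAiFn), while AiAssert is no longer part of this compiled module. A sketch of the updated options shape follows, with simplified stand-in types; field names mirror the updated source, but the value types and the example intent are assumptions, not the package's published typings.

    interface IModelPreferences { intent: string }

    interface AiExtractElementInfoOptions {
      dataQuery: string | Record<string, string>;
      context: unknown;                           // UIContext<ElementType> in the real types
      extractOption?: Record<string, unknown>;    // InsightExtractOption in the real types
      multimodalPrompt?: Record<string, unknown>; // TMultimodalPrompt in the real types
      modelPreferences: IModelPreferences;        // required in this version; absent before
    }

    // Hypothetical usage; the intent value used for extraction is not shown in this file's hunks.
    const options: AiExtractElementInfoOptions = {
      dataQuery: { title: 'string, the page title' },
      context: {},
      modelPreferences: { intent: 'grounding' },
    };
    console.log(Object.keys(options));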
@@ -1 +1 @@
  1     | -
{"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIAssertionResponse,\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_USE_QWEN_VL,\n MIDSCENE_USE_VLM_UI_TARS,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n AIActionType,\n adaptBboxToRect,\n callAiFn,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport { systemPromptToAssert } from './prompt/assertion';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callToGetJSONObject } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n 
}\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAI?: typeof callAiFn<AIElementResponse | [number, number]>;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAI } = options;\n const { screenshotBase64 } = context;\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context);\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlLocateMode());\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const callAIFn =\n callAI || callToGetJSONObject<AIElementResponse | [number, number]>;\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? 
distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n callAI?: typeof callAiFn<AISectionLocatorResponse>;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlLocateMode());\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(bbox, context.size.width, context.size.height);\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: 
UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt } = options;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n\nexport async function AiAssert<\n ElementType extends BaseElement = BaseElement,\n>(options: { assertion: TUserPrompt; context: UIContext<ElementType> }) {\n const { assertion, context } = options;\n\n assert(assertion, 'assertion should not be empty');\n\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToAssert({\n isUITars: getAIConfigInBoolean(MIDSCENE_USE_VLM_UI_TARS),\n });\n\n const assertionText = extraTextFromUserPrompt(assertion);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: `\nHere is the assertion. 
Please tell whether it is truthy according to the screenshot.\n=====================================\n${assertionText}\n=====================================\n `,\n },\n ],\n },\n ];\n\n if (typeof assertion !== 'string') {\n const addOns = await promptsToChatParam({\n images: assertion.images,\n convertHttpImage2Base64: assertion.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const { content: assertResult, usage } = await callAiFn<AIAssertionResponse>(\n msgs,\n AIActionType.ASSERT,\n );\n return {\n content: assertResult,\n usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAI","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","vlLocateMode","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","callAIFn","callToGetJSONObject","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAiFn","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","getAIConfigInBoolean","MIDSCENE_USE_QWEN_VL","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiAssert","assertion","systemPromptToAssert","MIDSCENE_USE_VLM_UI_TARS","assertionText","assertResult","usage"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;AC6DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;i
BACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OAMD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,MAAM,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAC7B,MAAM,EAAEI,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBP;IAEzBQ,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,0BACA;IAGF,MAAMQ,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0Bf,wBAAwBY;IACpD;IACA,MAAMU,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BC,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA;IAEjD,IAAIC,eAAeX;IAEnB,IAAIJ,QAAQ,YAAY,EAAE;QACxBS,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFS,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFe,eAAef,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIc,AAAmB,cAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACTC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACD,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACVC,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBb,kBACAH,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAML;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOR,0BAAuC;QAChD,MAAMgB,SAAS,MAAM1B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAMC,WACJhB,UAAUiB,yBAAAA,mBAAmBA;IAE/B,MAAMC,MAAM,MAAMF,SAASxB,MAAM2B,mCAAAA,YAAAA,CAAAA,eAA4B;IAE7D,MAAMC,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBACAC,6BAAAA,wBACAC,6BAAAA;YALFP,UAAUQ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK5B,QAAQ,IAAI,CAAC,KAAK,EACvD6B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK7B,QAAQ,IAAI,CAAC,MAAM,UACzD8B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG;YAEjC7C,aAAa,WAAWsC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiCnC,QAAQ,IAAI,EAAEiC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAU5B,wBAAwB2B;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAjB;QACA,OAAOe,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB5C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE4C,kBAAkB,EAAE,GAAG7C;IACxC,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAE7B,MAAMW,eAAekC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BhC,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA;IACjD,MAAMiC,gCAAgC,MAAMC,uCAAA
A,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoB1D,wBAAwBuD;IAC9C;IACA,MAAMlD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM2C;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM3B,SAAS,MAAM1B,mBAAmB;YACtC,QAAQqD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAlD,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM+B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EACnBvD,MACA2B,mCAAAA,YAAAA,CAAAA,YAAyB;IAG3B,IAAI6B;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACAnD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM;QAErBZ,aAAa,0BAA0BgE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D5D,aAAa,wBAAwBiE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EAAgBuB,MAAMvD,QAAQ,IAAI,CAAC,KAAK,EAAEA,QAAQ,IAAI,CAAC,MAAM;QAExEZ,aAAa,qBAAqBkE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DlE,aAAa,iBAAiBoE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYxD,QAAQ,IAAI;QACvDZ,aAAa,2BAA2B8D;IAC1C;IAEA,IAAIS,cAAcxD;IAClB,IAAI+C,aACFS,cAAc,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAClBzD,kBACA+C,aACAW,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA,EAAqBC,oBAAAA,oBAAoBA;IAI7C,OAAO;QACL,MAAMZ;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAee,qBAGpBhE,OAKD;QA0CKiE;IAzCJ,MAAM,EAAEC,SAAS,EAAEjE,OAAO,EAAEkE,aAAa,EAAE1E,gBAAgB,EAAE,GAAGO;IAChE,MAAMY,eAAewD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAEhE,gBAAgB,EAAE,GAAGH;IAC7B,MAAM,EAAEI,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBP,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAakE,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;IACzC;IAEA,MAAME,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClCjE,aACA6D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKnE;YACL,QAAQ;QACV;IACF;IAGFmE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM1E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS2D;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCtE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAMyB,SAAS,MAAM1B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM+B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EACnBvD,MACA2B,mCAAAA,YAAAA,CAAAA,YAAyB;IAE3B,OAAO;QACL,aAAa2B,OAAO,OAAO;QAC3B3C;QACA,OAAO2C,OAAO,KAAK;IACrB;AACF;AAEO,eAAeuB,SAEpBxE,OAAoE;IACpE,MAAM,EAAEyE,SAAS,EAAExE,OAAO,EAAE,GAAGD;IAE/BS,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOgE,WAAW;IAElB,MAAM,EAAErE,gBAAgB,EAAE,GAAGH;IAE7B,MAAMW,eAAe8D,AAAAA,IAAAA,6BAAAA,oBAAAA,AAAAA,EAAqB;QACxC,UAAUZ,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA,EAAqBa,oBAAAA,wBAAwBA;IACzD;IAEA,MAAMC,gBAAgBtF,wBAAwBmF;IAE9C,MAAM9E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM,CAAC;;;AAGjB,EAAEwE,cAAc;;EAEd,CAAC;gBACK;aACD;QACH;KACD;IAED,IAAI,AAAqB,YAArB,OAAOH,WAAwB;QACjC,MAAMvD,SAAS,MAAM1B,mBAAmB;YACtC,QAAQiF,UAAU,MAAM;YACxB,yBAAyBA,UAAU,uBAAuB;QAC5D;QACA9E,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EAAE,SAAS2D,YAAY,EAAEC,KAAK,EAAE,GAAG,MAAM5B,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EAC7CvD,MACA2B,mCAAAA,YAAAA,CAAAA,MAAmB;IAErB,OAAO;QACL,SAASuD;QACTC;IACF;AACF"}
      1 | +
{"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n type IModelPreferences,\n getIsUseQwenVl,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n AIActionType,\n adaptBboxToRect,\n callAiFn,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callToGetJSONObject } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = 
BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAI?: typeof callAiFn<AIElementResponse | [number, number]>;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAI } = options;\n const { screenshotBase64 } = context;\n\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, modelPreferences);\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(\n vlLocateMode(modelPreferences),\n );\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlLocateMode(modelPreferences) === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode(modelPreferences)) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const callAIFn =\n callAI || callToGetJSONObject<AIElementResponse | [number, number]>;\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, {\n intent: 'grounding',\n });\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n modelPreferences,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? 
distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n callAI?: typeof callAiFn<AISectionLocatorResponse>;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const { screenshotBase64 } = context;\n\n const modelPreferences: IModelPreferences = {\n intent: 'grounding',\n };\n\n const systemPrompt = systemPromptToLocateSection(\n vlLocateMode(modelPreferences),\n );\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n {\n intent: 'grounding',\n },\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n modelPreferences,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n modelPreferences,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, modelPreferences);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n getIsUseQwenVl({\n intent: 'grounding',\n }),\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport 
async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelPreferences: IModelPreferences;\n}) {\n const {\n dataQuery,\n context,\n extractOption,\n multimodalPrompt,\n modelPreferences,\n } = options;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n const { description, elementById } = await describeUserPage(\n context,\n modelPreferences,\n {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n },\n );\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelPreferences,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n 
};\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAI","screenshotBase64","modelPreferences","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","vlLocateMode","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","callAIFn","callToGetJSONObject","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAiFn","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","getIsUseQwenVl","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;AC0DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OAMD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,MAAM,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAE7B,MAAMI,mBAAsC;QAC1C,QAAQ;IACV;IAEA,MAAM,EAAEC,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBR,SAASI;IAElCK,IAAAA,sBAAAA,MAAAA,AAAAA,EACER,0BACA;IAGF,MAAMS,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BhB,wBAAwBY;IACpD;IACA,MAAMW,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EACnBC,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaV;IAGf,IAAIW,eAAeZ;IAEnB,IAAIJ,QAAQ,YAAY,EAAE;QACxBU,IAAAA,sBAAAA,MAAAA,AAAAA,EACEV,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFU,IAAAA,sBAAAA,MAAAA,AAAAA,EACEV,QAAQ,YAA
Y,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIe,AAAmC,cAAnCA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaV,mBACtBW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACD,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaV,mBACvBW,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBd,kBACAH,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASkB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAML;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOT,0BAAuC;QAChD,MAAMiB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAMC,WACJjB,UAAUkB,yBAAAA,mBAAmBA;IAE/B,MAAMC,MAAM,MAAMF,SAASzB,MAAM4B,mCAAAA,YAAAA,CAAAA,eAA4B,EAAE;QAC7D,QAAQ;IACV;IAEA,MAAMC,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBAEAC,6BAAAA,wBACAC,6BAAAA;YANFP,UAAUQ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK7B,QAAQ,IAAI,CAAC,KAAK,EACvD8B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK9B,QAAQ,IAAI,CAAC,MAAM,EACzDI,kBAAAA,QACA2B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG;YAEjC9C,aAAa,WAAWuC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiCpC,QAAQ,IAAI,EAAEkC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAU5B,wBAAwB2B;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAjB;QACA,OAAOe,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB7C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE6C,kBAAkB,EAAE,GAAG9C;IACxC,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAE7B,MAAMI,mBAAsC;QAC1C,QAAQ;IACV;IAEA,MAAMQ,eAAekC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EACnBhC,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaV;IAEf,MAAM2C,gCAAgC,MAAMC,uCAAAA,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoB3D,wBAAwBwD;IAC9C;IACA,MAAMnD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASkB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM4C;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM3B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQsD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAnD,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM+B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EACnBxD,MACA4B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzB;QACE,QAAQ;IACV;IAGF,IAAI6B;IACJ,MAAMC,cAAcH,
OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACApD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0BiE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D7D,aAAa,wBAAwBkE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLuB,MACAxD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqBmE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DnE,aAAa,iBAAiBqE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYzD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B+D;IAC1C;IAEA,IAAIS,cAAczD;IAClB,IAAIgD,aACFS,cAAc,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAClB1D,kBACAgD,aACAW,AAAAA,IAAAA,oBAAAA,cAAAA,AAAAA,EAAe;QACb,QAAQ;IACV;IAIJ,OAAO;QACL,MAAMX;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAGpBhE,OAMD;QAoDKiE;IAnDJ,MAAM,EACJC,SAAS,EACTjE,OAAO,EACPkE,aAAa,EACb1E,gBAAgB,EAChBY,gBAAgB,EACjB,GAAGL;IACJ,MAAMa,eAAeuD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAEhE,gBAAgB,EAAE,GAAGH;IAC7B,MAAM,EAAEK,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EACzCR,SACAI,kBACA;QACE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAa8D,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;IACzC;IAGF,MAAME,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClChE,aACA4D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKnE;YACL,QAAQ;QACV;IACF;IAGFmE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM1E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASkB;QAAa;QACxC;YACE,MAAM;YACN,SAAS0D;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCtE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM+B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EACnBxD,MACA4B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBlB;IAEF,OAAO;QACL,aAAa6C,OAAO,OAAO;QAC3B3C;QACA,OAAO2C,OAAO,KAAK;IACrB;AACF"}
@@ -28,28 +28,33 @@ __webpack_require__.d(__webpack_exports__, {
 });
 const env_namespaceObject = require("@midscene/shared/env");
 const img_namespaceObject = require("@midscene/shared/img");
+const logger_namespaceObject = require("@midscene/shared/logger");
 const utils_namespaceObject = require("@midscene/shared/utils");
 const external_common_js_namespaceObject = require("./common.js");
 const llm_planning_js_namespaceObject = require("./prompt/llm-planning.js");
 const util_js_namespaceObject = require("./prompt/util.js");
+const debug = (0, logger_namespaceObject.getDebug)('planning');
 async function plan(userInstruction, opts) {
 var _planFromAI_action;
 const { callAI, context } = opts || {};
 const { screenshotBase64, size } = context;
-const
+const modelPreferences = {
+intent: 'planning'
+};
+const { description: pageDescription, elementById } = await (0, util_js_namespaceObject.describeUserPage)(context, modelPreferences);
 const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
 actionSpace: opts.actionSpace,
-vlMode: (0, env_namespaceObject.vlLocateMode)()
+vlMode: (0, env_namespaceObject.vlLocateMode)(modelPreferences)
 });
 const taskBackgroundContextText = (0, llm_planning_js_namespaceObject.generateTaskBackgroundContext)(userInstruction, opts.log, opts.actionContext);
-const userInstructionPrompt = await (0, llm_planning_js_namespaceObject.automationUserPrompt)((0, env_namespaceObject.vlLocateMode)()).format({
+const userInstructionPrompt = await (0, llm_planning_js_namespaceObject.automationUserPrompt)((0, env_namespaceObject.vlLocateMode)(modelPreferences)).format({
 pageDescription,
 taskBackgroundContext: taskBackgroundContextText
 });
 let imagePayload = screenshotBase64;
-if ('qwen-vl' === (0, env_namespaceObject.vlLocateMode)()) imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
-else if (!(0, env_namespaceObject.vlLocateMode)()) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
-(0, external_common_js_namespaceObject.warnGPT4oSizeLimit)(size);
+if ('qwen-vl' === (0, env_namespaceObject.vlLocateMode)(modelPreferences)) imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
+else if (!(0, env_namespaceObject.vlLocateMode)(modelPreferences)) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
+(0, external_common_js_namespaceObject.warnGPT4oSizeLimit)(size, modelPreferences);
 const msgs = [
 {
 role: 'system',
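The hunk above introduces a per-call model-preference object in plan(): the compiled code now builds { intent: 'planning' } and hands it to describeUserPage, vlLocateMode, and warnGPT4oSizeLimit instead of letting each helper read the VL mode from the environment on its own. Below is a condensed TypeScript sketch of that flow, based on the sourcesContent embedded in the updated llm-planning.js.map further down; the wrapper function preparePlanningContext is illustrative only and is not an export of the package.

import type { UIContext } from '@/types';
import { type IModelPreferences, vlLocateMode } from '@midscene/shared/env';
import { paddingToMatchBlockByBase64 } from '@midscene/shared/img';
import { markupImageForLLM, warnGPT4oSizeLimit } from './common';
import { describeUserPage } from './prompt/util';

// One preference object per plan() call; every helper below receives it.
const modelPreferences: IModelPreferences = { intent: 'planning' };

// Illustrative wrapper (not part of the package) showing how the screenshot
// payload is prepared under the new preference-aware helpers.
export async function preparePlanningContext(context: UIContext) {
  const { screenshotBase64, size } = context;
  const { description: pageDescription, elementById } = await describeUserPage(
    context,
    modelPreferences,
  );

  let imagePayload = screenshotBase64;
  if (vlLocateMode(modelPreferences) === 'qwen-vl') {
    // Qwen-VL expects block-aligned images.
    imagePayload = await paddingToMatchBlockByBase64(imagePayload);
  } else if (!vlLocateMode(modelPreferences)) {
    // Non-VL models get a marked-up screenshot built from the element tree.
    imagePayload = await markupImageForLLM(screenshotBase64, context.tree, context.size);
  }
  warnGPT4oSizeLimit(size, modelPreferences);

  return { pageDescription, elementById, imagePayload };
}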
@@ -73,7 +78,7 @@ async function plan(userInstruction, opts) {
 }
 ];
 const call = callAI || external_common_js_namespaceObject.callAiFn;
-const { content, usage } = await call(msgs, external_common_js_namespaceObject.AIActionType.PLAN);
+const { content, usage } = await call(msgs, external_common_js_namespaceObject.AIActionType.PLAN, modelPreferences);
 const rawResponse = JSON.stringify(content, void 0, 2);
 const planFromAI = content;
 const actions = ((null == (_planFromAI_action = planFromAI.action) ? void 0 : _planFromAI_action.type) ? [
@@ -84,27 +89,25 @@ async function plan(userInstruction, opts) {
 actions,
 rawResponse,
 usage,
-yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions, planFromAI.sleep)
+yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions, opts.actionSpace, planFromAI.sleep)
 };
 (0, utils_namespaceObject.assert)(planFromAI, "can't get plans from AI");
-
-
-
-
-
-
-
-
+actions.forEach((action)=>{
+const type = action.type;
+const actionInActionSpace = opts.actionSpace.find((action)=>action.name === type);
+const locateFields = actionInActionSpace ? (0, external_common_js_namespaceObject.findAllMidsceneLocatorField)(actionInActionSpace.paramSchema) : [];
+debug('locateFields', locateFields);
+locateFields.forEach((field)=>{
+const locateResult = action.param[field];
+if (locateResult) if ((0, env_namespaceObject.vlLocateMode)(modelPreferences)) action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult, size.width, size.height, modelPreferences);
+else {
+const element = elementById(locateResult);
+if (element) action.param[field].id = element.id;
 }
+action.locate = action.param[field];
 });
-(0, utils_namespaceObject.assert)(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
-} else actions.forEach((action)=>{
-var _action_locate;
-if (null == (_action_locate = action.locate) ? void 0 : _action_locate.id) {
-const element = elementById(action.locate.id);
-if (element) action.locate.id = element.id;
-}
 });
+(0, utils_namespaceObject.assert)(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
 if (0 === actions.length && returnValue.more_actions_needed_by_instruction && !returnValue.sleep) console.warn('No actions planned for the prompt, but model said more actions are needed:', userInstruction);
 return returnValue;
 }
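The hunk above also replaces the old action.locate id fix-up with a per-field pass: for each planned action, the matching entry in opts.actionSpace is looked up by name, its paramSchema is scanned with findAllMidsceneLocatorField, and every locator field is either converted from a bounding box via fillBboxParam (VL mode) or resolved to a page element id via elementById. A sketch of that loop as a standalone helper follows; the name normalizeLocateFields and its signature are illustrative, while the loop body mirrors the sourcesContent embedded in the map below.

import type { DeviceAction } from '@/types';
import { type IModelPreferences, vlLocateMode } from '@midscene/shared/env';
import { fillBboxParam, findAllMidsceneLocatorField } from './common';

// Illustrative helper (not a package export) wrapping the loop added above.
function normalizeLocateFields(
  actions: any[],
  actionSpace: DeviceAction<any>[],
  size: { width: number; height: number },
  elementById: (ref: any) => { id: string } | undefined,
  modelPreferences: IModelPreferences,
) {
  actions.forEach((action) => {
    const actionInActionSpace = actionSpace.find((def) => def.name === action.type);
    const locateFields = actionInActionSpace
      ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)
      : [];

    locateFields.forEach((field) => {
      const locateResult = action.param[field];
      if (locateResult) {
        if (vlLocateMode(modelPreferences)) {
          // VL models return a bounding box; convert it into the rect-style param.
          action.param[field] = fillBboxParam(locateResult, size.width, size.height, modelPreferences);
        } else {
          // Other models return an element reference; resolve it to a page element id.
          const element = elementById(locateResult);
          if (element) action.param[field].id = element.id;
        }
      }
      // Mirror the field onto action.locate for the web integration.
      action.locate = action.param[field];
    });
  });
}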
@@ -1 +1 @@
-
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n const { description: pageDescription, elementById }
+
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { type IModelPreferences, vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n findAllMidsceneLocatorField,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction<any>[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n\n const modelPreferences: IModelPreferences = {\n intent: 'planning',\n };\n const { description: pageDescription, elementById } = await describeUserPage(\n context,\n modelPreferences,\n );\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlLocateMode(modelPreferences),\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(\n vlLocateMode(modelPreferences),\n ).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlLocateMode(modelPreferences) === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode(modelPreferences)) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size, modelPreferences);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const call = callAI || callAiFn;\n const { content, usage } = await call(\n msgs,\n AIActionType.PLAN,\n modelPreferences,\n );\n const rawResponse = JSON.stringify(content, undefined, 
2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n // TODO: use zod.parse to parse the action.param, and then fill the bbox param.\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (vlLocateMode(modelPreferences)) {\n action.param[field] = fillBboxParam(\n locateResult,\n size.width,\n size.height,\n modelPreferences,\n );\n } else {\n const element = elementById(locateResult);\n if (element) {\n action.param[field].id = element.id;\n }\n }\n }\n\n // to be compatible with the web-integration\n action.locate = action.param[field];\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","_planFromAI_action","callAI","context","screenshotBase64","size","modelPreferences","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","vlLocateMode","taskBackgroundContextText","generateTaskBackgroundContext","userInstructionPrompt","automationUserPrompt","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","msgs","call","callAiFn","content","usage","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACqBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAOC;QAwEEC;IAtEH,MAAM,EAAEC,MAAM,EAAEC,OAAO,EAAE,GAAGH,QAAQ,CAAC;IACrC,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGF;IAEnC,MAAMG,mBAAsC;QAC1C,QAAQ;IACV;IACA,MAAM,EAAE,aAAaC,eAAe,EAAEC,WAAW,EAAE,GAAG,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAC1DN,SACAG;IAGF,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaX,KAAK,WAAW;QAC7B,QAAQY,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaN;IACvB;IACA,MAAMO,4BAA4BC,AAAAA,IAAAA,gCAAAA,6BAAAA,AAAAA,EAChCf,iBACAC,KAAK,GA
AG,EACRA,KAAK,aAAa;IAEpB,MAAMe,wBAAwB,MAAMC,AAAAA,IAAAA,gCAAAA,oBAAAA,AAAAA,EAClCJ,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaN,mBACb,MAAM,CAAC;QACPC;QACA,uBAAuBM;IACzB;IAEA,IAAII,eAAeb;IACnB,IAAIQ,AAAmC,cAAnCA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaN,mBACfW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACL,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaN,mBACvBW,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBf,kBACAD,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhBiB,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBf,MAAMC;IAEzB,MAAMe,OAAe;QACnB;YAAE,MAAM;YAAU,SAASX;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKO;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMF;gBACR;aACD;QACH;KACD;IAED,MAAMO,OAAOpB,UAAUqB,mCAAAA,QAAQA;IAC/B,MAAM,EAAEC,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMH,KAC/BD,MACAK,mCAAAA,YAAAA,CAAAA,IAAiB,EACjBpB;IAEF,MAAMqB,cAAcC,KAAK,SAAS,CAACJ,SAASK,QAAW;IACvD,MAAMC,aAAaN;IAEnB,MAAMO,UACH9B,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAAC6B,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAF;QACA,UAAUQ,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACA/B,KAAK,WAAW,EAChB8B,WAAW,KAAK;IAEpB;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAGnBC,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBrC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACmC,SAAWA,OAAO,IAAI,KAAKC;QAE9B,MAAME,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENzC,MAAM,gBAAgB0C;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,cACF,IAAI7B,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,EAAaN,mBACf6B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACApC,KAAK,KAAK,EACVA,KAAK,MAAM,EACXC;iBAEG;gBACL,MAAMqC,UAAUnC,YAAYiC;gBAC5B,IAAIE,SACFR,OAAO,KAAK,CAACK,MAAM,CAAC,EAAE,GAAGG,QAAQ,EAAE;YAEvC;YAIFR,OAAO,MAAM,GAAGA,OAAO,KAAK,CAACK,MAAM;QACrC;IACF;IAEAN,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEC,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACA7C;IAIJ,OAAOiC;AACT"}
@@ -24,33 +24,8 @@ var __webpack_require__ = {};
 var __webpack_exports__ = {};
 __webpack_require__.r(__webpack_exports__);
 __webpack_require__.d(__webpack_exports__, {
-assertSchema: ()=>assertSchema
-systemPromptToAssert: ()=>systemPromptToAssert
+assertSchema: ()=>assertSchema
 });
-const env_namespaceObject = require("@midscene/shared/env");
-const defaultAssertionPrompt = 'You are a senior testing engineer. User will give an assertion and a screenshot of a page. By carefully viewing the screenshot, please tell whether the assertion is truthy.';
-const defaultAssertionResponseJsonFormat = `Return in the following JSON format:
-{
-pass: boolean, // whether the assertion is truthy
-thought: string | null, // string, if the result is falsy, give the reason why it is falsy. Otherwise, put null.
-}`;
-const getUiTarsAssertionResponseJsonFormat = ()=>`## Output Json String Format
-\`\`\`
-"{
-"pass": <<is a boolean value from the enum [true, false], true means the assertion is truthy>>,
-"thought": "<<is a string, give the reason why the assertion is falsy or truthy. Otherwise.>>"
-}"
-\`\`\`
-
-## Rules **MUST** follow
-- Make sure to return **only** the JSON, with **no additional** text or explanations.
-- Use ${(0, env_namespaceObject.getPreferredLanguage)()} in \`thought\` part.
-- You **MUST** strictly follow up the **Output Json String Format**.`;
-function systemPromptToAssert(model) {
-return `${defaultAssertionPrompt}
-
-${model.isUITars ? getUiTarsAssertionResponseJsonFormat() : defaultAssertionResponseJsonFormat}`;
-}
 const assertSchema = {
 type: 'json_schema',
 json_schema: {
@@ -80,10 +55,8 @@ const assertSchema = {
 }
 };
 exports.assertSchema = __webpack_exports__.assertSchema;
-exports.systemPromptToAssert = __webpack_exports__.systemPromptToAssert;
 for(var __webpack_i__ in __webpack_exports__)if (-1 === [
-"assertSchema"
-"systemPromptToAssert"
+"assertSchema"
 ].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
 Object.defineProperty(exports, '__esModule', {
 value: true
@@ -1 +1 @@
-
{"version":3,"file":"ai-model/prompt/assertion.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/prompt/assertion.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import
+
{"version":3,"file":"ai-model/prompt/assertion.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/prompt/assertion.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { ResponseFormatJSONSchema } from 'openai/resources/index';\n\nexport const assertSchema: ResponseFormatJSONSchema = {\n type: 'json_schema',\n json_schema: {\n name: 'assert',\n strict: true,\n schema: {\n type: 'object',\n properties: {\n pass: {\n type: 'boolean',\n description: 'Whether the assertion passed or failed',\n },\n thought: {\n type: ['string', 'null'],\n description: 'The thought process behind the assertion',\n },\n },\n required: ['pass', 'thought'],\n additionalProperties: false,\n },\n },\n};\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","assertSchema"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;ACJO,MAAMI,eAAyC;IACpD,MAAM;IACN,aAAa;QACX,MAAM;QACN,QAAQ;QACR,QAAQ;YACN,MAAM;YACN,YAAY;gBACV,MAAM;oBACJ,MAAM;oBACN,aAAa;gBACf;gBACA,SAAS;oBACP,MAAM;wBAAC;wBAAU;qBAAO;oBACxB,aAAa;gBACf;YACF;YACA,UAAU;gBAAC;gBAAQ;aAAU;YAC7B,sBAAsB;QACxB;IACF;AACF"}
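After this change the compiled prompt/assertion module exports only assertSchema; systemPromptToAssert and the inline assertion prompt strings are gone from this file, and this part of the diff does not show where (or whether) an equivalent prompt is now produced. For reference, the complete remaining source of src/ai-model/prompt/assertion.ts, as carried in the sourcesContent of the updated map above:

import type { ResponseFormatJSONSchema } from 'openai/resources/index';

// Sole remaining export of the module: the JSON schema used to constrain
// assertion responses to { pass: boolean, thought: string | null }.
export const assertSchema: ResponseFormatJSONSchema = {
  type: 'json_schema',
  json_schema: {
    name: 'assert',
    strict: true,
    schema: {
      type: 'object',
      properties: {
        pass: {
          type: 'boolean',
          description: 'Whether the assertion passed or failed',
        },
        thought: {
          type: ['string', 'null'],
          description: 'The thought process behind the assertion',
        },
      },
      required: ['pass', 'thought'],
      additionalProperties: false,
    },
  },
};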