@midscene/core 0.26.5-beta-20250814080504.0 → 0.26.5-beta-20250814125155.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/ai-model/action-executor.mjs +139 -0
- package/dist/es/ai-model/action-executor.mjs.map +1 -0
- package/dist/es/ai-model/common.mjs +219 -0
- package/dist/es/ai-model/common.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +10 -0
- package/dist/es/ai-model/inspect.mjs +317 -0
- package/dist/es/ai-model/inspect.mjs.map +1 -0
- package/dist/es/ai-model/llm-planning.mjs +85 -0
- package/dist/es/ai-model/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/assertion.mjs +55 -0
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
- package/dist/es/ai-model/prompt/common.mjs +7 -0
- package/dist/es/ai-model/prompt/common.mjs.map +1 -0
- package/dist/es/ai-model/prompt/describe.mjs +44 -0
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +137 -0
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +359 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/util.mjs +123 -0
- package/dist/es/ai-model/prompt/util.mjs.map +1 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +413 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +235 -0
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
- package/dist/es/image/index.mjs +2 -0
- package/dist/es/index.mjs +7 -2360
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +261 -0
- package/dist/es/insight/index.mjs.map +1 -0
- package/dist/es/insight/utils.mjs +19 -0
- package/dist/es/insight/utils.mjs.map +1 -0
- package/dist/es/types.mjs +11 -0
- package/dist/es/types.mjs.map +1 -0
- package/dist/es/utils.mjs +2 -2
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/ai-model/action-executor.js +173 -0
- package/dist/lib/ai-model/action-executor.js.map +1 -0
- package/dist/lib/ai-model/common.js +289 -0
- package/dist/lib/ai-model/common.js.map +1 -0
- package/dist/lib/ai-model/index.js +103 -0
- package/dist/lib/ai-model/index.js.map +1 -0
- package/dist/lib/ai-model/inspect.js +360 -0
- package/dist/lib/ai-model/inspect.js.map +1 -0
- package/dist/lib/ai-model/llm-planning.js +119 -0
- package/dist/lib/ai-model/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/assertion.js +92 -0
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
- package/dist/lib/ai-model/prompt/common.js +41 -0
- package/dist/lib/ai-model/prompt/common.js.map +1 -0
- package/dist/lib/ai-model/prompt/describe.js +78 -0
- package/dist/lib/ai-model/prompt/describe.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +177 -0
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +415 -0
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/util.js +175 -0
- package/dist/lib/ai-model/prompt/util.js.map +1 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +496 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -0
- package/dist/lib/ai-model/ui-tars-planning.js +272 -0
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
- package/dist/lib/image/index.js +56 -0
- package/dist/lib/image/index.js.map +1 -0
- package/dist/lib/index.js +21 -2393
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +295 -0
- package/dist/lib/insight/index.js.map +1 -0
- package/dist/lib/insight/utils.js +53 -0
- package/dist/lib/insight/utils.js.map +1 -0
- package/dist/lib/types.js +82 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/lib/utils.js +2 -2
- package/dist/lib/yaml.js +20 -0
- package/dist/lib/yaml.js.map +1 -0
- package/dist/types/ai-model/action-executor.d.ts +19 -0
- package/dist/types/ai-model/common.d.ts +34 -0
- package/dist/types/ai-model/index.d.ts +11 -0
- package/dist/types/ai-model/inspect.d.ts +49 -0
- package/dist/types/ai-model/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/assertion.d.ts +5 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +15 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +25 -0
- package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +45 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +99 -0
- package/dist/types/ai-model/service-caller/index.d.ts +26 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +76 -0
- package/dist/types/image/index.d.ts +1 -0
- package/dist/types/index.d.ts +9 -1289
- package/dist/types/insight/index.d.ts +26 -0
- package/dist/types/insight/utils.d.ts +2 -0
- package/dist/types/tree.d.ts +1 -11
- package/dist/types/types.d.ts +399 -0
- package/dist/types/utils.d.ts +27 -47
- package/dist/types/yaml.d.ts +172 -0
- package/package.json +6 -6
- package/dist/es/ai-model.mjs +0 -2502
- package/dist/es/ai-model.mjs.map +0 -1
- package/dist/lib/ai-model.js +0 -2622
- package/dist/lib/ai-model.js.map +0 -1
- package/dist/types/ai-model.d.ts +0 -596
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/index.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D"}
|
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * Minimal webpack runtime for this bundle: a bag of helper functions used to
 * build the module's export object.
 */
const __webpack_require__ = {};

/**
 * Own-property check that does not rely on `obj` having `hasOwnProperty`.
 *
 * @param {object} obj - Object to inspect.
 * @param {string} prop - Property name.
 * @returns {boolean} True when `prop` is an own property of `obj`.
 */
__webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

/**
 * Define one enumerable getter on `exports1` for every own key of
 * `definition`. Keys that `exports1` already owns are left untouched.
 *
 * @param {object} exports1 - Export object to populate.
 * @param {Object<string, Function>} definition - Map of export name to getter.
 */
__webpack_require__.d = (exports1, definition) => {
  Object.keys(definition).forEach((key) => {
    if (__webpack_require__.o(exports1, key)) {
      return;
    }
    Object.defineProperty(exports1, key, {
      enumerable: true,
      get: definition[key],
    });
  });
};

/**
 * Mark `exports1` as an ES module namespace: sets `Symbol.toStringTag` to
 * 'Module' when symbols are available, and defines `__esModule` as true.
 *
 * @param {object} exports1 - Export object to mark.
 */
__webpack_require__.r = (exports1) => {
  if (typeof Symbol !== 'undefined' && Symbol.toStringTag) {
    Object.defineProperty(exports1, Symbol.toStringTag, {
      value: 'Module',
    });
  }
  Object.defineProperty(exports1, '__esModule', {
    value: true,
  });
};

const __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
// Getters are resolved lazily, so the functions they name may be declared later.
__webpack_require__.d(__webpack_exports__, {
  AiAssert: () => AiAssert,
  AiExtractElementInfo: () => AiExtractElementInfo,
  AiLocateSection: () => AiLocateSection,
  AiLocateElement: () => AiLocateElement,
});
|
|
32
|
+
const env_namespaceObject = require("@midscene/shared/env");
|
|
33
|
+
const img_namespaceObject = require("@midscene/shared/img");
|
|
34
|
+
const logger_namespaceObject = require("@midscene/shared/logger");
|
|
35
|
+
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
36
|
+
const external_common_js_namespaceObject = require("./common.js");
|
|
37
|
+
const assertion_js_namespaceObject = require("./prompt/assertion.js");
|
|
38
|
+
const extraction_js_namespaceObject = require("./prompt/extraction.js");
|
|
39
|
+
const llm_locator_js_namespaceObject = require("./prompt/llm-locator.js");
|
|
40
|
+
const llm_section_locator_js_namespaceObject = require("./prompt/llm-section-locator.js");
|
|
41
|
+
const util_js_namespaceObject = require("./prompt/util.js");
|
|
42
|
+
const index_js_namespaceObject = require("./service-caller/index.js");
|
|
43
|
+
const debugInspect = (0, logger_namespaceObject.getDebug)('ai:inspect');
|
|
44
|
+
const debugSection = (0, logger_namespaceObject.getDebug)('ai:section');
|
|
45
|
+
/**
 * Return the text part of a user prompt.
 *
 * @param {string|{prompt: *}} prompt - Either the prompt text itself, or an
 *   object carrying the text in its `prompt` property.
 * @returns {*} The string as given, or the object's `prompt` value.
 */
const extraTextFromUserPrompt = (prompt) => {
  if (typeof prompt === 'string') {
    return prompt;
  }
  return prompt.prompt;
};
|
|
49
|
+
/**
 * Turn the reference images of a multimodal prompt into chat messages.
 *
 * When there are no images the result is an empty array. Otherwise the result
 * starts with one introductory text message, followed by two messages per
 * image, in input order: a text label naming the image, then the image itself.
 *
 * @param {{images?: Array<{name: string, url: string}>, convertHttpImage2Base64?: boolean}} [multimodalPrompt]
 *   Prompt add-ons; `convertHttpImage2Base64` is coerced to a boolean and
 *   forwarded to `preProcessImageUrl`.
 * @returns {Promise<Array<object>>} User-role chat messages.
 */
const promptsToChatParam = async (multimodalPrompt) => {
  const images = multimodalPrompt == null ? undefined : multimodalPrompt.images;
  if (images == null || !images.length) {
    return [];
  }

  const items = Array.from(images);
  const convertHttpImage = !!multimodalPrompt.convertHttpImage2Base64;

  // The images are independent of each other, so preprocess them concurrently;
  // Promise.all keeps the results in input order.
  const imageUrls = await Promise.all(
    items.map((item) => (0, img_namespaceObject.preProcessImageUrl)(item.url, convertHttpImage)),
  );

  const msgs = [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Next, I will provide all the reference images.',
        },
      ],
    },
  ];

  items.forEach((item, index) => {
    msgs.push(
      {
        role: 'user',
        content: [
          {
            type: 'text',
            text: `reference image ${item.name}:`,
          },
        ],
      },
      {
        role: 'user',
        content: [
          {
            type: 'image_url',
            image_url: {
              url: imageUrls[index],
              detail: 'high',
            },
          },
        ],
      },
    );
  });

  return msgs;
};
|
|
89
|
+
/**
 * Ask the model to locate the element described by `targetElementDescription`
 * and turn its answer into a list of matched elements.
 *
 * The image sent to the model is, in priority order: the search-area image
 * from `options.searchConfig`; the screenshot passed through
 * `paddingToMatchBlockByBase64` when the locate mode is 'qwen-vl'; the
 * screenshot passed through `markupImageForLLM` when no locate mode is set;
 * otherwise the plain screenshot.
 *
 * When the response carries an array `bbox`, it is converted to a rect
 * (relative to the search area when one is given) and resolved to an element
 * at the rect's center. A failure while handling the bbox is reported through
 * `parseResult.errors`, not thrown.
 *
 * @param {object} options
 * @param {object} options.context - UI context; reads `screenshotBase64`,
 *   `tree` and `size`.
 * @param {string|object} options.targetElementDescription - Prompt text, or an
 *   object with `prompt` and optional `images` / `convertHttpImage2Base64`.
 * @param {Function} [options.callAI] - Replacement for `callToGetJSONObject`.
 * @param {{rect: object, imageBase64: string}} [options.searchConfig] -
 *   Restricts the lookup to a previously located section.
 * @returns {Promise<object>} `{ rect, parseResult: { elements, errors },
 *   rawResponse, elementById, usage, isOrderSensitive }`.
 * @throws When the description is missing, or when `searchConfig` lacks
 *   `rect` or `imageBase64` (via the shared `assert` helper).
 */
async function AiLocateElement(options) {
  const { context, targetElementDescription, callAI } = options;
  const { screenshotBase64 } = context;
  const { assert: ensure } = utils_namespaceObject;

  // Fail fast: no point describing the page when there is nothing to look for.
  ensure(targetElementDescription, 'cannot find the target element description');

  const { description, elementById, insertElementByPosition } = await (0, util_js_namespaceObject.describeUserPage)(context);
  const userInstructionPrompt = await llm_locator_js_namespaceObject.findElementPrompt.format({
    pageDescription: description,
    targetElementDescription: extraTextFromUserPrompt(targetElementDescription),
  });

  // Read the locate mode once and reuse it for the prompt and the image choice.
  const locateMode = (0, env_namespaceObject.vlLocateMode)();
  const systemPrompt = (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)(locateMode);

  let imagePayload = screenshotBase64;
  if (options.searchConfig) {
    ensure(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
    ensure(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
    imagePayload = options.searchConfig.imageBase64;
  } else if (locateMode === 'qwen-vl') {
    imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
  } else if (!locateMode) {
    imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
  }

  const msgs = [
    {
      role: 'system',
      content: systemPrompt,
    },
    {
      role: 'user',
      content: [
        {
          type: 'image_url',
          image_url: {
            url: imagePayload,
            detail: 'high',
          },
        },
        {
          type: 'text',
          text: userInstructionPrompt,
        },
      ],
    },
  ];
  if (typeof targetElementDescription !== 'string') {
    const addOns = await promptsToChatParam({
      images: targetElementDescription.images,
      convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,
    });
    msgs.push(...addOns);
  }

  const callAIFn = callAI || index_js_namespaceObject.callToGetJSONObject;
  const res = await callAIFn(msgs, external_common_js_namespaceObject.AIActionType.INSPECT_ELEMENT);
  const rawResponse = JSON.stringify(res.content);

  // The `in` operator throws on non-objects, so treat such content as empty.
  const content = typeof res.content === 'object' && res.content !== null ? res.content : {};

  let resRect;
  let matchedElements = 'elements' in content ? content.elements : [];
  let errors = 'errors' in content ? content.errors : [];
  try {
    if ('bbox' in content && Array.isArray(content.bbox)) {
      const searchRect = options.searchConfig ? options.searchConfig.rect : undefined;
      const areaWidth = (searchRect && searchRect.width) || context.size.width;
      const areaHeight = (searchRect && searchRect.height) || context.size.height;
      const offsetLeft = searchRect ? searchRect.left : undefined;
      const offsetTop = searchRect ? searchRect.top : undefined;

      resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(content.bbox, areaWidth, areaHeight, offsetLeft, offsetTop);
      debugInspect('resRect', resRect);

      const rectCenter = {
        x: resRect.left + resRect.width / 2,
        y: resRect.top + resRect.height / 2,
      };
      let element = (0, util_js_namespaceObject.elementByPositionWithElementInfo)(context.tree, rectCenter);
      const distanceToCenter = element
        ? (0, util_js_namespaceObject.distance)({ x: element.center[0], y: element.center[1] }, rectCenter)
        : 0;
      // No element there, or the nearest one is too far away: insert one at the center.
      if (!element || distanceToCenter > util_js_namespaceObject.distanceThreshold) {
        element = insertElementByPosition(rectCenter);
      }
      if (element) {
        matchedElements = [element];
        errors = [];
      }
    }
  } catch (e) {
    const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : 'unknown error in locate';
    // Build a new array so the model's own `errors` array is not mutated.
    errors = Array.isArray(errors) && errors.length !== 0 ? [...errors, `(${msg})`] : [msg];
  }

  return {
    rect: resRect,
    parseResult: {
      elements: matchedElements,
      errors,
    },
    rawResponse,
    elementById,
    usage: res.usage,
    isOrderSensitive: 'isOrderSensitive' in content ? content.isOrderSensitive : undefined,
  };
}
|
|
182
|
+
/**
 * Ask the model for the bounding box of the section described by
 * `sectionDescription` and crop the screenshot to it.
 *
 * When the response has a `bbox`, it is converted to a rect, merged with the
 * rects of any array entries in `references_bbox`, and passed through
 * `expandSearchArea`; the screenshot is then cropped to that rect. Without a
 * `bbox`, `rect` is undefined and the full screenshot is returned.
 *
 * @param {object} options
 * @param {object} options.context - UI context; reads `screenshotBase64` and
 *   `size`.
 * @param {string|object} options.sectionDescription - Prompt text, or an
 *   object with `prompt` and optional `images` / `convertHttpImage2Base64`.
 * @returns {Promise<object>} `{ rect, imageBase64, error, rawResponse, usage }`.
 */
async function AiLocateSection(options) {
  const { context, sectionDescription } = options;
  const { screenshotBase64 } = context;
  const { adaptBboxToRect, AIActionType } = external_common_js_namespaceObject;

  const systemPrompt = (0, llm_section_locator_js_namespaceObject.systemPromptToLocateSection)((0, env_namespaceObject.vlLocateMode)());
  const sectionLocatorInstructionText = await llm_section_locator_js_namespaceObject.sectionLocatorInstruction.format({
    sectionDescription: extraTextFromUserPrompt(sectionDescription),
  });

  const msgs = [
    {
      role: 'system',
      content: systemPrompt,
    },
    {
      role: 'user',
      content: [
        {
          type: 'image_url',
          image_url: {
            url: screenshotBase64,
            detail: 'high',
          },
        },
        {
          type: 'text',
          text: sectionLocatorInstructionText,
        },
      ],
    },
  ];
  if (typeof sectionDescription !== 'string') {
    const addOns = await promptsToChatParam({
      images: sectionDescription.images,
      convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,
    });
    msgs.push(...addOns);
  }

  const result = await (0, external_common_js_namespaceObject.callAiFn)(msgs, AIActionType.EXTRACT_DATA);

  let sectionRect;
  const sectionBbox = result.content.bbox;
  if (sectionBbox) {
    const targetRect = adaptBboxToRect(sectionBbox, context.size.width, context.size.height);
    debugSection('original targetRect %j', targetRect);

    // Tolerate a malformed (non-array) list; it would otherwise crash on `.filter`.
    const referenceBboxList = Array.isArray(result.content.references_bbox) ? result.content.references_bbox : [];
    debugSection('referenceBboxList %j', referenceBboxList);

    const referenceRects = referenceBboxList
      .filter((bbox) => Array.isArray(bbox))
      .map((bbox) => adaptBboxToRect(bbox, context.size.width, context.size.height));
    debugSection('referenceRects %j', referenceRects);

    const mergedRect = (0, external_common_js_namespaceObject.mergeRects)([targetRect, ...referenceRects]);
    debugSection('mergedRect %j', mergedRect);

    sectionRect = (0, external_common_js_namespaceObject.expandSearchArea)(mergedRect, context.size);
    debugSection('expanded sectionRect %j', sectionRect);
  }

  let imageBase64 = screenshotBase64;
  if (sectionRect) {
    imageBase64 = await (0, img_namespaceObject.cropByRect)(
      screenshotBase64,
      sectionRect,
      (0, env_namespaceObject.getAIConfigInBoolean)(env_namespaceObject.MIDSCENE_USE_QWEN_VL),
    );
  }

  return {
    rect: sectionRect,
    imageBase64,
    error: result.content.error,
    rawResponse: JSON.stringify(result.content),
    usage: result.usage,
  };
}
|
|
246
|
+
/**
 * Ask the model to extract data from the page according to `dataQuery`.
 *
 * The user message holds the screenshot (unless
 * `extractOption.screenshotIncluded` is exactly `false`) followed by the
 * extraction prompt. An extra 'Please provide reasons.' message is appended
 * when `extractOption.returnThought` is truthy, then the reference-image
 * messages when `multimodalPrompt` is given.
 *
 * @param {object} options
 * @param {*} options.dataQuery - Query forwarded to `extractDataQueryPrompt`.
 * @param {object} options.context - UI context; reads `screenshotBase64`.
 * @param {{domIncluded?: *, screenshotIncluded?: boolean, returnThought?: boolean}} [options.extractOption]
 * @param {{images?: Array, convertHttpImage2Base64?: boolean}} [options.multimodalPrompt]
 * @returns {Promise<object>} `{ parseResult, elementById, usage }`, where
 *   `parseResult` is the model's response content.
 */
async function AiExtractElementInfo(options) {
  const { dataQuery, context, extractOption, multimodalPrompt } = options;
  const { screenshotBase64 } = context;
  // Single source of truth for the optional settings.
  const extractConfig = extractOption == null ? {} : extractOption;

  const systemPrompt = (0, extraction_js_namespaceObject.systemPromptToExtract)();
  const { description, elementById } = await (0, util_js_namespaceObject.describeUserPage)(context, {
    truncateTextLength: 200,
    filterNonTextContent: false,
    visibleOnly: false,
    domIncluded: extractConfig.domIncluded,
  });
  const extractDataPromptText = await (0, extraction_js_namespaceObject.extractDataQueryPrompt)(description, dataQuery);

  const userContent = [];
  // The screenshot is included by default; only an explicit `false` opts out.
  if (extractConfig.screenshotIncluded !== false) {
    userContent.push({
      type: 'image_url',
      image_url: {
        url: screenshotBase64,
        detail: 'high',
      },
    });
  }
  userContent.push({
    type: 'text',
    text: extractDataPromptText,
  });

  const msgs = [
    {
      role: 'system',
      content: systemPrompt,
    },
    {
      role: 'user',
      content: userContent,
    },
  ];
  if (extractConfig.returnThought) {
    msgs.push({
      role: 'user',
      content: 'Please provide reasons.',
    });
  }
  if (multimodalPrompt) {
    const addOns = await promptsToChatParam({
      images: multimodalPrompt.images,
      convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,
    });
    msgs.push(...addOns);
  }

  const result = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.EXTRACT_DATA);
  return {
    parseResult: result.content,
    elementById,
    usage: result.usage,
  };
}
|
|
298
|
+
/**
 * Ask the model whether `assertion` holds for the screenshot in `context`.
 *
 * @param {object} options
 * @param {string|object} options.assertion - Assertion text, or an object with
 *   `prompt` and optional `images` / `convertHttpImage2Base64`.
 * @param {object} options.context - UI context; reads `screenshotBase64`.
 * @returns {Promise<{content: *, usage: *}>} The model's response content and
 *   usage information, as returned by `callAiFn`.
 * @throws When `assertion` is empty (via the shared `assert` helper).
 */
async function AiAssert(options) {
  const { assertion, context } = options;
  (0, utils_namespaceObject.assert)(assertion, 'assertion should not be empty');
  const { screenshotBase64 } = context;

  const systemPrompt = (0, assertion_js_namespaceObject.systemPromptToAssert)({
    isUITars: (0, env_namespaceObject.getAIConfigInBoolean)(env_namespaceObject.MIDSCENE_USE_VLM_UI_TARS),
  });
  const assertionText = extraTextFromUserPrompt(assertion);

  // Kept flush-left on purpose: any indentation here would become part of the prompt.
  const userText = `
Here is the assertion. Please tell whether it is truthy according to the screenshot.
=====================================
${assertionText}
=====================================
`;

  const msgs = [
    {
      role: 'system',
      content: systemPrompt,
    },
    {
      role: 'user',
      content: [
        {
          type: 'image_url',
          image_url: {
            url: screenshotBase64,
            detail: 'high',
          },
        },
        {
          type: 'text',
          text: userText,
        },
      ],
    },
  ];
  if (typeof assertion !== 'string') {
    const addOns = await promptsToChatParam({
      images: assertion.images,
      convertHttpImage2Base64: assertion.convertHttpImage2Base64,
    });
    msgs.push(...addOns);
  }

  const { content: assertResult, usage } = await (0, external_common_js_namespaceObject.callAiFn)(msgs, external_common_js_namespaceObject.AIActionType.ASSERT);
  return {
    content: assertResult,
    usage,
  };
}
|
|
346
|
+
// Publish the bundle's exports on the CommonJS `exports` object.
exports.AiAssert = __webpack_exports__.AiAssert;
exports.AiExtractElementInfo = __webpack_exports__.AiExtractElementInfo;
exports.AiLocateElement = __webpack_exports__.AiLocateElement;
exports.AiLocateSection = __webpack_exports__.AiLocateSection;

// Copy over any remaining export that was not assigned by name above.
for (const exportName of Object.keys(__webpack_exports__)) {
  const isNamedAbove = [
    'AiAssert',
    'AiExtractElementInfo',
    'AiLocateElement',
    'AiLocateSection',
  ].includes(exportName);
  if (!isNamedAbove) {
    exports[exportName] = __webpack_exports__[exportName];
  }
}

// Flag the CommonJS export object as originating from an ES module.
Object.defineProperty(exports, '__esModule', {
  value: true,
});
|
|
359
|
+
|
|
360
|
+
//# sourceMappingURL=inspect.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIAssertionResponse,\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n TMultimodalPrompt,\n TUserPrompt,\n UIContext,\n} from '@/types';\nimport {\n MIDSCENE_USE_QWEN_VL,\n MIDSCENE_USE_VLM_UI_TARS,\n getAIConfigInBoolean,\n vlLocateMode,\n} from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport {\n AIActionType,\n adaptBboxToRect,\n callAiFn,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport { systemPromptToAssert } from './prompt/assertion';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from 
'./prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callToGetJSONObject } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAI?: typeof callAiFn<AIElementResponse | [number, number]>;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n 
rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAI } = options;\n const { screenshotBase64 } = context;\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context);\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlLocateMode());\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const callAIFn =\n callAI || callToGetJSONObject<AIElementResponse | [number, number]>;\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT);\n\n const rawResponse = 
JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? 
(res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n callAI?: typeof callAiFn<AISectionLocatorResponse>;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription } = options;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlLocateMode());\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(bbox, context.size.width, context.size.height);\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = 
mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n getAIConfigInBoolean(MIDSCENE_USE_QWEN_VL),\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt } = options;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: 
multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAiFn<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n\nexport async function AiAssert<\n ElementType extends BaseElement = BaseElement,\n>(options: { assertion: TUserPrompt; context: UIContext<ElementType> }) {\n const { assertion, context } = options;\n\n assert(assertion, 'assertion should not be empty');\n\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToAssert({\n isUITars: getAIConfigInBoolean(MIDSCENE_USE_VLM_UI_TARS),\n });\n\n const assertionText = extraTextFromUserPrompt(assertion);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: `\nHere is the assertion. 
Please tell whether it is truthy according to the screenshot.\n=====================================\n${assertionText}\n=====================================\n `,\n },\n ],\n },\n ];\n\n if (typeof assertion !== 'string') {\n const addOns = await promptsToChatParam({\n images: assertion.images,\n convertHttpImage2Base64: assertion.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const { content: assertResult, usage } = await callAiFn<AIAssertionResponse>(\n msgs,\n AIActionType.ASSERT,\n );\n return {\n content: assertResult,\n usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAI","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","vlLocateMode","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","callAIFn","callToGetJSONObject","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAiFn","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","getAIConfigInBoolean","MIDSCENE_USE_QWEN_VL","AiExtractElementInfo","_options_extractOption","dataQuery","extr
actOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent","AiAssert","assertion","systemPromptToAssert","MIDSCENE_USE_VLM_UI_TARS","assertionText","assertResult","usage"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;;;;AC6DA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OAMD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,MAAM,EAAE,GAAGH;IACtD,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAC7B,MAAM,EAAEI,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBP;IAEzBQ,IAAAA,sBAAAA,MAAAA,AAAAA,EACEP,0BACA;IAGF,MAAMQ,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0Bf,wBAAwBY;IACpD;IACA,MAAMU,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EA
A4BC,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA;IAEjD,IAAIC,eAAeX;IAEnB,IAAIJ,QAAQ,YAAY,EAAE;QACxBS,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFS,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFe,eAAef,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIc,AAAmB,cAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACTC,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACD,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACVC,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBb,kBACAH,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKG;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAML;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOR,0BAAuC;QAChD,MAAMgB,SAAS,MAAM1B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAMC,WACJhB,UAAUiB,yBAAAA,mBAAmBA;IAE/B,MAAMC,MAAM,MAAMF,SAASxB,MAAM2B,mCAAAA,YAAAA,CAAAA,eAA4B;IAE7D,MAAMC,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBACAC,6BAAAA,wBACAC,6BAAAA;YALFP,UAAUQ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK5B,QAAQ,IAAI,CAAC,KAAK,EACvD6B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK7B,QAAQ,IAAI,CAAC,MAAM,UACzD8B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG;YAEjC7C,aAAa,WAAWsC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBA
ClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiCnC,QAAQ,IAAI,EAAEiC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAU5B,wBAAwB2B;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAjB;QACA,OAAOe,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB5C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE4C,kBAAkB,EAAE,GAAG7C;IACxC,MAAM,EAAEI,gBAAgB,EAAE,GAAGH;IAE7B,MAAMW,eAAekC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BhC,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA;IACjD,MAAMiC,gCAAgC,MAAMC,uCAAAA,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoB1D,wBAAwBuD;IAC9C;IACA,MAAMlD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM2C;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAM3B,SAAS,MAAM1B,mBAAmB;YACtC,QAAQqD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAlD,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM+B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EACnBvD,MACA2B,mCAAAA,YAAAA,CAAAA,YAAyB;IAG3B,IAAI6B;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACAnD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM;QAErBZ,aAAa,0BAA0BgE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D5D,aAAa,wBAAwBiE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAA
AA,EAAgBuB,MAAMvD,QAAQ,IAAI,CAAC,KAAK,EAAEA,QAAQ,IAAI,CAAC,MAAM;QAExEZ,aAAa,qBAAqBkE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DlE,aAAa,iBAAiBoE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYxD,QAAQ,IAAI;QACvDZ,aAAa,2BAA2B8D;IAC1C;IAEA,IAAIS,cAAcxD;IAClB,IAAI+C,aACFS,cAAc,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAClBzD,kBACA+C,aACAW,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA,EAAqBC,oBAAAA,oBAAoBA;IAI7C,OAAO;QACL,MAAMZ;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAee,qBAGpBhE,OAKD;QA0CKiE;IAzCJ,MAAM,EAAEC,SAAS,EAAEjE,OAAO,EAAEkE,aAAa,EAAE1E,gBAAgB,EAAE,GAAGO;IAChE,MAAMY,eAAewD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAEhE,gBAAgB,EAAE,GAAGH;IAC7B,MAAM,EAAEI,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBP,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAakE,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;IACzC;IAEA,MAAME,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClCjE,aACA6D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKnE;YACL,QAAQ;QACV;IACF;IAGFmE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM1E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS2D;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCtE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAMyB,SAAS,MAAM1B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM+B,SAAS,MAAMC,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EACnBvD,MACA2B,mCAAAA,YAAAA,CAAAA,YAAyB;IAE3B,OAAO;QACL,aAAa2B,OAAO,OAAO;QAC3B3C;QACA,OAAO2C,OAAO,KAAK;IACrB;AACF;AAEO,eAAeuB,SAEpBxE,OAAoE;IACpE,MAAM,EAAEyE,SAAS,EAAExE,OAAO,EAAE,GAAGD;IAE/BS,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOgE,WAAW;IAElB,MAAM,EAAErE,gBAAgB,EAAE,GAAGH;IAE7B,MAAMW,eAAe8D,AAAAA,IAAAA,6BAAAA,oBAAAA,AAAAA,EAAqB;QACxC,UAAUZ,AAAAA,IAAAA,oBAAAA,oBAAAA,AAAAA,EAAqBa,oBAA
AA,wBAAwBA;IACzD;IAEA,MAAMC,gBAAgBtF,wBAAwBmF;IAE9C,MAAM9E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASiB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM,CAAC;;;AAGjB,EAAEwE,cAAc;;EAEd,CAAC;gBACK;aACD;QACH;KACD;IAED,IAAI,AAAqB,YAArB,OAAOH,WAAwB;QACjC,MAAMvD,SAAS,MAAM1B,mBAAmB;YACtC,QAAQiF,UAAU,MAAM;YACxB,yBAAyBA,UAAU,uBAAuB;QAC5D;QACA9E,KAAK,IAAI,IAAIuB;IACf;IAEA,MAAM,EAAE,SAAS2D,YAAY,EAAEC,KAAK,EAAE,GAAG,MAAM5B,AAAAA,IAAAA,mCAAAA,QAAAA,AAAAA,EAC7CvD,MACA2B,mCAAAA,YAAAA,CAAAA,MAAmB;IAErB,OAAO;QACL,SAASuD;QACTC;IACF;AACF"}
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Minimal webpack runtime used by this module. Only three helpers are needed:
//   o - own-property check
//   d - define lazy, enumerable export getters
//   r - mark an object as an ES module namespace
var __webpack_require__ = {};

// o: own-property test routed through Object.prototype so it does not depend
// on the target object having a `hasOwnProperty` method of its own.
__webpack_require__.o = (obj, prop) => {
    return Object.prototype.hasOwnProperty.call(obj, prop);
};

// d: for each own key of `definition` that `exports1` does not already own,
// install an enumerable getter on `exports1` that defers to the definition.
__webpack_require__.d = (exports1, definition) => {
    for (const key in definition) {
        const declaredOnDefinition = __webpack_require__.o(definition, key);
        if (!declaredOnDefinition || __webpack_require__.o(exports1, key)) {
            continue;
        }
        Object.defineProperty(exports1, key, {
            enumerable: true,
            get: definition[key]
        });
    }
};

// r: tag the object as a module (Symbol.toStringTag, when available) and set
// the `__esModule` interop flag.
__webpack_require__.r = (exports1) => {
    const canTag = typeof Symbol !== 'undefined' && Symbol.toStringTag;
    if (canTag) {
        Object.defineProperty(exports1, Symbol.toStringTag, {
            value: 'Module'
        });
    }
    Object.defineProperty(exports1, '__esModule', {
        value: true
    });
};

// Namespace object for this module; `plan` is exposed through a getter so the
// binding is only resolved when it is read.
var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
__webpack_require__.d(__webpack_exports__, {
    plan: () => plan
});
|
|
29
|
+
const env_namespaceObject = require("@midscene/shared/env");
|
|
30
|
+
const img_namespaceObject = require("@midscene/shared/img");
|
|
31
|
+
const utils_namespaceObject = require("@midscene/shared/utils");
|
|
32
|
+
const external_common_js_namespaceObject = require("./common.js");
|
|
33
|
+
const llm_planning_js_namespaceObject = require("./prompt/llm-planning.js");
|
|
34
|
+
const util_js_namespaceObject = require("./prompt/util.js");
|
|
35
|
+
/**
 * Ask the model to plan the next action(s) for a user instruction.
 *
 * Builds a system prompt plus one user message (screenshot image + formatted
 * instruction prompt), sends them through `opts.callAI` (falling back to
 * `callAiFn`) with `AIActionType.PLAN`, and normalizes the reply.
 *
 * @param {string} userInstruction - Instruction forwarded to
 *   `generateTaskBackgroundContext`.
 * @param {object} opts - Planning options.
 * @param {object} opts.context - UI context; `screenshotBase64`, `size` and
 *   `tree` are read from it and it is passed to `describeUserPage`.
 * @param {Array} opts.actionSpace - Forwarded to `systemPromptToTaskPlanning`.
 * @param {Function} [opts.callAI] - Replacement for the default `callAiFn`.
 * @param {string} [opts.log] - Forwarded to `generateTaskBackgroundContext`.
 * @param {string} [opts.actionContext] - Forwarded to
 *   `generateTaskBackgroundContext`.
 * @returns {Promise<object>} The model response spread into a new object,
 *   with `actions` (always an array), `rawResponse` (pretty-printed JSON of
 *   the response), `usage`, and `yamlFlow` added.
 * @throws {Error} When the model returns no content, when `fillBboxParam`
 *   fails for an action in VL mode, or when the response carries `error` in
 *   VL mode.
 */
async function plan(userInstruction, opts) {
    const { callAI, context } = opts || {};
    const { screenshotBase64, size } = context;
    const { description: pageDescription, elementById } = await (0, util_js_namespaceObject.describeUserPage)(context);

    const systemPrompt = await (0, llm_planning_js_namespaceObject.systemPromptToTaskPlanning)({
        actionSpace: opts.actionSpace,
        vlMode: (0, env_namespaceObject.vlLocateMode)()
    });
    const taskBackgroundContextText = (0, llm_planning_js_namespaceObject.generateTaskBackgroundContext)(userInstruction, opts.log, opts.actionContext);
    const userInstructionPrompt = await (0, llm_planning_js_namespaceObject.automationUserPrompt)((0, env_namespaceObject.vlLocateMode)()).format({
        pageDescription,
        taskBackgroundContext: taskBackgroundContextText
    });

    // Pick the image sent to the model: padded for 'qwen-vl', marked up when
    // no VL mode is configured, otherwise the raw screenshot.
    let imagePayload = screenshotBase64;
    if ('qwen-vl' === (0, env_namespaceObject.vlLocateMode)()) {
        imagePayload = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
    } else if (!(0, env_namespaceObject.vlLocateMode)()) {
        imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
    }
    (0, external_common_js_namespaceObject.warnGPT4oSizeLimit)(size);

    const msgs = [
        {
            role: 'system',
            content: systemPrompt
        },
        {
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: imagePayload,
                        detail: 'high'
                    }
                },
                {
                    type: 'text',
                    text: userInstructionPrompt
                }
            ]
        }
    ];

    const call = callAI || external_common_js_namespaceObject.callAiFn;
    const { content, usage } = await call(msgs, external_common_js_namespaceObject.AIActionType.PLAN);
    const planFromAI = content;

    // Validate the response BEFORE touching any of its properties. Previously
    // this check ran after `planFromAI.action` had already been read, so an
    // empty response surfaced as a generic TypeError instead of this message.
    (0, utils_namespaceObject.assert)(planFromAI, "can't get plans from AI");

    const rawResponse = JSON.stringify(content, void 0, 2);

    // A single `action` with a `type` wins over the `actions` list; a missing
    // list is normalized to an empty array.
    const singleAction = planFromAI.action;
    const hasSingleAction = null == singleAction ? void 0 : singleAction.type;
    const actions = (hasSingleAction ? [
        planFromAI.action
    ] : planFromAI.actions) || [];

    const returnValue = {
        ...planFromAI,
        actions,
        rawResponse,
        usage,
        yamlFlow: (0, external_common_js_namespaceObject.buildYamlFlowFromPlans)(actions, planFromAI.sleep)
    };

    if ((0, env_namespaceObject.vlLocateMode)()) {
        actions.forEach((action)=>{
            if (action.locate) try {
                action.locate = (0, external_common_js_namespaceObject.fillBboxParam)(action.locate, size.width, size.height);
            } catch (e) {
                throw new Error(`Failed to fill locate param: ${planFromAI.error} (${e instanceof Error ? e.message : 'unknown error'})`, {
                    cause: e
                });
            }
        });
        // In Qwen-VL, error means error. In GPT-4o, error may mean more
        // actions are needed, so this check is only applied in VL mode.
        (0, utils_namespaceObject.assert)(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
    } else {
        actions.forEach((action)=>{
            const locate = action.locate;
            if (null == locate ? void 0 : locate.id) {
                // The model may return an indexId; look the element up and
                // substitute its real id to avoid exceptions later on.
                const element = elementById(action.locate.id);
                if (element) action.locate.id = element.id;
            }
        });
    }

    if (0 === actions.length && returnValue.more_actions_needed_by_instruction && !returnValue.sleep) {
        console.warn('No actions planned for the prompt, but model said more actions are needed:', userInstruction);
    }
    return returnValue;
}
|
|
111
|
+
// CommonJS export glue: copy `plan` and any other enumerable entries of
// __webpack_exports__ onto `exports`, then set the ES-module interop flag.
// The block scope keeps the helper list out of module scope.
{
    const explicitExportNames = [
        "plan"
    ];
    // The statically known export is copied first.
    for (const exportName of explicitExportNames) {
        exports[exportName] = __webpack_exports__[exportName];
    }
    // Remaining enumerable entries, if any, are copied afterwards.
    for (const exportName in __webpack_exports__) {
        if (explicitExportNames.indexOf(exportName) !== -1) continue;
        exports[exportName] = __webpack_exports__[exportName];
    }
}
Object.defineProperty(exports, '__esModule', {
    value: true
});
|
|
118
|
+
|
|
119
|
+
//# sourceMappingURL=llm-planning.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n PageType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport { vlLocateMode } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n callAiFn,\n fillBboxParam,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n pageType: PageType;\n actionSpace: DeviceAction[];\n callAI?: typeof callAiFn<PlanningAIResponse>;\n log?: string;\n actionContext?: string;\n },\n): Promise<PlanningAIResponse> {\n const { callAI, context } = opts || {};\n const { screenshotBase64, size } = context;\n const { description: pageDescription, elementById } 
=\n await describeUserPage(context);\n\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlLocateMode(),\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(\n vlLocateMode(),\n ).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlLocateMode() === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlLocateMode()) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const call = callAI || callAiFn;\n const { content, usage } = await call(msgs, AIActionType.PLAN);\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(actions, planFromAI.sleep),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n if (vlLocateMode()) {\n actions.forEach((action) => {\n if (action.locate) {\n try {\n action.locate = fillBboxParam(action.locate, size.width, size.height);\n } catch (e) {\n throw new Error(\n `Failed to fill locate param: ${planFromAI.error} (${\n e instanceof Error ? e.message : 'unknown error'\n })`,\n {\n cause: e,\n },\n );\n }\n }\n });\n // in Qwen-VL, error means error. 
In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n } else {\n actions.forEach((action) => {\n if (action.locate?.id) {\n // The model may return indexId, need to perform a query correction to avoid exceptions\n const element = elementById(action.locate.id);\n if (element) {\n action.locate.id = element.id;\n }\n }\n });\n }\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","plan","userInstruction","opts","_planFromAI_action","callAI","context","screenshotBase64","size","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","vlLocateMode","taskBackgroundContextText","generateTaskBackgroundContext","userInstructionPrompt","automationUserPrompt","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","msgs","call","callAiFn","content","usage","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","fillBboxParam","e","Error","_action_locate","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;ACmBO,eAAeI,KACpBC,eAAuB,EACvBC,IAOC;QA8DEC;IA5DH,MAAM,EAAEC,MAAM,EAAEC,OAAO,EAAE,GAAGH,QAAQ,CAAC;IAC
rC,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGF;IACnC,MAAM,EAAE,aAAaG,eAAe,EAAEC,WAAW,EAAE,GACjD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBL;IAEzB,MAAMM,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaV,KAAK,WAAW;QAC7B,QAAQW,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA;IACV;IACA,MAAMC,4BAA4BC,AAAAA,IAAAA,gCAAAA,6BAAAA,AAAAA,EAChCd,iBACAC,KAAK,GAAG,EACRA,KAAK,aAAa;IAEpB,MAAMc,wBAAwB,MAAMC,AAAAA,IAAAA,gCAAAA,oBAAAA,AAAAA,EAClCJ,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACA,MAAM,CAAC;QACPL;QACA,uBAAuBM;IACzB;IAEA,IAAII,eAAeZ;IACnB,IAAIO,AAAmB,cAAnBA,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACFK,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACL,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KACVK,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBd,kBACAD,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhBgB,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBd;IAEnB,MAAMe,OAAe;QACnB;YAAE,MAAM;YAAU,SAASX;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKO;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMF;gBACR;aACD;QACH;KACD;IAED,MAAMO,OAAOnB,UAAUoB,mCAAAA,QAAQA;IAC/B,MAAM,EAAEC,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMH,KAAKD,MAAMK,mCAAAA,YAAAA,CAAAA,IAAiB;IAC7D,MAAMC,cAAcC,KAAK,SAAS,CAACJ,SAASK,QAAW;IACvD,MAAMC,aAAaN;IAEnB,MAAMO,UACH7B,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAAC4B,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAF;QACA,UAAUQ,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EAAuBF,SAASD,WAAW,KAAK;IAC5D;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAEnB,IAAIlB,AAAAA,IAAAA,oBAAAA,YAAAA,AAAAA,KAAgB;QAClBmB,QAAQ,OAAO,CAAC,CAACI;YACf,IAAIA,OAAO,MAAM,EACf,IAAI;gBACFA,OAAO,MAAM,GAAGC,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EAAcD,OAAO,MAAM,EAAE7B,KAAK,KAAK,EAAEA,KAAK,MAAM;YACtE,EAAE,OAAO+B,GAAG;gBACV,MAAM,IAAIC,MACR,CAAC,6BAA6B,EAAER,WAAW,KAAK,CAAC,EAAE,EACjDO,aAAaC,QAAQD,EAAE,OAAO,GAAG,gBAClC,CAAC,CAAC,EACH;oBACE,OAAOA;gBACT;YAEJ;QAEJ;QAEAH,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IACzE,OACEC,QAAQ,OAAO,CAAC,CAACI;
YACXI;QAAJ,IAAI,QAAAA,CAAAA,iBAAAA,OAAO,MAAM,AAAD,IAAZA,KAAAA,IAAAA,eAAe,EAAE,EAAE;YAErB,MAAMC,UAAUhC,YAAY2B,OAAO,MAAM,CAAC,EAAE;YAC5C,IAAIK,SACFL,OAAO,MAAM,CAAC,EAAE,GAAGK,QAAQ,EAAE;QAEjC;IACF;IAGF,IACET,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBS,QAAQ,IAAI,CACV,8EACAzC;IAIJ,OAAOgC;AACT"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
assertSchema: ()=>assertSchema,
|
|
28
|
+
systemPromptToAssert: ()=>systemPromptToAssert
|
|
29
|
+
});
|
|
30
|
+
const env_namespaceObject = require("@midscene/shared/env");
|
|
31
|
+
const defaultAssertionPrompt = 'You are a senior testing engineer. User will give an assertion and a screenshot of a page. By carefully viewing the screenshot, please tell whether the assertion is truthy.';
|
|
32
|
+
const defaultAssertionResponseJsonFormat = `Return in the following JSON format:
|
|
33
|
+
{
|
|
34
|
+
pass: boolean, // whether the assertion is truthy
|
|
35
|
+
thought: string | null, // string, if the result is falsy, give the reason why it is falsy. Otherwise, put null.
|
|
36
|
+
}`;
|
|
37
|
+
const getUiTarsAssertionResponseJsonFormat = ()=>`## Output Json String Format
|
|
38
|
+
\`\`\`
|
|
39
|
+
"{
|
|
40
|
+
"pass": <<is a boolean value from the enum [true, false], true means the assertion is truthy>>,
|
|
41
|
+
"thought": "<<is a string, give the reason why the assertion is falsy or truthy. Otherwise.>>"
|
|
42
|
+
}"
|
|
43
|
+
\`\`\`
|
|
44
|
+
|
|
45
|
+
## Rules **MUST** follow
|
|
46
|
+
- Make sure to return **only** the JSON, with **no additional** text or explanations.
|
|
47
|
+
- Use ${(0, env_namespaceObject.getPreferredLanguage)()} in \`thought\` part.
|
|
48
|
+
- You **MUST** strictly follow up the **Output Json String Format**.`;
|
|
49
|
+
function systemPromptToAssert(model) {
|
|
50
|
+
return `${defaultAssertionPrompt}
|
|
51
|
+
|
|
52
|
+
${model.isUITars ? getUiTarsAssertionResponseJsonFormat() : defaultAssertionResponseJsonFormat}`;
|
|
53
|
+
}
|
|
54
|
+
const assertSchema = {
|
|
55
|
+
type: 'json_schema',
|
|
56
|
+
json_schema: {
|
|
57
|
+
name: 'assert',
|
|
58
|
+
strict: true,
|
|
59
|
+
schema: {
|
|
60
|
+
type: 'object',
|
|
61
|
+
properties: {
|
|
62
|
+
pass: {
|
|
63
|
+
type: 'boolean',
|
|
64
|
+
description: 'Whether the assertion passed or failed'
|
|
65
|
+
},
|
|
66
|
+
thought: {
|
|
67
|
+
type: [
|
|
68
|
+
'string',
|
|
69
|
+
'null'
|
|
70
|
+
],
|
|
71
|
+
description: 'The thought process behind the assertion'
|
|
72
|
+
}
|
|
73
|
+
},
|
|
74
|
+
required: [
|
|
75
|
+
'pass',
|
|
76
|
+
'thought'
|
|
77
|
+
],
|
|
78
|
+
additionalProperties: false
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
};
|
|
82
|
+
exports.assertSchema = __webpack_exports__.assertSchema;
|
|
83
|
+
exports.systemPromptToAssert = __webpack_exports__.systemPromptToAssert;
|
|
84
|
+
for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
85
|
+
"assertSchema",
|
|
86
|
+
"systemPromptToAssert"
|
|
87
|
+
].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
88
|
+
Object.defineProperty(exports, '__esModule', {
|
|
89
|
+
value: true
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
//# sourceMappingURL=assertion.js.map
|