@midscene/core 0.28.12-beta-20250923124135.0 → 0.28.12-beta-20250924091555.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent/agent.mjs +1 -1
- package/dist/es/agent/agent.mjs.map +1 -1
- package/dist/es/agent/tasks.mjs +45 -160
- package/dist/es/agent/tasks.mjs.map +1 -1
- package/dist/es/agent/utils.mjs +1 -1
- package/dist/es/ai-model/common.mjs +25 -10
- package/dist/es/ai-model/common.mjs.map +1 -1
- package/dist/es/ai-model/conversation-history.mjs +58 -0
- package/dist/es/ai-model/conversation-history.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +3 -2
- package/dist/es/ai-model/inspect.mjs +30 -7
- package/dist/es/ai-model/inspect.mjs.map +1 -1
- package/dist/es/ai-model/llm-planning.mjs +72 -15
- package/dist/es/ai-model/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/prompt/llm-planning.mjs +7 -61
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -1
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -1
- package/dist/es/ai-model/ui-tars-planning.mjs +40 -18
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -1
- package/dist/es/index.mjs.map +1 -1
- package/dist/es/insight/index.mjs +2 -1
- package/dist/es/insight/index.mjs.map +1 -1
- package/dist/es/types.mjs.map +1 -1
- package/dist/es/utils.mjs +2 -2
- package/dist/lib/agent/agent.js +1 -1
- package/dist/lib/agent/agent.js.map +1 -1
- package/dist/lib/agent/tasks.js +44 -159
- package/dist/lib/agent/tasks.js.map +1 -1
- package/dist/lib/agent/utils.js +1 -1
- package/dist/lib/ai-model/common.js +27 -9
- package/dist/lib/ai-model/common.js.map +1 -1
- package/dist/lib/ai-model/conversation-history.js +92 -0
- package/dist/lib/ai-model/conversation-history.js.map +1 -0
- package/dist/lib/ai-model/index.js +8 -4
- package/dist/lib/ai-model/inspect.js +30 -7
- package/dist/lib/ai-model/inspect.js.map +1 -1
- package/dist/lib/ai-model/llm-planning.js +71 -14
- package/dist/lib/ai-model/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/prompt/llm-planning.js +7 -67
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -1
- package/dist/lib/ai-model/service-caller/index.js.map +1 -1
- package/dist/lib/ai-model/ui-tars-planning.js +42 -20
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -1
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/insight/index.js +2 -1
- package/dist/lib/insight/index.js.map +1 -1
- package/dist/lib/types.js.map +1 -1
- package/dist/lib/utils.js +2 -2
- package/dist/types/agent/tasks.d.ts +4 -17
- package/dist/types/ai-model/common.d.ts +4 -3
- package/dist/types/ai-model/conversation-history.d.ts +18 -0
- package/dist/types/ai-model/index.d.ts +2 -1
- package/dist/types/ai-model/llm-planning.d.ts +2 -1
- package/dist/types/ai-model/prompt/llm-planning.d.ts +0 -6
- package/dist/types/ai-model/service-caller/index.d.ts +1 -1
- package/dist/types/ai-model/ui-tars-planning.d.ts +6 -18
- package/dist/types/index.d.ts +1 -1
- package/dist/types/types.d.ts +0 -1
- package/dist/types/yaml.d.ts +1 -11
- package/package.json +3 -3
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __webpack_require__ = {};
|
|
3
|
+
(()=>{
|
|
4
|
+
__webpack_require__.d = (exports1, definition)=>{
|
|
5
|
+
for(var key in definition)if (__webpack_require__.o(definition, key) && !__webpack_require__.o(exports1, key)) Object.defineProperty(exports1, key, {
|
|
6
|
+
enumerable: true,
|
|
7
|
+
get: definition[key]
|
|
8
|
+
});
|
|
9
|
+
};
|
|
10
|
+
})();
|
|
11
|
+
(()=>{
|
|
12
|
+
__webpack_require__.o = (obj, prop)=>Object.prototype.hasOwnProperty.call(obj, prop);
|
|
13
|
+
})();
|
|
14
|
+
(()=>{
|
|
15
|
+
__webpack_require__.r = (exports1)=>{
|
|
16
|
+
if ('undefined' != typeof Symbol && Symbol.toStringTag) Object.defineProperty(exports1, Symbol.toStringTag, {
|
|
17
|
+
value: 'Module'
|
|
18
|
+
});
|
|
19
|
+
Object.defineProperty(exports1, '__esModule', {
|
|
20
|
+
value: true
|
|
21
|
+
});
|
|
22
|
+
};
|
|
23
|
+
})();
|
|
24
|
+
var __webpack_exports__ = {};
|
|
25
|
+
__webpack_require__.r(__webpack_exports__);
|
|
26
|
+
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
ConversationHistory: ()=>ConversationHistory
|
|
28
|
+
});
|
|
29
|
+
function _define_property(obj, key, value) {
|
|
30
|
+
if (key in obj) Object.defineProperty(obj, key, {
|
|
31
|
+
value: value,
|
|
32
|
+
enumerable: true,
|
|
33
|
+
configurable: true,
|
|
34
|
+
writable: true
|
|
35
|
+
});
|
|
36
|
+
else obj[key] = value;
|
|
37
|
+
return obj;
|
|
38
|
+
}
|
|
39
|
+
var _computedKey;
|
|
40
|
+
_computedKey = Symbol.iterator;
|
|
41
|
+
let _computedKey1 = _computedKey;
|
|
42
|
+
class ConversationHistory {
|
|
43
|
+
append(message) {
|
|
44
|
+
if ('user' === message.role) this.pruneOldestUserMessageIfNecessary();
|
|
45
|
+
this.messages.push(message);
|
|
46
|
+
}
|
|
47
|
+
seed(messages) {
|
|
48
|
+
this.reset();
|
|
49
|
+
messages.forEach((message)=>{
|
|
50
|
+
this.append(message);
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
reset() {
|
|
54
|
+
this.messages.length = 0;
|
|
55
|
+
}
|
|
56
|
+
snapshot() {
|
|
57
|
+
return [
|
|
58
|
+
...this.messages
|
|
59
|
+
];
|
|
60
|
+
}
|
|
61
|
+
get length() {
|
|
62
|
+
return this.messages.length;
|
|
63
|
+
}
|
|
64
|
+
[_computedKey1]() {
|
|
65
|
+
return this.messages[Symbol.iterator]();
|
|
66
|
+
}
|
|
67
|
+
toJSON() {
|
|
68
|
+
return this.snapshot();
|
|
69
|
+
}
|
|
70
|
+
pruneOldestUserMessageIfNecessary() {
|
|
71
|
+
const userMessages = this.messages.filter((item)=>'user' === item.role);
|
|
72
|
+
if (userMessages.length < this.maxUserImageMessages) return;
|
|
73
|
+
const firstUserMessageIndex = this.messages.findIndex((item)=>'user' === item.role);
|
|
74
|
+
if (firstUserMessageIndex >= 0) this.messages.splice(firstUserMessageIndex, 1);
|
|
75
|
+
}
|
|
76
|
+
constructor(options){
|
|
77
|
+
var _options_initialMessages;
|
|
78
|
+
_define_property(this, "maxUserImageMessages", void 0);
|
|
79
|
+
_define_property(this, "messages", []);
|
|
80
|
+
this.maxUserImageMessages = (null == options ? void 0 : options.maxUserImageMessages) ?? 4;
|
|
81
|
+
if (null == options ? void 0 : null == (_options_initialMessages = options.initialMessages) ? void 0 : _options_initialMessages.length) this.seed(options.initialMessages);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
exports.ConversationHistory = __webpack_exports__.ConversationHistory;
|
|
85
|
+
for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
86
|
+
"ConversationHistory"
|
|
87
|
+
].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
88
|
+
Object.defineProperty(exports, '__esModule', {
|
|
89
|
+
value: true
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
//# sourceMappingURL=conversation-history.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model/conversation-history.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/conversation-history.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nexport interface ConversationHistoryOptions {\n maxUserImageMessages?: number;\n initialMessages?: ChatCompletionMessageParam[];\n}\n\nexport class ConversationHistory {\n private readonly maxUserImageMessages: number;\n private readonly messages: ChatCompletionMessageParam[] = [];\n\n constructor(options?: ConversationHistoryOptions) {\n this.maxUserImageMessages = options?.maxUserImageMessages ?? 4;\n if (options?.initialMessages?.length) {\n this.seed(options.initialMessages);\n }\n }\n\n append(message: ChatCompletionMessageParam) {\n if (message.role === 'user') {\n this.pruneOldestUserMessageIfNecessary();\n }\n\n this.messages.push(message);\n }\n\n seed(messages: ChatCompletionMessageParam[]) {\n this.reset();\n messages.forEach((message) => {\n this.append(message);\n });\n }\n\n reset() {\n this.messages.length = 0;\n }\n\n snapshot(): ChatCompletionMessageParam[] {\n return [...this.messages];\n }\n\n get length(): number {\n return this.messages.length;\n }\n\n [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam> {\n return this.messages[Symbol.iterator]();\n }\n\n toJSON(): ChatCompletionMessageParam[] {\n return this.snapshot();\n }\n\n private pruneOldestUserMessageIfNecessary() {\n const userMessages = this.messages.filter((item) => item.role === 'user');\n if (userMessages.length < this.maxUserImageMessages) {\n return;\n }\n\n const firstUserMessageIndex = this.messages.findIndex(\n (item) => item.role === 'user',\n );\n\n if (firstUserMessageIndex >= 0) {\n this.messages.splice(firstUserMessageIndex, 1);\n }\n }\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","ConversationHistory","message","messages","userMessages","item","firstUserMessageIndex","options","_options_initialMessages"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;eCuCGG,OAAO,QAAQ;;AAtCX,MAAMC;IAWX,OAAOC,OAAmC,EAAE;QAC1C,IAAIA,AAAiB,WAAjBA,QAAQ,IAAI,EACd,IAAI,CAAC,iCAAiC;QAGxC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAACA;IACrB;IAEA,KAAKC,QAAsC,EAAE;QAC3C,IAAI,CAAC,KAAK;QACVA,SAAS,OAAO,CAAC,CAACD;YAChB,IAAI,CAAC,MAAM,CAACA;QACd;IACF;IAEA,QAAQ;QACN,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG;IACzB;IAEA,WAAyC;QACvC,OAAO;eAAI,IAAI,CAAC,QAAQ;SAAC;IAC3B;IAEA,IAAI,SAAiB;QACnB,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM;IAC7B;IAEA,CAAC,cAAD,GAAkE;QAChE,OAAO,IAAI,CAAC,QAAQ,CAACF,OAAO,QAAQ,CAAC;IACvC;IAEA,SAAuC;QACrC,OAAO,IAAI,CAAC,QAAQ;IACtB;IAEQ,oCAAoC;QAC1C,MAAMI,eAAe,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAACC,OAASA,AAAc,WAAdA,KAAK,IAAI;QAC7D,IAAID,aAAa,MAAM,GAAG,IAAI,CAAC,oBAAoB,EACjD;QAGF,MAAME,wBAAwB,IAAI,CAAC,QAAQ,CAAC,SAAS,CACnD,CAACD,OAASA,AAAc,WAAdA,KAAK,IAAI;QAGrB,IAAIC,yBAAyB,GAC3B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAACA,uBAAuB;IAEhD;IAvDA,YAAYC,OAAoC,CAAE;YAE5CC;QALN,uBAAiB,wBAAjB;QACA,uBAAiB,YAAyC,EAAE;QAG1D,IAAI,CAAC,oBAAoB,GAAGD,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,oBAAoB,AAAD,KAAK;QAC7D,IAAIC,QAAAA,UAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,QAAS,eAAe,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAClC,IAAI,CAAC,IAAI,CAACD,QAAQ,eAAe;IAErC;AAmDF"}
|
|
@@ -24,6 +24,7 @@ var __webpack_require__ = {};
|
|
|
24
24
|
var __webpack_exports__ = {};
|
|
25
25
|
__webpack_require__.r(__webpack_exports__);
|
|
26
26
|
__webpack_require__.d(__webpack_exports__, {
|
|
27
|
+
uiTarsPlanning: ()=>external_ui_tars_planning_js_namespaceObject.uiTarsPlanning,
|
|
27
28
|
plan: ()=>external_llm_planning_js_namespaceObject.plan,
|
|
28
29
|
SizeSchema: ()=>external_common_js_namespaceObject.SizeSchema,
|
|
29
30
|
generateYamlTest: ()=>yaml_generator_js_namespaceObject.generateYamlTest,
|
|
@@ -32,15 +33,15 @@ __webpack_require__.d(__webpack_exports__, {
|
|
|
32
33
|
AiLocateSection: ()=>external_inspect_js_namespaceObject.AiLocateSection,
|
|
33
34
|
AIActionType: ()=>external_common_js_namespaceObject.AIActionType,
|
|
34
35
|
TUserPromptSchema: ()=>external_common_js_namespaceObject.TUserPromptSchema,
|
|
36
|
+
ConversationHistory: ()=>external_conversation_history_js_namespaceObject.ConversationHistory,
|
|
35
37
|
callAI: ()=>index_js_namespaceObject.callAI,
|
|
36
|
-
findAllMidsceneLocatorField: ()=>external_common_js_namespaceObject.findAllMidsceneLocatorField,
|
|
37
38
|
AiExtractElementInfo: ()=>external_inspect_js_namespaceObject.AiExtractElementInfo,
|
|
39
|
+
findAllMidsceneLocatorField: ()=>external_common_js_namespaceObject.findAllMidsceneLocatorField,
|
|
38
40
|
generatePlaywrightTest: ()=>playwright_generator_js_namespaceObject.generatePlaywrightTest,
|
|
39
41
|
generateYamlTestStream: ()=>yaml_generator_js_namespaceObject.generateYamlTestStream,
|
|
40
42
|
loadActionParam: ()=>external_common_js_namespaceObject.loadActionParam,
|
|
41
43
|
resizeImageForUiTars: ()=>external_ui_tars_planning_js_namespaceObject.resizeImageForUiTars,
|
|
42
44
|
systemPromptToLocateElement: ()=>llm_locator_js_namespaceObject.systemPromptToLocateElement,
|
|
43
|
-
vlmPlanning: ()=>external_ui_tars_planning_js_namespaceObject.vlmPlanning,
|
|
44
45
|
RectSchema: ()=>external_common_js_namespaceObject.RectSchema,
|
|
45
46
|
PointSchema: ()=>external_common_js_namespaceObject.PointSchema,
|
|
46
47
|
adaptBboxToRect: ()=>external_common_js_namespaceObject.adaptBboxToRect,
|
|
@@ -61,10 +62,12 @@ const external_inspect_js_namespaceObject = require("./inspect.js");
|
|
|
61
62
|
const external_llm_planning_js_namespaceObject = require("./llm-planning.js");
|
|
62
63
|
const external_common_js_namespaceObject = require("./common.js");
|
|
63
64
|
const external_ui_tars_planning_js_namespaceObject = require("./ui-tars-planning.js");
|
|
65
|
+
const external_conversation_history_js_namespaceObject = require("./conversation-history.js");
|
|
64
66
|
exports.AIActionType = __webpack_exports__.AIActionType;
|
|
65
67
|
exports.AiExtractElementInfo = __webpack_exports__.AiExtractElementInfo;
|
|
66
68
|
exports.AiLocateElement = __webpack_exports__.AiLocateElement;
|
|
67
69
|
exports.AiLocateSection = __webpack_exports__.AiLocateSection;
|
|
70
|
+
exports.ConversationHistory = __webpack_exports__.ConversationHistory;
|
|
68
71
|
exports.PointSchema = __webpack_exports__.PointSchema;
|
|
69
72
|
exports.RectSchema = __webpack_exports__.RectSchema;
|
|
70
73
|
exports.SizeSchema = __webpack_exports__.SizeSchema;
|
|
@@ -87,12 +90,13 @@ exports.loadActionParam = __webpack_exports__.loadActionParam;
|
|
|
87
90
|
exports.plan = __webpack_exports__.plan;
|
|
88
91
|
exports.resizeImageForUiTars = __webpack_exports__.resizeImageForUiTars;
|
|
89
92
|
exports.systemPromptToLocateElement = __webpack_exports__.systemPromptToLocateElement;
|
|
90
|
-
exports.
|
|
93
|
+
exports.uiTarsPlanning = __webpack_exports__.uiTarsPlanning;
|
|
91
94
|
for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
92
95
|
"AIActionType",
|
|
93
96
|
"AiExtractElementInfo",
|
|
94
97
|
"AiLocateElement",
|
|
95
98
|
"AiLocateSection",
|
|
99
|
+
"ConversationHistory",
|
|
96
100
|
"PointSchema",
|
|
97
101
|
"RectSchema",
|
|
98
102
|
"SizeSchema",
|
|
@@ -115,7 +119,7 @@ for(var __webpack_i__ in __webpack_exports__)if (-1 === [
|
|
|
115
119
|
"plan",
|
|
116
120
|
"resizeImageForUiTars",
|
|
117
121
|
"systemPromptToLocateElement",
|
|
118
|
-
"
|
|
122
|
+
"uiTarsPlanning"
|
|
119
123
|
].indexOf(__webpack_i__)) exports[__webpack_i__] = __webpack_exports__[__webpack_i__];
|
|
120
124
|
Object.defineProperty(exports, '__esModule', {
|
|
121
125
|
value: true
|
|
@@ -97,12 +97,30 @@ async function AiLocateElement(options) {
|
|
|
97
97
|
});
|
|
98
98
|
const systemPrompt = (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)(vlMode);
|
|
99
99
|
let imagePayload = screenshotBase64;
|
|
100
|
+
let imageWidth = context.size.width;
|
|
101
|
+
let imageHeight = context.size.height;
|
|
102
|
+
let originalImageWidth = imageWidth;
|
|
103
|
+
let originalImageHeight = imageHeight;
|
|
100
104
|
if (options.searchConfig) {
|
|
105
|
+
var _options_searchConfig_rect, _options_searchConfig_rect1;
|
|
101
106
|
(0, utils_namespaceObject.assert)(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
|
|
102
107
|
(0, utils_namespaceObject.assert)(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
|
|
103
108
|
imagePayload = options.searchConfig.imageBase64;
|
|
104
|
-
|
|
105
|
-
|
|
109
|
+
imageWidth = null == (_options_searchConfig_rect = options.searchConfig.rect) ? void 0 : _options_searchConfig_rect.width;
|
|
110
|
+
imageHeight = null == (_options_searchConfig_rect1 = options.searchConfig.rect) ? void 0 : _options_searchConfig_rect1.height;
|
|
111
|
+
originalImageWidth = imageWidth;
|
|
112
|
+
originalImageHeight = imageHeight;
|
|
113
|
+
} else if ('qwen-vl' === vlMode) {
|
|
114
|
+
const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
|
|
115
|
+
imageWidth = paddedResult.width;
|
|
116
|
+
imageHeight = paddedResult.height;
|
|
117
|
+
imagePayload = paddedResult.imageBase64;
|
|
118
|
+
} else if ('qwen3-vl' === vlMode) {
|
|
119
|
+
const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload, 32);
|
|
120
|
+
imageWidth = paddedResult.width;
|
|
121
|
+
imageHeight = paddedResult.height;
|
|
122
|
+
imagePayload = paddedResult.imageBase64;
|
|
123
|
+
} else if (!vlMode) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, context.size);
|
|
106
124
|
const msgs = [
|
|
107
125
|
{
|
|
108
126
|
role: 'system',
|
|
@@ -139,8 +157,8 @@ async function AiLocateElement(options) {
|
|
|
139
157
|
let errors = 'errors' in res.content ? res.content.errors : [];
|
|
140
158
|
try {
|
|
141
159
|
if ('bbox' in res.content && Array.isArray(res.content.bbox)) {
|
|
142
|
-
var
|
|
143
|
-
resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox,
|
|
160
|
+
var _options_searchConfig_rect2, _options_searchConfig, _options_searchConfig_rect3, _options_searchConfig1;
|
|
161
|
+
resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox, imageWidth, imageHeight, null == (_options_searchConfig = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect2 = _options_searchConfig.rect) ? void 0 : _options_searchConfig_rect2.left, null == (_options_searchConfig1 = options.searchConfig) ? void 0 : null == (_options_searchConfig_rect3 = _options_searchConfig1.rect) ? void 0 : _options_searchConfig_rect3.top, originalImageWidth, originalImageHeight, vlMode);
|
|
144
162
|
debugInspect('resRect', resRect);
|
|
145
163
|
const rectCenter = {
|
|
146
164
|
x: resRect.left + resRect.width / 2,
|
|
@@ -219,11 +237,11 @@ async function AiLocateSection(options) {
|
|
|
219
237
|
let sectionRect;
|
|
220
238
|
const sectionBbox = result.content.bbox;
|
|
221
239
|
if (sectionBbox) {
|
|
222
|
-
const targetRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(sectionBbox, context.size.width, context.size.height, 0, 0, vlMode);
|
|
240
|
+
const targetRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(sectionBbox, context.size.width, context.size.height, 0, 0, context.size.width, context.size.height, vlMode);
|
|
223
241
|
debugSection('original targetRect %j', targetRect);
|
|
224
242
|
const referenceBboxList = result.content.references_bbox || [];
|
|
225
243
|
debugSection('referenceBboxList %j', referenceBboxList);
|
|
226
|
-
const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>(0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.size.width, context.size.height, 0, 0, vlMode));
|
|
244
|
+
const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>(0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.size.width, context.size.height, 0, 0, context.size.width, context.size.height, vlMode));
|
|
227
245
|
debugSection('referenceRects %j', referenceRects);
|
|
228
246
|
const mergedRect = (0, external_common_js_namespaceObject.mergeRects)([
|
|
229
247
|
targetRect,
|
|
@@ -234,7 +252,12 @@ async function AiLocateSection(options) {
|
|
|
234
252
|
debugSection('expanded sectionRect %j', sectionRect);
|
|
235
253
|
}
|
|
236
254
|
let imageBase64 = screenshotBase64;
|
|
237
|
-
if (sectionRect)
|
|
255
|
+
if (sectionRect) {
|
|
256
|
+
const croppedResult = await (0, img_namespaceObject.cropByRect)(screenshotBase64, sectionRect, 'qwen-vl' === vlMode);
|
|
257
|
+
imageBase64 = croppedResult.imageBase64;
|
|
258
|
+
sectionRect.width = croppedResult.width;
|
|
259
|
+
sectionRect.height = croppedResult.height;
|
|
260
|
+
}
|
|
238
261
|
return {
|
|
239
262
|
rect: sectionRect,
|
|
240
263
|
imageBase64,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, { vlMode });\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n } else if (vlMode === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n options.searchConfig?.rect?.width || context.size.width,\n options.searchConfig?.rect?.height || context.size.height,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n vlMode,\n );\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n imageBase64 = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const { vlMode } = modelConfig;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n vlMode,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect","_options_searchConfig_rect1","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","cropByRect","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;ACoDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OASD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QAAEI;IAAO;IAE3CM,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,0BACA;IAEF,MAAMU,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BjB,wBAAwBY;IACpD;IACA,MAAMY,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BV;IAEjD,IAAIW,eAAeV;IAEnB,IAAIN,QAAQ,YAAY,EAAE;QACxBW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;IACjD,OAAO,IAAIK,AAAW,cAAXA,QACTW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACX,QACVW,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBZ,kBACAL,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOV,0BAAuC;QAChD,MAAMiB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAMC,MAAM,MAAMjB,SAASR,MAAM0B,mCAAAA,YAAAA,CAAAA,eAA4B,EAAEjB;IAE/D,MAAMkB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAG1DQ,4BAAAA,uBACAC,6BAAAA,wBACAC,6BAAAA,wBACAC,6BAAAA;YALFP,UAAUQ,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRZ,IAAI,OAAO,CAAC,IAAI,EAChBQ,AAAAA,SAAAA,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,6BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,2BAA4B,KAAK,AAAD,KAAK3B,QAAQ,IAAI,CAAC,KAAK,EACvD4B,AAAAA,SAAAA,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,MAAM,AAAD,KAAK5B,QAAQ,IAAI,CAAC,MAAM,UACzD6B,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/B1B;YAEFlB,aAAa,WAAWqC;YAExB,MAAMS,aAAa;gBACjB,GAAGT,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIU,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiClC,QAAQ,IAAI,EAAEgC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAUzB,wBAAwBwB;YAGpC,IAAIC,SAAS;gBACXT,kBAAkB;oBAACS;iBAAQ;gBAC3BR,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOa,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACb,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEc,IAAI,CAAC,CAAC;aAFtBd,SAAS;YAACc;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMhB;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACAd;QACA,OAAOY,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCsB;IACR;AACF;AAEO,eAAeC,gBAAgB3C,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAE2C,kBAAkB,EAAExC,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMa,eAAe+B,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4BxC;IACjD,MAAMyC,gCAAgC,MAAMC,uCAAAA,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoBzD,wBAAwBsD;IAC9C;IACA,MAAMjD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMwC;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMzB,SAAS,MAAM3B,mBAAmB;YACtC,QAAQoD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAjD,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM6B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBtD,MACA0B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBjB;IAGF,IAAI8C;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACAlD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAI;QAEFhB,aAAa,0BAA0B+D;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9D3D,aAAa,wBAAwBgE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS5B,MAAM,OAAO,CAAC4B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLuB,MACAtD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAI;QAGNhB,aAAa,qBAAqBiE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DjE,aAAa,iBAAiBmE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAYvD,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2B6D;IAC1C;IAEA,IAAIS,cAAcrD;IAClB,IAAI4C,aACFS,cAAc,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAClBtD,kBACA4C,aACA7C,AAAW,cAAXA;IAIJ,OAAO;QACL,MAAM6C;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAazB,KAAK,SAAS,CAACyB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAea,qBAGpB7D,OAMD;QA8CK8D;IA7CJ,MAAM,EAAEC,SAAS,EAAE9D,OAAO,EAAE+D,aAAa,EAAEvE,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAMU,eAAemD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAE3D,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAa+D,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;QACvC3D;IACF;IAEA,MAAM6D,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClC5D,aACAwD;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAK9D;YACL,QAAQ;QACV;IACF;IAGF8D,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAMvE,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAASsD;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCnE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAM0B,SAAS,MAAM3B,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAIwB;IACf;IAEA,MAAM6B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnBtD,MACA0B,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBjB;IAEF,OAAO;QACL,aAAa4C,OAAO,OAAO;QAC3BxC;QACA,OAAOwC,OAAO,KAAK;IACrB;AACF"}
|
|
1
|
+
{"version":3,"file":"ai-model/inspect.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/inspect.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n AIDataExtractionResponse,\n AIElementLocatorResponse,\n AIElementResponse,\n AISectionLocatorResponse,\n AIUsageInfo,\n BaseElement,\n ElementById,\n InsightExtractOption,\n Rect,\n ReferenceImage,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport {\n cropByRect,\n paddingToMatchBlockByBase64,\n preProcessImageUrl,\n} from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionSystemMessageParam,\n ChatCompletionUserMessageParam,\n} from 'openai/resources/index';\nimport type { TMultimodalPrompt, TUserPrompt } from './common';\nimport {\n AIActionType,\n adaptBboxToRect,\n expandSearchArea,\n markupImageForLLM,\n mergeRects,\n} from './common';\nimport {\n extractDataQueryPrompt,\n systemPromptToExtract,\n} from './prompt/extraction';\nimport {\n findElementPrompt,\n systemPromptToLocateElement,\n} from './prompt/llm-locator';\nimport {\n sectionLocatorInstruction,\n systemPromptToLocateSection,\n} from './prompt/llm-section-locator';\nimport {\n describeUserPage,\n distance,\n distanceThreshold,\n elementByPositionWithElementInfo,\n} from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nexport type AIArgs = [\n ChatCompletionSystemMessageParam,\n ...ChatCompletionUserMessageParam[],\n];\n\nconst debugInspect = getDebug('ai:inspect');\nconst debugSection = getDebug('ai:section');\n\nconst extraTextFromUserPrompt = (prompt: TUserPrompt): string => {\n if (typeof prompt === 'string') {\n return prompt;\n } else {\n return prompt.prompt;\n }\n};\n\nconst promptsToChatParam = async (\n multimodalPrompt: TMultimodalPrompt,\n): Promise<ChatCompletionUserMessageParam[]> => {\n const msgs: ChatCompletionUserMessageParam[] = [];\n if (multimodalPrompt?.images?.length) {\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'Next, I will provide all the reference images.',\n },\n ],\n });\n\n for (const item of multimodalPrompt.images) {\n const base64 = await preProcessImageUrl(\n item.url,\n !!multimodalPrompt.convertHttpImage2Base64,\n );\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'text',\n text: `reference image ${item.name}:`,\n },\n ],\n });\n\n msgs.push({\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: base64,\n detail: 'high',\n },\n },\n ],\n });\n }\n }\n return msgs;\n};\n\nexport async function AiLocateElement<\n ElementType extends BaseElement = BaseElement,\n>(options: {\n context: UIContext<ElementType>;\n targetElementDescription: TUserPrompt;\n referenceImage?: ReferenceImage;\n callAIFn: typeof callAIWithObjectResponse<\n AIElementResponse | [number, number]\n >;\n searchConfig?: Awaited<ReturnType<typeof AiLocateSection>>;\n modelConfig: IModelConfig;\n}): Promise<{\n parseResult: AIElementLocatorResponse;\n rect?: Rect;\n rawResponse: string;\n elementById: ElementById;\n usage?: AIUsageInfo;\n isOrderSensitive?: boolean;\n}> {\n const { context, targetElementDescription, callAIFn, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const { description, elementById, insertElementByPosition } =\n await describeUserPage(context, { vlMode });\n\n assert(\n targetElementDescription,\n 'cannot find the target element description',\n );\n const userInstructionPrompt = await findElementPrompt.format({\n pageDescription: description,\n targetElementDescription: extraTextFromUserPrompt(targetElementDescription),\n });\n const systemPrompt = systemPromptToLocateElement(vlMode);\n\n let imagePayload = screenshotBase64;\n let imageWidth = context.size.width;\n let imageHeight = context.size.height;\n let originalImageWidth = imageWidth;\n let originalImageHeight = imageHeight;\n\n if (options.searchConfig) {\n assert(\n options.searchConfig.rect,\n 'searchArea is provided but its rect cannot be found. Failed to locate element',\n );\n assert(\n options.searchConfig.imageBase64,\n 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element',\n );\n\n imagePayload = options.searchConfig.imageBase64;\n imageWidth = options.searchConfig.rect?.width;\n imageHeight = options.searchConfig.rect?.height;\n originalImageWidth = imageWidth;\n originalImageHeight = imageHeight;\n } else if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (vlMode === 'qwen3-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload, 32);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n if (typeof targetElementDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: targetElementDescription.images,\n convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const res = await callAIFn(msgs, AIActionType.INSPECT_ELEMENT, modelConfig);\n\n const rawResponse = JSON.stringify(res.content);\n\n let resRect: Rect | undefined;\n let matchedElements: AIElementLocatorResponse['elements'] =\n 'elements' in res.content ? res.content.elements : [];\n let errors: AIElementLocatorResponse['errors'] | undefined =\n 'errors' in res.content ? res.content.errors : [];\n try {\n if ('bbox' in res.content && Array.isArray(res.content.bbox)) {\n resRect = adaptBboxToRect(\n res.content.bbox,\n imageWidth,\n imageHeight,\n options.searchConfig?.rect?.left,\n options.searchConfig?.rect?.top,\n originalImageWidth,\n originalImageHeight,\n vlMode,\n );\n\n debugInspect('resRect', resRect);\n\n const rectCenter = {\n x: resRect.left + resRect.width / 2,\n y: resRect.top + resRect.height / 2,\n };\n let element = elementByPositionWithElementInfo(context.tree, rectCenter);\n\n const distanceToCenter = element\n ? distance({ x: element.center[0], y: element.center[1] }, rectCenter)\n : 0;\n\n if (!element || distanceToCenter > distanceThreshold) {\n element = insertElementByPosition(rectCenter);\n }\n\n if (element) {\n matchedElements = [element];\n errors = [];\n }\n }\n } catch (e) {\n const msg =\n e instanceof Error\n ? `Failed to parse bbox: ${e.message}`\n : 'unknown error in locate';\n if (!errors || errors?.length === 0) {\n errors = [msg];\n } else {\n errors.push(`(${msg})`);\n }\n }\n\n return {\n rect: resRect,\n parseResult: {\n elements: matchedElements,\n errors,\n },\n rawResponse,\n elementById,\n usage: res.usage,\n isOrderSensitive:\n typeof res.content === 'object' &&\n res.content !== null &&\n 'isOrderSensitive' in res.content\n ? (res.content as any).isOrderSensitive\n : undefined,\n };\n}\n\nexport async function AiLocateSection(options: {\n context: UIContext<BaseElement>;\n sectionDescription: TUserPrompt;\n modelConfig: IModelConfig;\n}): Promise<{\n rect?: Rect;\n imageBase64?: string;\n error?: string;\n rawResponse: string;\n usage?: AIUsageInfo;\n}> {\n const { context, sectionDescription, modelConfig } = options;\n const { vlMode } = modelConfig;\n const { screenshotBase64 } = context;\n\n const systemPrompt = systemPromptToLocateSection(vlMode);\n const sectionLocatorInstructionText = await sectionLocatorInstruction.format({\n sectionDescription: extraTextFromUserPrompt(sectionDescription),\n });\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: sectionLocatorInstructionText,\n },\n ],\n },\n ];\n\n if (typeof sectionDescription !== 'string') {\n const addOns = await promptsToChatParam({\n images: sectionDescription.images,\n convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AISectionLocatorResponse>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n\n let sectionRect: Rect | undefined;\n const sectionBbox = result.content.bbox;\n if (sectionBbox) {\n const targetRect = adaptBboxToRect(\n sectionBbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n debugSection('original targetRect %j', targetRect);\n\n const referenceBboxList = result.content.references_bbox || [];\n debugSection('referenceBboxList %j', referenceBboxList);\n\n const referenceRects = referenceBboxList\n .filter((bbox) => Array.isArray(bbox))\n .map((bbox) => {\n return adaptBboxToRect(\n bbox,\n context.size.width,\n context.size.height,\n 0,\n 0,\n context.size.width,\n context.size.height,\n vlMode,\n );\n });\n debugSection('referenceRects %j', referenceRects);\n\n // merge the sectionRect and referenceRects\n const mergedRect = mergeRects([targetRect, ...referenceRects]);\n debugSection('mergedRect %j', mergedRect);\n\n // expand search area to at least 200 x 200\n sectionRect = expandSearchArea(mergedRect, context.size, vlMode);\n debugSection('expanded sectionRect %j', sectionRect);\n }\n\n let imageBase64 = screenshotBase64;\n if (sectionRect) {\n const croppedResult = await cropByRect(\n screenshotBase64,\n sectionRect,\n vlMode === 'qwen-vl',\n );\n imageBase64 = croppedResult.imageBase64;\n sectionRect.width = croppedResult.width;\n sectionRect.height = croppedResult.height;\n }\n\n return {\n rect: sectionRect,\n imageBase64,\n error: result.content.error,\n rawResponse: JSON.stringify(result.content),\n usage: result.usage,\n };\n}\n\nexport async function AiExtractElementInfo<\n T,\n ElementType extends BaseElement = BaseElement,\n>(options: {\n dataQuery: string | Record<string, string>;\n multimodalPrompt?: TMultimodalPrompt;\n context: UIContext<ElementType>;\n extractOption?: InsightExtractOption;\n modelConfig: IModelConfig;\n}) {\n const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } =\n options;\n const { vlMode } = modelConfig;\n const systemPrompt = systemPromptToExtract();\n\n const { screenshotBase64 } = context;\n\n const { description, elementById } = await describeUserPage(context, {\n truncateTextLength: 200,\n filterNonTextContent: false,\n visibleOnly: false,\n domIncluded: extractOption?.domIncluded,\n vlMode,\n });\n\n const extractDataPromptText = await extractDataQueryPrompt(\n description,\n dataQuery,\n );\n\n const userContent: ChatCompletionUserMessageParam['content'] = [];\n\n if (extractOption?.screenshotIncluded !== false) {\n userContent.push({\n type: 'image_url',\n image_url: {\n url: screenshotBase64,\n detail: 'high',\n },\n });\n }\n\n userContent.push({\n type: 'text',\n text: extractDataPromptText,\n });\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: userContent,\n },\n ];\n\n if (options.extractOption?.returnThought) {\n msgs.push({\n role: 'user',\n content: 'Please provide reasons.',\n });\n }\n\n if (multimodalPrompt) {\n const addOns = await promptsToChatParam({\n images: multimodalPrompt.images,\n convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,\n });\n msgs.push(...addOns);\n }\n\n const result = await callAIWithObjectResponse<AIDataExtractionResponse<T>>(\n msgs,\n AIActionType.EXTRACT_DATA,\n modelConfig,\n );\n return {\n parseResult: result.content,\n elementById,\n usage: result.usage,\n };\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debugInspect","getDebug","debugSection","extraTextFromUserPrompt","prompt","promptsToChatParam","multimodalPrompt","_multimodalPrompt_images","msgs","item","base64","preProcessImageUrl","AiLocateElement","options","context","targetElementDescription","callAIFn","modelConfig","vlMode","screenshotBase64","description","elementById","insertElementByPosition","describeUserPage","assert","userInstructionPrompt","findElementPrompt","systemPrompt","systemPromptToLocateElement","imagePayload","imageWidth","imageHeight","originalImageWidth","originalImageHeight","_options_searchConfig_rect","_options_searchConfig_rect1","paddedResult","paddingToMatchBlockByBase64","markupImageForLLM","addOns","res","AIActionType","rawResponse","JSON","resRect","matchedElements","errors","Array","_options_searchConfig_rect2","_options_searchConfig_rect3","adaptBboxToRect","rectCenter","element","elementByPositionWithElementInfo","distanceToCenter","distance","distanceThreshold","e","msg","Error","undefined","AiLocateSection","sectionDescription","systemPromptToLocateSection","sectionLocatorInstructionText","sectionLocatorInstruction","result","callAIWithObjectResponse","sectionRect","sectionBbox","targetRect","referenceBboxList","referenceRects","bbox","mergedRect","mergeRects","expandSearchArea","imageBase64","croppedResult","cropByRect","AiExtractElementInfo","_options_extractOption","dataQuery","extractOption","systemPromptToExtract","extractDataPromptText","extractDataQueryPrompt","userContent"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;;;;;ACoDA,MAAMI,eAAeC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAC9B,MAAMC,eAAeD,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAE9B,MAAME,0BAA0B,CAACC;IAC/B,IAAI,AAAkB,YAAlB,OAAOA,QACT,OAAOA;IAEP,OAAOA,OAAO,MAAM;AAExB;AAEA,MAAMC,qBAAqB,OACzBC;QAGIC;IADJ,MAAMC,OAAyC,EAAE;IACjD,IAAID,QAAAA,mBAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,iBAAkB,MAAM,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAAE;QACpCC,KAAK,IAAI,CAAC;YACR,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM;gBACR;aACD;QACH;QAEA,KAAK,MAAMC,QAAQH,iBAAiB,MAAM,CAAE;YAC1C,MAAMI,SAAS,MAAMC,AAAAA,IAAAA,oBAAAA,kBAAAA,AAAAA,EACnBF,KAAK,GAAG,EACR,CAAC,CAACH,iBAAiB,uBAAuB;YAG5CE,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,MAAM,CAAC,gBAAgB,EAAEC,KAAK,IAAI,CAAC,CAAC,CAAC;oBACvC;iBACD;YACH;YAEAD,KAAK,IAAI,CAAC;gBACR,MAAM;gBACN,SAAS;oBACP;wBACE,MAAM;wBACN,WAAW;4BACT,KAAKE;4BACL,QAAQ;wBACV;oBACF;iBACD;YACH;QACF;IACF;IACA,OAAOF;AACT;AAEO,eAAeI,gBAEpBC,OASD;IAQC,MAAM,EAAEC,OAAO,EAAEC,wBAAwB,EAAEC,QAAQ,EAAEC,WAAW,EAAE,GAAGJ;IACrE,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAEC,uBAAuB,EAAE,GACzD,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QAAEI;IAAO;IAE3CM,IAAAA,sBAAAA,MAAAA,AAAAA,EACET,0BACA;IAEF,MAAMU,wBAAwB,MAAMC,+BAAAA,iBAAAA,CAAAA,MAAwB,CAAC;QAC3D,iBAAiBN;QACjB,0BAA0BjB,wBAAwBY;IACpD;IACA,MAAMY,eAAeC,AAAAA,IAAAA,+BAAAA,2BAAAA,AAAAA,EAA4BV;IAEjD,IAAIW,eAAeV;IACnB,IAAIW,aAAahB,QAAQ,IAAI,CAAC,KAAK;IACnC,IAAIiB,cAAcjB,QAAQ,IAAI,CAAC,MAAM;IACrC,IAAIkB,qBAAqBF;IACzB,IAAIG,sBAAsBF;IAE1B,IAAIlB,QAAQ,YAAY,EAAE;YAWXqB,4BACCC;QAXdX,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,IAAI,EACzB;QAEFW,IAAAA,sBAAAA,MAAAA,AAAAA,EACEX,QAAQ,YAAY,CAAC,WAAW,EAChC;QAGFgB,eAAehB,QAAQ,YAAY,CAAC,WAAW;QAC/CiB,aAAa,QAAAI,CAAAA,6BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,2BAA2B,KAAK;QAC7CH,cAAc,QAAAI,CAAAA,8BAAAA,QAAQ,YAAY,CAAC,IAAI,AAAD,IAAxBA,KAAAA,IAAAA,4BAA2B,MAAM;QAC/CH,qBAAqBF;QACrBG,sBAAsBF;IACxB,OAAO,IAAIb,AAAW,cAAXA,QAAsB;QAC/B,MAAMkB,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BR;QACvDC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC,OAAO,IAAIlB,AAAW,eAAXA,QAAuB;QAChC,MAAMkB,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BR,cAAc;QACrEC,aAAaM,aAAa,KAAK;QAC/BL,cAAcK,aAAa,MAAM;QACjCP,eAAeO,aAAa,WAAW;IACzC,OAAO,IAAI,CAAClB,QACVW,eAAe,MAAMS,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBnB,kBACAL,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhB,MAAMN,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKE;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMJ;gBACR;aACD;QACH;KACD;IAED,IAAI,AAAoC,YAApC,OAAOV,0BAAuC;QAChD,MAAMwB,SAAS,MAAMlC,mBAAmB;YACtC,QAAQU,yBAAyB,MAAM;YACvC,yBAAyBA,yBAAyB,uBAAuB;QAC3E;QACAP,KAAK,IAAI,IAAI+B;IACf;IAEA,MAAMC,MAAM,MAAMxB,SAASR,MAAMiC,mCAAAA,YAAAA,CAAAA,eAA4B,EAAExB;IAE/D,MAAMyB,cAAcC,KAAK,SAAS,CAACH,IAAI,OAAO;IAE9C,IAAII;IACJ,IAAIC,kBACF,cAAcL,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,QAAQ,GAAG,EAAE;IACvD,IAAIM,SACF,YAAYN,IAAI,OAAO,GAAGA,IAAI,OAAO,CAAC,MAAM,GAAG,EAAE;IACnD,IAAI;QACF,IAAI,UAAUA,IAAI,OAAO,IAAIO,MAAM,OAAO,CAACP,IAAI,OAAO,CAAC,IAAI,GAAG;gBAK1DQ,6BAAAA,uBACAC,6BAAAA;YALFL,UAAUM,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACRV,IAAI,OAAO,CAAC,IAAI,EAChBV,YACAC,aAAAA,QACAiB,CAAAA,wBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,sBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,IAAI,UAChCC,CAAAA,yBAAAA,QAAQ,YAAY,AAAD,IAAnBA,KAAAA,IAAAA,QAAAA,CAAAA,8BAAAA,uBAAsB,IAAI,AAAD,IAAzBA,KAAAA,IAAAA,4BAA4B,GAAG,EAC/BjB,oBACAC,qBACAf;YAGFlB,aAAa,WAAW4C;YAExB,MAAMO,aAAa;gBACjB,GAAGP,QAAQ,IAAI,GAAGA,QAAQ,KAAK,GAAG;gBAClC,GAAGA,QAAQ,GAAG,GAAGA,QAAQ,MAAM,GAAG;YACpC;YACA,IAAIQ,UAAUC,AAAAA,IAAAA,wBAAAA,gCAAAA,AAAAA,EAAiCvC,QAAQ,IAAI,EAAEqC;YAE7D,MAAMG,mBAAmBF,UACrBG,AAAAA,IAAAA,wBAAAA,QAAAA,AAAAA,EAAS;gBAAE,GAAGH,QAAQ,MAAM,CAAC,EAAE;gBAAE,GAAGA,QAAQ,MAAM,CAAC,EAAE;YAAC,GAAGD,cACzD;YAEJ,IAAI,CAACC,WAAWE,mBAAmBE,wBAAAA,iBAAiBA,EAClDJ,UAAU9B,wBAAwB6B;YAGpC,IAAIC,SAAS;gBACXP,kBAAkB;oBAACO;iBAAQ;gBAC3BN,SAAS,EAAE;YACb;QACF;IACF,EAAE,OAAOW,GAAG;QACV,MAAMC,MACJD,aAAaE,QACT,CAAC,sBAAsB,EAAEF,EAAE,OAAO,EAAE,GACpC;QACN,IAAI,AAACX,UAAUA,AAAAA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,MAAM,AAAD,MAAM,GAGhCA,OAAO,IAAI,CAAC,CAAC,CAAC,EAAEY,IAAI,CAAC,CAAC;aAFtBZ,SAAS;YAACY;SAAI;IAIlB;IAEA,OAAO;QACL,MAAMd;QACN,aAAa;YACX,UAAUC;YACVC;QACF;QACAJ;QACArB;QACA,OAAOmB,IAAI,KAAK;QAChB,kBACE,AAAuB,YAAvB,OAAOA,IAAI,OAAO,IAClBA,AAAgB,SAAhBA,IAAI,OAAO,IACX,sBAAsBA,IAAI,OAAO,GAC5BA,IAAI,OAAO,CAAS,gBAAgB,GACrCoB;IACR;AACF;AAEO,eAAeC,gBAAgBhD,OAIrC;IAOC,MAAM,EAAEC,OAAO,EAAEgD,kBAAkB,EAAE7C,WAAW,EAAE,GAAGJ;IACrD,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAM,EAAEE,gBAAgB,EAAE,GAAGL;IAE7B,MAAMa,eAAeoC,AAAAA,IAAAA,uCAAAA,2BAAAA,AAAAA,EAA4B7C;IACjD,MAAM8C,gCAAgC,MAAMC,uCAAAA,yBAAAA,CAAAA,MAAgC,CAAC;QAC3E,oBAAoB9D,wBAAwB2D;IAC9C;IACA,MAAMtD,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKR;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAM6C;gBACR;aACD;QACH;KACD;IAED,IAAI,AAA8B,YAA9B,OAAOF,oBAAiC;QAC1C,MAAMvB,SAAS,MAAMlC,mBAAmB;YACtC,QAAQyD,mBAAmB,MAAM;YACjC,yBAAyBA,mBAAmB,uBAAuB;QACrE;QACAtD,KAAK,IAAI,IAAI+B;IACf;IAEA,MAAM2B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB3D,MACAiC,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBxB;IAGF,IAAImD;IACJ,MAAMC,cAAcH,OAAO,OAAO,CAAC,IAAI;IACvC,IAAIG,aAAa;QACf,MAAMC,aAAapB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACjBmB,aACAvD,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAEFhB,aAAa,0BAA0BoE;QAEvC,MAAMC,oBAAoBL,OAAO,OAAO,CAAC,eAAe,IAAI,EAAE;QAC9DhE,aAAa,wBAAwBqE;QAErC,MAAMC,iBAAiBD,kBACpB,MAAM,CAAC,CAACE,OAAS1B,MAAM,OAAO,CAAC0B,OAC/B,GAAG,CAAC,CAACA,OACGvB,AAAAA,IAAAA,mCAAAA,eAAAA,AAAAA,EACLuB,MACA3D,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnB,GACA,GACAA,QAAQ,IAAI,CAAC,KAAK,EAClBA,QAAQ,IAAI,CAAC,MAAM,EACnBI;QAGNhB,aAAa,qBAAqBsE;QAGlC,MAAME,aAAaC,AAAAA,IAAAA,mCAAAA,UAAAA,AAAAA,EAAW;YAACL;eAAeE;SAAe;QAC7DtE,aAAa,iBAAiBwE;QAG9BN,cAAcQ,AAAAA,IAAAA,mCAAAA,gBAAAA,AAAAA,EAAiBF,YAAY5D,QAAQ,IAAI,EAAEI;QACzDhB,aAAa,2BAA2BkE;IAC1C;IAEA,IAAIS,cAAc1D;IAClB,IAAIiD,aAAa;QACf,MAAMU,gBAAgB,MAAMC,AAAAA,IAAAA,oBAAAA,UAAAA,AAAAA,EAC1B5D,kBACAiD,aACAlD,AAAW,cAAXA;QAEF2D,cAAcC,cAAc,WAAW;QACvCV,YAAY,KAAK,GAAGU,cAAc,KAAK;QACvCV,YAAY,MAAM,GAAGU,cAAc,MAAM;IAC3C;IAEA,OAAO;QACL,MAAMV;QACNS;QACA,OAAOX,OAAO,OAAO,CAAC,KAAK;QAC3B,aAAavB,KAAK,SAAS,CAACuB,OAAO,OAAO;QAC1C,OAAOA,OAAO,KAAK;IACrB;AACF;AAEO,eAAec,qBAGpBnE,OAMD;QA8CKoE;IA7CJ,MAAM,EAAEC,SAAS,EAAEpE,OAAO,EAAEqE,aAAa,EAAE7E,gBAAgB,EAAEW,WAAW,EAAE,GACxEJ;IACF,MAAM,EAAEK,MAAM,EAAE,GAAGD;IACnB,MAAMU,eAAeyD,AAAAA,IAAAA,8BAAAA,qBAAAA,AAAAA;IAErB,MAAM,EAAEjE,gBAAgB,EAAE,GAAGL;IAE7B,MAAM,EAAEM,WAAW,EAAEC,WAAW,EAAE,GAAG,MAAME,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAAiBT,SAAS;QACnE,oBAAoB;QACpB,sBAAsB;QACtB,aAAa;QACb,aAAaqE,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,WAAW;QACvCjE;IACF;IAEA,MAAMmE,wBAAwB,MAAMC,AAAAA,IAAAA,8BAAAA,sBAAAA,AAAAA,EAClClE,aACA8D;IAGF,MAAMK,cAAyD,EAAE;IAEjE,IAAIJ,AAAAA,CAAAA,QAAAA,gBAAAA,KAAAA,IAAAA,cAAe,kBAAkB,AAAD,MAAM,OACxCI,YAAY,IAAI,CAAC;QACf,MAAM;QACN,WAAW;YACT,KAAKpE;YACL,QAAQ;QACV;IACF;IAGFoE,YAAY,IAAI,CAAC;QACf,MAAM;QACN,MAAMF;IACR;IAEA,MAAM7E,OAAe;QACnB;YAAE,MAAM;YAAU,SAASmB;QAAa;QACxC;YACE,MAAM;YACN,SAAS4D;QACX;KACD;IAED,IAAI,QAAAN,CAAAA,yBAAAA,QAAQ,aAAa,AAAD,IAApBA,KAAAA,IAAAA,uBAAuB,aAAa,EACtCzE,KAAK,IAAI,CAAC;QACR,MAAM;QACN,SAAS;IACX;IAGF,IAAIF,kBAAkB;QACpB,MAAMiC,SAAS,MAAMlC,mBAAmB;YACtC,QAAQC,iBAAiB,MAAM;YAC/B,yBAAyBA,iBAAiB,uBAAuB;QACnE;QACAE,KAAK,IAAI,IAAI+B;IACf;IAEA,MAAM2B,SAAS,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EACnB3D,MACAiC,mCAAAA,YAAAA,CAAAA,YAAyB,EACzBxB;IAEF,OAAO;QACL,aAAaiD,OAAO,OAAO;QAC3B7C;QACA,OAAO6C,OAAO,KAAK;IACrB;AACF"}
|
|
@@ -35,8 +35,8 @@ const util_js_namespaceObject = require("./prompt/util.js");
|
|
|
35
35
|
const index_js_namespaceObject = require("./service-caller/index.js");
|
|
36
36
|
const debug = (0, logger_namespaceObject.getDebug)('planning');
|
|
37
37
|
async function plan(userInstruction, opts) {
|
|
38
|
-
var _planFromAI_action;
|
|
39
|
-
const { context, modelConfig } = opts;
|
|
38
|
+
var _opts_conversationHistory, _planFromAI_action;
|
|
39
|
+
const { context, modelConfig, conversationHistory } = opts;
|
|
40
40
|
const { screenshotBase64, size } = context;
|
|
41
41
|
const { modelName, vlMode } = modelConfig;
|
|
42
42
|
const { description: pageDescription, elementById } = await (0, util_js_namespaceObject.describeUserPage)(context, {
|
|
@@ -46,20 +46,57 @@ async function plan(userInstruction, opts) {
|
|
|
46
46
|
actionSpace: opts.actionSpace,
|
|
47
47
|
vlMode: vlMode
|
|
48
48
|
});
|
|
49
|
-
const taskBackgroundContextText = (0, llm_planning_js_namespaceObject.generateTaskBackgroundContext)(userInstruction, opts.log, opts.actionContext);
|
|
50
|
-
const userInstructionPrompt = await (0, llm_planning_js_namespaceObject.automationUserPrompt)(vlMode).format({
|
|
51
|
-
pageDescription,
|
|
52
|
-
taskBackgroundContext: taskBackgroundContextText
|
|
53
|
-
});
|
|
54
49
|
let imagePayload = screenshotBase64;
|
|
55
|
-
|
|
56
|
-
|
|
50
|
+
let imageWidth = size.width;
|
|
51
|
+
let imageHeight = size.height;
|
|
52
|
+
const rightLimit = imageWidth;
|
|
53
|
+
const bottomLimit = imageHeight;
|
|
54
|
+
if ('qwen-vl' === vlMode) {
|
|
55
|
+
const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
|
|
56
|
+
imageWidth = paddedResult.width;
|
|
57
|
+
imageHeight = paddedResult.height;
|
|
58
|
+
imagePayload = paddedResult.imageBase64;
|
|
59
|
+
} else if ('qwen3-vl' === vlMode) {
|
|
60
|
+
const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload, 32);
|
|
61
|
+
imageWidth = paddedResult.width;
|
|
62
|
+
imageHeight = paddedResult.height;
|
|
63
|
+
imagePayload = paddedResult.imageBase64;
|
|
64
|
+
} else if (!vlMode) imagePayload = await (0, external_common_js_namespaceObject.markupImageForLLM)(screenshotBase64, context.tree, {
|
|
65
|
+
width: imageWidth,
|
|
66
|
+
height: imageHeight
|
|
67
|
+
});
|
|
57
68
|
(0, external_common_js_namespaceObject.warnGPT4oSizeLimit)(size, modelName);
|
|
69
|
+
const historyLog = (null == (_opts_conversationHistory = opts.conversationHistory) ? void 0 : _opts_conversationHistory.snapshot()) || [];
|
|
70
|
+
const knowledgeContext = opts.actionContext ? [
|
|
71
|
+
{
|
|
72
|
+
role: 'user',
|
|
73
|
+
content: [
|
|
74
|
+
{
|
|
75
|
+
type: 'text',
|
|
76
|
+
text: `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>`
|
|
77
|
+
}
|
|
78
|
+
]
|
|
79
|
+
}
|
|
80
|
+
] : [];
|
|
81
|
+
const instruction = [
|
|
82
|
+
{
|
|
83
|
+
role: 'user',
|
|
84
|
+
content: [
|
|
85
|
+
{
|
|
86
|
+
type: 'text',
|
|
87
|
+
text: `<user_instruction>${userInstruction}</user_instruction>`
|
|
88
|
+
}
|
|
89
|
+
]
|
|
90
|
+
}
|
|
91
|
+
];
|
|
58
92
|
const msgs = [
|
|
59
93
|
{
|
|
60
94
|
role: 'system',
|
|
61
95
|
content: systemPrompt
|
|
62
96
|
},
|
|
97
|
+
...knowledgeContext,
|
|
98
|
+
...instruction,
|
|
99
|
+
...historyLog,
|
|
63
100
|
{
|
|
64
101
|
role: 'user',
|
|
65
102
|
content: [
|
|
@@ -70,10 +107,12 @@ async function plan(userInstruction, opts) {
|
|
|
70
107
|
detail: 'high'
|
|
71
108
|
}
|
|
72
109
|
},
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
110
|
+
...vlMode ? [] : [
|
|
111
|
+
{
|
|
112
|
+
type: 'text',
|
|
113
|
+
text: pageDescription
|
|
114
|
+
}
|
|
115
|
+
]
|
|
77
116
|
]
|
|
78
117
|
}
|
|
79
118
|
];
|
|
@@ -99,7 +138,7 @@ async function plan(userInstruction, opts) {
|
|
|
99
138
|
debug('locateFields', locateFields);
|
|
100
139
|
locateFields.forEach((field)=>{
|
|
101
140
|
const locateResult = action.param[field];
|
|
102
|
-
if (locateResult) if (vlMode) action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult,
|
|
141
|
+
if (locateResult) if (vlMode) action.param[field] = (0, external_common_js_namespaceObject.fillBboxParam)(locateResult, imageWidth, imageHeight, rightLimit, bottomLimit, vlMode);
|
|
103
142
|
else {
|
|
104
143
|
const element = elementById(locateResult);
|
|
105
144
|
if (element) action.param[field].id = element.id;
|
|
@@ -108,6 +147,24 @@ async function plan(userInstruction, opts) {
|
|
|
108
147
|
});
|
|
109
148
|
(0, utils_namespaceObject.assert)(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);
|
|
110
149
|
if (0 === actions.length && returnValue.more_actions_needed_by_instruction && !returnValue.sleep) console.warn('No actions planned for the prompt, but model said more actions are needed:', userInstruction);
|
|
150
|
+
null == conversationHistory || conversationHistory.append({
|
|
151
|
+
role: 'assistant',
|
|
152
|
+
content: [
|
|
153
|
+
{
|
|
154
|
+
type: 'text',
|
|
155
|
+
text: rawResponse
|
|
156
|
+
}
|
|
157
|
+
]
|
|
158
|
+
});
|
|
159
|
+
null == conversationHistory || conversationHistory.append({
|
|
160
|
+
role: 'user',
|
|
161
|
+
content: [
|
|
162
|
+
{
|
|
163
|
+
type: 'text',
|
|
164
|
+
text: 'I have finished the task'
|
|
165
|
+
}
|
|
166
|
+
]
|
|
167
|
+
});
|
|
111
168
|
return returnValue;
|
|
112
169
|
}
|
|
113
170
|
exports.plan = __webpack_exports__.plan;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport {\n AIActionType,\n type AIArgs,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport {\n automationUserPrompt,\n generateTaskBackgroundContext,\n systemPromptToTaskPlanning,\n} from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n log?: string;\n actionContext?: string;\n modelConfig: IModelConfig;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig } = opts;\n const { screenshotBase64, size } = context;\n\n const { modelName, vlMode } = modelConfig;\n\n const { description: pageDescription, elementById } = await describeUserPage(\n context,\n { vlMode },\n );\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlMode,\n });\n const taskBackgroundContextText = generateTaskBackgroundContext(\n userInstruction,\n opts.log,\n opts.actionContext,\n );\n const userInstructionPrompt = await automationUserPrompt(vlMode).format({\n pageDescription,\n taskBackgroundContext: taskBackgroundContextText,\n });\n\n let imagePayload = screenshotBase64;\n if (vlMode === 'qwen-vl') {\n imagePayload = await paddingToMatchBlockByBase64(imagePayload);\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(\n screenshotBase64,\n context.tree,\n context.size,\n );\n }\n\n warnGPT4oSizeLimit(size, modelName);\n\n const msgs: AIArgs = [\n { role: 'system', content: systemPrompt },\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n {\n type: 'text',\n text: userInstructionPrompt,\n },\n ],\n },\n ];\n\n const { content, usage } = await callAIWithObjectResponse<PlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n );\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n // TODO: use zod.parse to parse the action.param, and then fill the bbox param.\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (vlMode) {\n action.param[field] = fillBboxParam(\n locateResult,\n size.width,\n size.height,\n vlMode,\n );\n } else {\n const element = elementById(locateResult);\n if (element) {\n action.param[field].id = element.id;\n }\n }\n }\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","_planFromAI_action","context","modelConfig","screenshotBase64","size","modelName","vlMode","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","taskBackgroundContextText","generateTaskBackgroundContext","userInstructionPrompt","automationUserPrompt","imagePayload","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","msgs","content","usage","callAIWithObjectResponse","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACqBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAOC;QAmEEC;IAjEH,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAE,GAAGH;IACjC,MAAM,EAAEI,gBAAgB,EAAEC,IAAI,EAAE,GAAGH;IAEnC,MAAM,EAAEI,SAAS,EAAEC,MAAM,EAAE,GAAGJ;IAE9B,MAAM,EAAE,aAAaK,eAAe,EAAEC,WAAW,EAAE,GAAG,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAC1DR,SACA;QAAEK;IAAO;IAEX,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAaZ,KAAK,WAAW;QAC7B,QAAQO;IACV;IACA,MAAMM,4BAA4BC,AAAAA,IAAAA,gCAAAA,6BAAAA,AAAAA,EAChCf,iBACAC,KAAK,GAAG,EACRA,KAAK,aAAa;IAEpB,MAAMe,wBAAwB,MAAMC,AAAAA,IAAAA,gCAAAA,oBAAAA,AAAAA,EAAqBT,QAAQ,MAAM,CAAC;QACtEC;QACA,uBAAuBK;IACzB;IAEA,IAAII,eAAeb;IACnB,IAAIG,AAAW,cAAXA,QACFU,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BD;SAC5C,IAAI,CAACV,QACVU,eAAe,MAAME,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EACnBf,kBACAF,QAAQ,IAAI,EACZA,QAAQ,IAAI;IAIhBkB,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBf,MAAMC;IAEzB,MAAMe,OAAe;QACnB;YAAE,MAAM;YAAU,SAASV;QAAa;QACxC;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKM;wBACL,QAAQ;oBACV;gBACF;gBACA;oBACE,MAAM;oBACN,MAAMF;gBACR;aACD;QACH;KACD;IAED,MAAM,EAAEO,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC/BH,MACAI,mCAAAA,YAAAA,CAAAA,IAAiB,EACjBtB;IAEF,MAAMuB,cAAcC,KAAK,SAAS,CAACL,SAASM,QAAW;IACvD,MAAMC,aAAaP;IAEnB,MAAMQ,UACH7B,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAAC4B,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAH;QACA,UAAUS,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACA9B,KAAK,WAAW,EAChB6B,WAAW,KAAK;IAEpB;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAGnBC,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsBpC,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACkC,SAAWA,OAAO,IAAI,KAAKC;QAG9BvC,MAAM,+BAA+BwC;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAENxC,MAAM,gBAAgByC;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,cACF,IAAIjC,QACF2B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACAnC,KAAK,KAAK,EACVA,KAAK,MAAM,EACXE;iBAEG;gBACL,MAAMmC,UAAUjC,YAAY+B;gBAC5B,IAAIE,SACFR,OAAO,KAAK,CAACK,MAAM,CAAC,EAAE,GAAGG,QAAQ,EAAE;YAEvC;QAEJ;IACF;IAEAT,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEC,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACA5C;IAIJ,OAAOgC;AACT"}
|
|
1
|
+
{"version":3,"file":"ai-model/llm-planning.js","sources":["webpack://@midscene/core/webpack/runtime/define_property_getters","webpack://@midscene/core/webpack/runtime/has_own_property","webpack://@midscene/core/webpack/runtime/make_namespace_object","webpack://@midscene/core/./src/ai-model/llm-planning.ts"],"sourcesContent":["__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n if(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n Object.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n }\n }\n};","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","// define __esModule on exports\n__webpack_require__.r = (exports) => {\n\tif(typeof Symbol !== 'undefined' && Symbol.toStringTag) {\n\t\tObject.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });\n\t}\n\tObject.defineProperty(exports, '__esModule', { value: true });\n};","import type {\n DeviceAction,\n InterfaceType,\n PlanningAIResponse,\n UIContext,\n} from '@/types';\nimport type { IModelConfig } from '@midscene/shared/env';\nimport { paddingToMatchBlockByBase64 } from '@midscene/shared/img';\nimport { getDebug } from '@midscene/shared/logger';\nimport { assert } from '@midscene/shared/utils';\nimport type {\n ChatCompletionContentPart,\n ChatCompletionMessageParam,\n} from 'openai/resources/index';\nimport {\n AIActionType,\n buildYamlFlowFromPlans,\n fillBboxParam,\n findAllMidsceneLocatorField,\n markupImageForLLM,\n warnGPT4oSizeLimit,\n} from './common';\nimport type { ConversationHistory } from './conversation-history';\nimport { systemPromptToTaskPlanning } from './prompt/llm-planning';\nimport { describeUserPage } from './prompt/util';\nimport { callAIWithObjectResponse } from './service-caller/index';\n\nconst debug = getDebug('planning');\n\nexport async function plan(\n userInstruction: string,\n opts: {\n context: UIContext;\n interfaceType: InterfaceType;\n actionSpace: DeviceAction<any>[];\n actionContext?: string;\n modelConfig: IModelConfig;\n conversationHistory?: ConversationHistory;\n },\n): Promise<PlanningAIResponse> {\n const { context, modelConfig, conversationHistory } = opts;\n const { screenshotBase64, size } = context;\n\n const { modelName, vlMode } = modelConfig;\n\n const { description: pageDescription, elementById } = await describeUserPage(\n context,\n { vlMode },\n );\n const systemPrompt = await systemPromptToTaskPlanning({\n actionSpace: opts.actionSpace,\n vlMode: vlMode,\n });\n\n let imagePayload = screenshotBase64;\n let imageWidth = size.width;\n let imageHeight = size.height;\n const rightLimit = imageWidth;\n const bottomLimit = imageHeight;\n if (vlMode === 'qwen-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (vlMode === 'qwen3-vl') {\n const paddedResult = await paddingToMatchBlockByBase64(imagePayload, 32);\n imageWidth = paddedResult.width;\n imageHeight = paddedResult.height;\n imagePayload = paddedResult.imageBase64;\n } else if (!vlMode) {\n imagePayload = await markupImageForLLM(screenshotBase64, context.tree, {\n width: imageWidth,\n height: imageHeight,\n });\n }\n\n warnGPT4oSizeLimit(size, modelName);\n\n const historyLog = opts.conversationHistory?.snapshot() || [];\n // .filter((item) => item.role === 'assistant') || [];\n\n const knowledgeContext: ChatCompletionMessageParam[] = opts.actionContext\n ? [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<high_priority_knowledge>${opts.actionContext}</high_priority_knowledge>`,\n },\n ],\n },\n ]\n : [];\n\n const instruction: ChatCompletionMessageParam[] = [\n {\n role: 'user',\n content: [\n {\n type: 'text',\n text: `<user_instruction>${userInstruction}</user_instruction>`,\n },\n ],\n },\n ];\n\n const msgs: ChatCompletionMessageParam[] = [\n { role: 'system', content: systemPrompt },\n ...knowledgeContext,\n ...instruction,\n ...historyLog,\n {\n role: 'user',\n content: [\n {\n type: 'image_url',\n image_url: {\n url: imagePayload,\n detail: 'high',\n },\n },\n ...(vlMode\n ? []\n : ([\n {\n type: 'text',\n text: pageDescription,\n },\n ] as ChatCompletionContentPart[])),\n ],\n },\n ];\n\n const { content, usage } = await callAIWithObjectResponse<PlanningAIResponse>(\n msgs,\n AIActionType.PLAN,\n modelConfig,\n );\n const rawResponse = JSON.stringify(content, undefined, 2);\n const planFromAI = content;\n\n const actions =\n (planFromAI.action?.type ? [planFromAI.action] : planFromAI.actions) || [];\n const returnValue: PlanningAIResponse = {\n ...planFromAI,\n actions,\n rawResponse,\n usage,\n yamlFlow: buildYamlFlowFromPlans(\n actions,\n opts.actionSpace,\n planFromAI.sleep,\n ),\n };\n\n assert(planFromAI, \"can't get plans from AI\");\n\n // TODO: use zod.parse to parse the action.param, and then fill the bbox param.\n actions.forEach((action) => {\n const type = action.type;\n const actionInActionSpace = opts.actionSpace.find(\n (action) => action.name === type,\n );\n\n debug('actionInActionSpace matched', actionInActionSpace);\n const locateFields = actionInActionSpace\n ? findAllMidsceneLocatorField(actionInActionSpace.paramSchema)\n : [];\n\n debug('locateFields', locateFields);\n\n locateFields.forEach((field) => {\n const locateResult = action.param[field];\n if (locateResult) {\n if (vlMode) {\n action.param[field] = fillBboxParam(\n locateResult,\n imageWidth,\n imageHeight,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n } else {\n const element = elementById(locateResult);\n if (element) {\n action.param[field].id = element.id;\n }\n }\n }\n });\n });\n // in Qwen-VL, error means error. In GPT-4o, error may mean more actions are needed.\n assert(!planFromAI.error, `Failed to plan actions: ${planFromAI.error}`);\n\n if (\n actions.length === 0 &&\n returnValue.more_actions_needed_by_instruction &&\n !returnValue.sleep\n ) {\n console.warn(\n 'No actions planned for the prompt, but model said more actions are needed:',\n userInstruction,\n );\n }\n\n conversationHistory?.append({\n role: 'assistant',\n content: [\n {\n type: 'text',\n text: rawResponse,\n },\n ],\n });\n conversationHistory?.append({\n role: 'user',\n content: [\n {\n type: 'text',\n text: 'I have finished the task',\n },\n ],\n });\n\n return returnValue;\n}\n"],"names":["__webpack_require__","definition","key","Object","obj","prop","Symbol","debug","getDebug","plan","userInstruction","opts","_opts_conversationHistory","_planFromAI_action","context","modelConfig","conversationHistory","screenshotBase64","size","modelName","vlMode","pageDescription","elementById","describeUserPage","systemPrompt","systemPromptToTaskPlanning","imagePayload","imageWidth","imageHeight","rightLimit","bottomLimit","paddedResult","paddingToMatchBlockByBase64","markupImageForLLM","warnGPT4oSizeLimit","historyLog","knowledgeContext","instruction","msgs","content","usage","callAIWithObjectResponse","AIActionType","rawResponse","JSON","undefined","planFromAI","actions","returnValue","buildYamlFlowFromPlans","assert","action","type","actionInActionSpace","locateFields","findAllMidsceneLocatorField","field","locateResult","fillBboxParam","element","console"],"mappings":";;;IAAAA,oBAAoB,CAAC,GAAG,CAAC,UAASC;QACjC,IAAI,IAAIC,OAAOD,WACR,IAAGD,oBAAoB,CAAC,CAACC,YAAYC,QAAQ,CAACF,oBAAoB,CAAC,CAAC,UAASE,MACzEC,OAAO,cAAc,CAAC,UAASD,KAAK;YAAE,YAAY;YAAM,KAAKD,UAAU,CAACC,IAAI;QAAC;IAGzF;;;ICNAF,oBAAoB,CAAC,GAAG,CAACI,KAAKC,OAAUF,OAAO,SAAS,CAAC,cAAc,CAAC,IAAI,CAACC,KAAKC;;;ICClFL,oBAAoB,CAAC,GAAG,CAAC;QACxB,IAAG,AAAkB,eAAlB,OAAOM,UAA0BA,OAAO,WAAW,EACrDH,OAAO,cAAc,CAAC,UAASG,OAAO,WAAW,EAAE;YAAE,OAAO;QAAS;QAEtEH,OAAO,cAAc,CAAC,UAAS,cAAc;YAAE,OAAO;QAAK;IAC5D;;;;;;;;;;;;;;ACqBA,MAAMI,QAAQC,AAAAA,IAAAA,uBAAAA,QAAAA,AAAAA,EAAS;AAEhB,eAAeC,KACpBC,eAAuB,EACvBC,IAOC;QAwCkBC,2BAiEhBC;IAvGH,MAAM,EAAEC,OAAO,EAAEC,WAAW,EAAEC,mBAAmB,EAAE,GAAGL;IACtD,MAAM,EAAEM,gBAAgB,EAAEC,IAAI,EAAE,GAAGJ;IAEnC,MAAM,EAAEK,SAAS,EAAEC,MAAM,EAAE,GAAGL;IAE9B,MAAM,EAAE,aAAaM,eAAe,EAAEC,WAAW,EAAE,GAAG,MAAMC,AAAAA,IAAAA,wBAAAA,gBAAAA,AAAAA,EAC1DT,SACA;QAAEM;IAAO;IAEX,MAAMI,eAAe,MAAMC,AAAAA,IAAAA,gCAAAA,0BAAAA,AAAAA,EAA2B;QACpD,aAAad,KAAK,WAAW;QAC7B,QAAQS;IACV;IAEA,IAAIM,eAAeT;IACnB,IAAIU,aAAaT,KAAK,KAAK;IAC3B,IAAIU,cAAcV,KAAK,MAAM;IAC7B,MAAMW,aAAaF;IACnB,MAAMG,cAAcF;IACpB,IAAIR,AAAW,cAAXA,QAAsB;QACxB,MAAMW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN;QACvDC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC,OAAO,IAAIX,AAAW,eAAXA,QAAuB;QAChC,MAAMW,eAAe,MAAMC,AAAAA,IAAAA,oBAAAA,2BAAAA,AAAAA,EAA4BN,cAAc;QACrEC,aAAaI,aAAa,KAAK;QAC/BH,cAAcG,aAAa,MAAM;QACjCL,eAAeK,aAAa,WAAW;IACzC,OAAO,IAAI,CAACX,QACVM,eAAe,MAAMO,AAAAA,IAAAA,mCAAAA,iBAAAA,AAAAA,EAAkBhB,kBAAkBH,QAAQ,IAAI,EAAE;QACrE,OAAOa;QACP,QAAQC;IACV;IAGFM,IAAAA,mCAAAA,kBAAAA,AAAAA,EAAmBhB,MAAMC;IAEzB,MAAMgB,aAAavB,AAAAA,SAAAA,CAAAA,4BAAAA,KAAK,mBAAmB,AAAD,IAAvBA,KAAAA,IAAAA,0BAA0B,QAAQ,EAAC,KAAK,EAAE;IAG7D,MAAMwB,mBAAiDzB,KAAK,aAAa,GACrE;QACE;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,yBAAyB,EAAEA,KAAK,aAAa,CAAC,0BAA0B,CAAC;gBAClF;aACD;QACH;KACD,GACD,EAAE;IAEN,MAAM0B,cAA4C;QAChD;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,MAAM,CAAC,kBAAkB,EAAE3B,gBAAgB,mBAAmB,CAAC;gBACjE;aACD;QACH;KACD;IAED,MAAM4B,OAAqC;QACzC;YAAE,MAAM;YAAU,SAASd;QAAa;WACrCY;WACAC;WACAF;QACH;YACE,MAAM;YACN,SAAS;gBACP;oBACE,MAAM;oBACN,WAAW;wBACT,KAAKT;wBACL,QAAQ;oBACV;gBACF;mBACIN,SACA,EAAE,GACD;oBACC;wBACE,MAAM;wBACN,MAAMC;oBACR;iBACD;aACN;QACH;KACD;IAED,MAAM,EAAEkB,OAAO,EAAEC,KAAK,EAAE,GAAG,MAAMC,AAAAA,IAAAA,yBAAAA,wBAAAA,AAAAA,EAC/BH,MACAI,mCAAAA,YAAAA,CAAAA,IAAiB,EACjB3B;IAEF,MAAM4B,cAAcC,KAAK,SAAS,CAACL,SAASM,QAAW;IACvD,MAAMC,aAAaP;IAEnB,MAAMQ,UACHlC,AAAAA,CAAAA,SAAAA,CAAAA,qBAAAA,WAAW,MAAM,AAAD,IAAhBA,KAAAA,IAAAA,mBAAmB,IAAI,AAAD,IAAI;QAACiC,WAAW,MAAM;KAAC,GAAGA,WAAW,OAAM,KAAM,EAAE;IAC5E,MAAME,cAAkC;QACtC,GAAGF,UAAU;QACbC;QACAJ;QACAH;QACA,UAAUS,AAAAA,IAAAA,mCAAAA,sBAAAA,AAAAA,EACRF,SACApC,KAAK,WAAW,EAChBmC,WAAW,KAAK;IAEpB;IAEAI,IAAAA,sBAAAA,MAAAA,AAAAA,EAAOJ,YAAY;IAGnBC,QAAQ,OAAO,CAAC,CAACI;QACf,MAAMC,OAAOD,OAAO,IAAI;QACxB,MAAME,sBAAsB1C,KAAK,WAAW,CAAC,IAAI,CAC/C,CAACwC,SAAWA,OAAO,IAAI,KAAKC;QAG9B7C,MAAM,+BAA+B8C;QACrC,MAAMC,eAAeD,sBACjBE,AAAAA,IAAAA,mCAAAA,2BAAAA,AAAAA,EAA4BF,oBAAoB,WAAW,IAC3D,EAAE;QAEN9C,MAAM,gBAAgB+C;QAEtBA,aAAa,OAAO,CAAC,CAACE;YACpB,MAAMC,eAAeN,OAAO,KAAK,CAACK,MAAM;YACxC,IAAIC,cACF,IAAIrC,QACF+B,OAAO,KAAK,CAACK,MAAM,GAAGE,AAAAA,IAAAA,mCAAAA,aAAAA,AAAAA,EACpBD,cACA9B,YACAC,aACAC,YACAC,aACAV;iBAEG;gBACL,MAAMuC,UAAUrC,YAAYmC;gBAC5B,IAAIE,SACFR,OAAO,KAAK,CAACK,MAAM,CAAC,EAAE,GAAGG,QAAQ,EAAE;YAEvC;QAEJ;IACF;IAEAT,IAAAA,sBAAAA,MAAAA,AAAAA,EAAO,CAACJ,WAAW,KAAK,EAAE,CAAC,wBAAwB,EAAEA,WAAW,KAAK,EAAE;IAEvE,IACEC,AAAmB,MAAnBA,QAAQ,MAAM,IACdC,YAAY,kCAAkC,IAC9C,CAACA,YAAY,KAAK,EAElBY,QAAQ,IAAI,CACV,8EACAlD;IAIJM,QAAAA,uBAAAA,oBAAqB,MAAM,CAAC;QAC1B,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM2B;YACR;SACD;IACH;IACA3B,QAAAA,uBAAAA,oBAAqB,MAAM,CAAC;QAC1B,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,MAAM;YACR;SACD;IACH;IAEA,OAAOgC;AACT"}
|