@godscene/core 1.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +767 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/execution-session.mjs +39 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-builder.mjs +343 -0
- package/dist/es/agent/task-cache.mjs +212 -0
- package/dist/es/agent/tasks.mjs +428 -0
- package/dist/es/agent/ui-utils.mjs +101 -0
- package/dist/es/agent/utils.mjs +167 -0
- package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
- package/dist/es/ai-model/auto-glm/index.mjs +6 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
- package/dist/es/ai-model/auto-glm/util.mjs +7 -0
- package/dist/es/ai-model/connectivity.mjs +136 -0
- package/dist/es/ai-model/conversation-history.mjs +193 -0
- package/dist/es/ai-model/index.mjs +12 -0
- package/dist/es/ai-model/inspect.mjs +395 -0
- package/dist/es/ai-model/llm-planning.mjs +231 -0
- package/dist/es/ai-model/prompt/common.mjs +5 -0
- package/dist/es/ai-model/prompt/describe.mjs +64 -0
- package/dist/es/ai-model/prompt/extraction.mjs +129 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
- package/dist/es/ai-model/prompt/util.mjs +57 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
- package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
- package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
- package/dist/es/ai-model/service-caller/index.mjs +648 -0
- package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
- package/dist/es/common.mjs +382 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +340 -0
- package/dist/es/dump/html-utils.mjs +290 -0
- package/dist/es/dump/index.mjs +3 -0
- package/dist/es/dump/screenshot-restoration.mjs +30 -0
- package/dist/es/dump/screenshot-store.mjs +125 -0
- package/dist/es/index.mjs +17 -0
- package/dist/es/report-cli.mjs +149 -0
- package/dist/es/report-generator.mjs +203 -0
- package/dist/es/report-markdown.mjs +216 -0
- package/dist/es/report.mjs +287 -0
- package/dist/es/screenshot-item.mjs +120 -0
- package/dist/es/service/index.mjs +272 -0
- package/dist/es/service/utils.mjs +13 -0
- package/dist/es/skill/index.mjs +35 -0
- package/dist/es/task-runner.mjs +261 -0
- package/dist/es/task-timing.mjs +10 -0
- package/dist/es/tree.mjs +11 -0
- package/dist/es/types.mjs +202 -0
- package/dist/es/utils.mjs +232 -0
- package/dist/es/yaml/builder.mjs +11 -0
- package/dist/es/yaml/index.mjs +4 -0
- package/dist/es/yaml/player.mjs +425 -0
- package/dist/es/yaml/utils.mjs +100 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +815 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/execution-session.js +73 -0
- package/dist/lib/agent/index.js +76 -0
- package/dist/lib/agent/task-builder.js +380 -0
- package/dist/lib/agent/task-cache.js +264 -0
- package/dist/lib/agent/tasks.js +471 -0
- package/dist/lib/agent/ui-utils.js +153 -0
- package/dist/lib/agent/utils.js +238 -0
- package/dist/lib/ai-model/auto-glm/actions.js +271 -0
- package/dist/lib/ai-model/auto-glm/index.js +64 -0
- package/dist/lib/ai-model/auto-glm/parser.js +280 -0
- package/dist/lib/ai-model/auto-glm/planning.js +103 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
- package/dist/lib/ai-model/auto-glm/util.js +44 -0
- package/dist/lib/ai-model/connectivity.js +180 -0
- package/dist/lib/ai-model/conversation-history.js +227 -0
- package/dist/lib/ai-model/index.js +127 -0
- package/dist/lib/ai-model/inspect.js +441 -0
- package/dist/lib/ai-model/llm-planning.js +268 -0
- package/dist/lib/ai-model/prompt/common.js +39 -0
- package/dist/lib/ai-model/prompt/describe.js +98 -0
- package/dist/lib/ai-model/prompt/extraction.js +169 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
- package/dist/lib/ai-model/prompt/util.js +103 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
- package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
- package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
- package/dist/lib/ai-model/service-caller/index.js +716 -0
- package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
- package/dist/lib/ai-model/ui-tars-planning.js +281 -0
- package/dist/lib/common.js +491 -0
- package/dist/lib/device/device-options.js +18 -0
- package/dist/lib/device/index.js +467 -0
- package/dist/lib/dump/html-utils.js +366 -0
- package/dist/lib/dump/index.js +58 -0
- package/dist/lib/dump/screenshot-restoration.js +64 -0
- package/dist/lib/dump/screenshot-store.js +165 -0
- package/dist/lib/index.js +184 -0
- package/dist/lib/report-cli.js +189 -0
- package/dist/lib/report-generator.js +244 -0
- package/dist/lib/report-markdown.js +253 -0
- package/dist/lib/report.js +333 -0
- package/dist/lib/screenshot-item.js +154 -0
- package/dist/lib/service/index.js +306 -0
- package/dist/lib/service/utils.js +47 -0
- package/dist/lib/skill/index.js +69 -0
- package/dist/lib/task-runner.js +298 -0
- package/dist/lib/task-timing.js +44 -0
- package/dist/lib/tree.js +51 -0
- package/dist/lib/types.js +298 -0
- package/dist/lib/utils.js +314 -0
- package/dist/lib/yaml/builder.js +55 -0
- package/dist/lib/yaml/index.js +79 -0
- package/dist/lib/yaml/player.js +459 -0
- package/dist/lib/yaml/utils.js +153 -0
- package/dist/lib/yaml.js +18 -0
- package/dist/types/agent/agent.d.ts +220 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +9 -0
- package/dist/types/agent/task-builder.d.ts +34 -0
- package/dist/types/agent/task-cache.d.ts +49 -0
- package/dist/types/agent/tasks.d.ts +70 -0
- package/dist/types/agent/ui-utils.d.ts +14 -0
- package/dist/types/agent/utils.d.ts +25 -0
- package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
- package/dist/types/ai-model/connectivity.d.ts +20 -0
- package/dist/types/ai-model/conversation-history.d.ts +105 -0
- package/dist/types/ai-model/index.d.ts +16 -0
- package/dist/types/ai-model/inspect.d.ts +67 -0
- package/dist/types/ai-model/llm-planning.d.ts +19 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +33 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
- package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
- package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
- package/dist/types/ai-model/service-caller/index.d.ts +60 -0
- package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
- package/dist/types/common.d.ts +288 -0
- package/dist/types/device/device-options.d.ts +155 -0
- package/dist/types/device/index.d.ts +2565 -0
- package/dist/types/dump/html-utils.d.ts +75 -0
- package/dist/types/dump/index.d.ts +5 -0
- package/dist/types/dump/screenshot-restoration.d.ts +8 -0
- package/dist/types/dump/screenshot-store.d.ts +49 -0
- package/dist/types/index.d.ts +21 -0
- package/dist/types/report-cli.d.ts +36 -0
- package/dist/types/report-generator.d.ts +88 -0
- package/dist/types/report-markdown.d.ts +24 -0
- package/dist/types/report.d.ts +52 -0
- package/dist/types/screenshot-item.d.ts +67 -0
- package/dist/types/service/index.d.ts +24 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/skill/index.d.ts +25 -0
- package/dist/types/task-runner.d.ts +50 -0
- package/dist/types/task-timing.d.ts +8 -0
- package/dist/types/tree.d.ts +4 -0
- package/dist/types/types.d.ts +684 -0
- package/dist/types/utils.d.ts +45 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +4 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +215 -0
- package/package.json +130 -0
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Minimal bundler runtime: three helpers hung off `__webpack_require__`, followed by
// the creation of this module's export object.
var __webpack_require__ = {};

// d(exports1, definition): for every own key of `definition` that `exports1` does not
// already own, install an enumerable getter so the export is resolved lazily.
__webpack_require__.d = (exports1, definition) => {
    for (const key in definition) {
        if (!__webpack_require__.o(definition, key)) continue;
        if (__webpack_require__.o(exports1, key)) continue;
        Object.defineProperty(exports1, key, {
            enumerable: true,
            get: definition[key]
        });
    }
};

// o(obj, prop): own-property check that does not depend on obj's prototype chain.
__webpack_require__.o = (obj, prop) => Object.prototype.hasOwnProperty.call(obj, prop);

// r(exports1): tag the object as an ES module (Symbol.toStringTag when available,
// plus the non-enumerable `__esModule` flag).
__webpack_require__.r = (exports1) => {
    const hasToStringTag = typeof Symbol !== 'undefined' && Symbol.toStringTag;
    if (hasToStringTag) {
        Object.defineProperty(exports1, Symbol.toStringTag, { value: 'Module' });
    }
    Object.defineProperty(exports1, '__esModule', { value: true });
};

var __webpack_exports__ = {};
__webpack_require__.r(__webpack_exports__);
// Getters are used so the function declarations further down are looked up on access.
__webpack_require__.d(__webpack_exports__, {
    AiLocateElement: () => AiLocateElement,
    AiJudgeOrderSensitive: () => AiJudgeOrderSensitive,
    AiExtractElementInfo: () => AiExtractElementInfo,
    AiLocateSection: () => AiLocateSection,
    buildSearchAreaConfig: () => buildSearchAreaConfig
});
|
|
33
|
+
const dom_util_namespaceObject = require("@godscene/shared/extractor/dom-util");
|
|
34
|
+
const img_namespaceObject = require("@godscene/shared/img");
|
|
35
|
+
const logger_namespaceObject = require("@godscene/shared/logger");
|
|
36
|
+
const utils_namespaceObject = require("@godscene/shared/utils");
|
|
37
|
+
const external_common_js_namespaceObject = require("../common.js");
|
|
38
|
+
const parser_js_namespaceObject = require("./auto-glm/parser.js");
|
|
39
|
+
const prompt_js_namespaceObject = require("./auto-glm/prompt.js");
|
|
40
|
+
const util_js_namespaceObject = require("./auto-glm/util.js");
|
|
41
|
+
const extraction_js_namespaceObject = require("./prompt/extraction.js");
|
|
42
|
+
const llm_locator_js_namespaceObject = require("./prompt/llm-locator.js");
|
|
43
|
+
const llm_section_locator_js_namespaceObject = require("./prompt/llm-section-locator.js");
|
|
44
|
+
const order_sensitive_judge_js_namespaceObject = require("./prompt/order-sensitive-judge.js");
|
|
45
|
+
const index_js_namespaceObject = require("./service-caller/index.js");
|
|
46
|
+
const debugInspect = (0, logger_namespaceObject.getDebug)('ai:inspect');
|
|
47
|
+
const debugSection = (0, logger_namespaceObject.getDebug)('ai:section');
|
|
48
|
+
/**
 * Build the "search area" used for a second, zoomed-in locate pass.
 *
 * The base rect is expanded via `expandSearchArea`, the screenshot is cropped to
 * that rect, and the crop is up-scaled by a fixed factor of 2.
 *
 * @param {object} options
 * @param {object} options.context - provides `shotSize` and `screenshot.base64`.
 * @param {object} options.baseRect - rect to expand and crop around.
 * @param {string} [options.modelFamily] - the crop helper receives `true` as its
 *   third argument only for 'qwen2.5-vl'.
 * @returns {Promise<{rect: object, imageBase64: string, scale: number}>} `rect` keeps
 *   the expanded rect's position but its width/height are overwritten with the
 *   SCALED image dimensions; `scale` is the factor that was applied.
 */
async function buildSearchAreaConfig(options) {
    const { context, baseRect, modelFamily } = options;
    const upscaleFactor = 2;
    const { expandSearchArea } = external_common_js_namespaceObject;
    const { cropByRect, scaleImage } = img_namespaceObject;

    const areaRect = expandSearchArea(baseRect, context.shotSize);
    const needsBlockPadding = 'qwen2.5-vl' === modelFamily;
    const cropped = await cropByRect(context.screenshot.base64, areaRect, needsBlockPadding);
    const scaled = await scaleImage(cropped.imageBase64, upscaleFactor);

    // The rect is deliberately mutated: its size now describes the scaled image.
    areaRect.width = scaled.width;
    areaRect.height = scaled.height;

    return {
        rect: areaRect,
        imageBase64: scaled.imageBase64,
        scale: upscaleFactor
    };
}
|
|
62
|
+
/**
 * Return the text part of a user prompt.
 *
 * @param {string|{prompt: string}} prompt - either the text itself or an object
 *   carrying the text in its `prompt` field.
 * @returns {string} the prompt text.
 */
const extraTextFromUserPrompt = (prompt) => (typeof prompt === 'string' ? prompt : prompt.prompt);
|
|
66
|
+
/**
 * Turn the reference images of a multimodal prompt into chat messages.
 *
 * Output layout: one introductory text message, then for each image a text message
 * naming it followed by an `image_url` message carrying the pre-processed image.
 * Images are processed one at a time, in order.
 *
 * @param {{images?: Array<{name: string, url: string}>, convertHttpImage2Base64?: boolean}} [multimodalPrompt]
 * @returns {Promise<Array<object>>} user-role messages; empty when there are no images.
 */
const promptsToChatParam = async (multimodalPrompt) => {
    // Every message produced here is a user message with a single content part.
    const asUserMessage = (part) => ({
        role: 'user',
        content: [part]
    });

    if (!multimodalPrompt?.images?.length) {
        return [];
    }

    const messages = [
        asUserMessage({
            type: 'text',
            text: 'Next, I will provide all the reference images.'
        })
    ];

    for (const image of multimodalPrompt.images) {
        const shouldConvert = Boolean(multimodalPrompt.convertHttpImage2Base64);
        const imageUrl = await (0, img_namespaceObject.preProcessImageUrl)(image.url, shouldConvert);
        messages.push(
            asUserMessage({
                type: 'text',
                text: `this is the reference image named '${image.name}':`
            }),
            asUserMessage({
                type: 'image_url',
                image_url: {
                    url: imageUrl,
                    detail: 'high'
                }
            })
        );
    }

    return messages;
};
|
|
105
|
+
/**
 * Ask the model to locate one element on the screenshot (or inside a search area).
 *
 * Two response formats are handled:
 *  - auto-glm model families answer with a point in a 0–999 normalised space, which
 *    is converted to a screen-space point;
 *  - every other family answers with an object that may carry a `bbox`, which is
 *    converted with `adaptBboxToRect`.
 *
 * @param {object} options
 * @param {object} options.context - provides `screenshot.base64` and `shotSize`.
 * @param {string|object} options.targetElementDescription - text, or an object with
 *   `prompt` and optional `images` / `convertHttpImage2Base64`.
 * @param {object} options.modelConfig - must expose `modelFamily`.
 * @param {{rect: object, imageBase64: string, scale?: number}} [options.searchConfig] -
 *   restricts the search to a cropped image; `rect.width`/`rect.height` are taken as
 *   the size of that image and `rect.left`/`rect.top` as its offset on the screen.
 * @param {AbortSignal} [options.abortSignal] - forwarded to the AI call.
 * @returns {Promise<{rect: (object|undefined), parseResult: {elements: Array, errors: Array},
 *   rawResponse: *, usage: *, reasoning_content: *}>}
 * @throws if the description is missing, if `searchConfig` lacks `rect`/`imageBase64`,
 *   or if the auto-glm AI call itself fails (only the object-response call is caught).
 */
async function AiLocateElement(options) {
    const { context, targetElementDescription, modelConfig } = options;
    const { modelFamily } = modelConfig;
    const screenshotBase64 = context.screenshot.base64;
    (0, utils_namespaceObject.assert)(targetElementDescription, "cannot find the target element description");
    const targetElementDescriptionText = extraTextFromUserPrompt(targetElementDescription);
    const userInstructionPrompt = (0, llm_locator_js_namespaceObject.findElementPrompt)(targetElementDescriptionText);
    const useAutoGLM = (0, util_js_namespaceObject.isAutoGLM)(modelFamily);
    const systemPrompt = useAutoGLM ? (0, prompt_js_namespaceObject.getAutoGLMLocatePrompt)(modelFamily) : (0, llm_locator_js_namespaceObject.systemPromptToLocateElement)(modelFamily);

    let imagePayload = screenshotBase64;
    let imageWidth = context.shotSize.width;
    let imageHeight = context.shotSize.height;
    let originalImageWidth = imageWidth;
    let originalImageHeight = imageHeight;
    if (options.searchConfig) {
        (0, utils_namespaceObject.assert)(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
        (0, utils_namespaceObject.assert)(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
        imagePayload = options.searchConfig.imageBase64;
        imageWidth = options.searchConfig.rect?.width;
        imageHeight = options.searchConfig.rect?.height;
        originalImageWidth = imageWidth;
        originalImageHeight = imageHeight;
    } else if ('qwen2.5-vl' === modelFamily) {
        // Only the full screenshot is padded here; a search-area image is sent as is.
        const paddedResult = await (0, img_namespaceObject.paddingToMatchBlockByBase64)(imagePayload);
        imageWidth = paddedResult.width;
        imageHeight = paddedResult.height;
        imagePayload = paddedResult.imageBase64;
    }

    const msgs = [
        {
            role: 'system',
            content: systemPrompt
        },
        {
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: imagePayload,
                        detail: 'high'
                    }
                },
                {
                    type: 'text',
                    text: useAutoGLM ? `Tap: ${userInstructionPrompt}` : userInstructionPrompt
                }
            ]
        }
    ];
    if ('string' != typeof targetElementDescription) {
        const addOns = await promptsToChatParam({
            images: targetElementDescription.images,
            convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64
        });
        msgs.push(...addOns);
    }

    if (useAutoGLM) {
        const { content: rawResponseContent, usage } = await (0, index_js_namespaceObject.callAIWithStringResponse)(msgs, modelConfig, {
            abortSignal: options.abortSignal
        });
        debugInspect('auto-glm rawResponse:', rawResponseContent);
        const parsed = (0, parser_js_namespaceObject.parseAutoGLMLocateResponse)(rawResponseContent);
        debugInspect('auto-glm thinking:', parsed.think);
        debugInspect('auto-glm coordinates:', parsed.coordinates);
        let resRect;
        let matchedElements = [];
        let errors = [];
        if (parsed.error || !parsed.coordinates) {
            errors = [
                parsed.error || 'Failed to parse auto-glm response'
            ];
            debugInspect('auto-glm parse error:', errors[0]);
        } else {
            const { x, y } = parsed.coordinates;
            debugInspect('auto-glm coordinates [0-999]:', {
                x,
                y
            });
            // Pixel position inside the image that was actually sent to the model.
            const pixelX = Math.round(x * imageWidth / 1000);
            const pixelY = Math.round(y * imageHeight / 1000);
            debugInspect('auto-glm pixel coordinates:', {
                pixelX,
                pixelY
            });
            let finalX = pixelX;
            let finalY = pixelY;
            const searchRect = options.searchConfig?.rect;
            if (searchRect) {
                // A search-area image may be an up-scaled crop (see searchConfig.scale).
                // Undo that scaling before adding the crop offset, as the bbox branch
                // does by forwarding `scale` to adaptBboxToRect. A missing or invalid
                // scale is treated as 1, which keeps the previous behaviour.
                const rawScale = options.searchConfig.scale;
                const scale = Number.isFinite(rawScale) && rawScale > 0 ? rawScale : 1;
                finalX = Math.round(pixelX / scale) + searchRect.left;
                finalY = Math.round(pixelY / scale) + searchRect.top;
            }
            const element = (0, dom_util_namespaceObject.generateElementByPoint)([
                finalX,
                finalY
            ], targetElementDescriptionText);
            // Guard before dereferencing: a missing element yields "no match", not a crash.
            resRect = element?.rect;
            debugInspect('auto-glm resRect:', resRect);
            if (element) matchedElements = [
                element
            ];
        }
        return {
            rect: resRect,
            parseResult: {
                elements: matchedElements,
                errors
            },
            rawResponse: rawResponseContent,
            usage,
            reasoning_content: parsed.think
        };
    }

    let res;
    try {
        res = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs, modelConfig, {
            abortSignal: options.abortSignal
        });
    } catch (callError) {
        // A failed call is reported through parseResult.errors instead of being thrown.
        const errorMessage = callError instanceof Error ? callError.message : String(callError);
        const isParseError = callError instanceof index_js_namespaceObject.AIResponseParseError;
        return {
            rect: void 0,
            parseResult: {
                elements: [],
                errors: [
                    `AI call error: ${errorMessage}`
                ]
            },
            rawResponse: isParseError ? callError.rawResponse : errorMessage,
            usage: isParseError ? callError.usage : void 0,
            reasoning_content: void 0
        };
    }

    const rawResponse = JSON.stringify(res.content);
    let resRect;
    let matchedElements = [];
    let errors = 'errors' in res.content ? res.content.errors : [];
    try {
        if ('bbox' in res.content && Array.isArray(res.content.bbox) && res.content.bbox.length >= 1) {
            resRect = (0, external_common_js_namespaceObject.adaptBboxToRect)(res.content.bbox, imageWidth, imageHeight, options.searchConfig?.rect?.left, options.searchConfig?.rect?.top, originalImageWidth, originalImageHeight, modelFamily, options.searchConfig?.scale);
            debugInspect('resRect', resRect);
            const element = (0, dom_util_namespaceObject.generateElementByRect)(resRect, targetElementDescriptionText);
            // A usable bbox overrides any errors the model reported alongside it.
            errors = [];
            if (element) matchedElements = [
                element
            ];
        }
    } catch (e) {
        const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : 'unknown error in locate';
        // `errors` comes from the model and is not guaranteed to be an array; only
        // append when it really is a non-empty array, otherwise replace it.
        if (Array.isArray(errors) && errors.length !== 0) errors.push(`(${msg})`);
        else errors = [
            msg
        ];
    }
    return {
        rect: resRect,
        parseResult: {
            elements: matchedElements,
            errors: errors
        },
        rawResponse,
        usage: res.usage,
        reasoning_content: res.reasoning_content
    };
}
|
|
270
|
+
/**
 * Ask the model for the screen section matching a description and turn the answer
 * into a search-area config (cropped, scaled image plus its rect).
 *
 * The section bbox and any array-valued `references_bbox` entries are converted to
 * rects against the full screenshot, merged, and handed to `buildSearchAreaConfig`.
 *
 * @param {object} options
 * @param {object} options.context - provides `screenshot.base64` and `shotSize`.
 * @param {string|object} options.sectionDescription - text, or an object with
 *   `prompt` and optional `images` / `convertHttpImage2Base64`.
 * @param {object} options.modelConfig - must expose `modelFamily`.
 * @param {AbortSignal} [options.abortSignal] - forwarded to the AI call.
 * @returns {Promise<object>} `{rect, imageBase64, scale, error, rawResponse, usage}`;
 *   when the AI call fails the object has no `scale` key and `error` starts with
 *   "AI call error: ".
 */
async function AiLocateSection(options) {
    const { context, sectionDescription, modelConfig } = options;
    const { modelFamily } = modelConfig;
    const screenshotBase64 = context.screenshot.base64;
    const systemPrompt = (0, llm_section_locator_js_namespaceObject.systemPromptToLocateSection)(modelFamily);
    const descriptionText = extraTextFromUserPrompt(sectionDescription);
    const instructionText = (0, llm_section_locator_js_namespaceObject.sectionLocatorInstruction)(descriptionText);

    const screenshotPart = {
        type: 'image_url',
        image_url: {
            url: screenshotBase64,
            detail: 'high'
        }
    };
    const instructionPart = {
        type: 'text',
        text: instructionText
    };
    const msgs = [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: [screenshotPart, instructionPart] }
    ];
    if (typeof sectionDescription !== 'string') {
        const referenceMessages = await promptsToChatParam({
            images: sectionDescription.images,
            convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64
        });
        msgs.push(...referenceMessages);
    }

    let aiResult;
    try {
        aiResult = await (0, index_js_namespaceObject.callAIWithObjectResponse)(msgs, modelConfig, {
            abortSignal: options.abortSignal
        });
    } catch (callError) {
        // Report the failure in the result instead of throwing.
        const errorMessage = callError instanceof Error ? callError.message : String(callError);
        const isParseError = callError instanceof index_js_namespaceObject.AIResponseParseError;
        return {
            rect: void 0,
            imageBase64: void 0,
            error: `AI call error: ${errorMessage}`,
            rawResponse: isParseError ? callError.rawResponse : errorMessage,
            usage: isParseError ? callError.usage : void 0
        };
    }

    let searchAreaConfig;
    const sectionBbox = aiResult.content.bbox;
    if (sectionBbox) {
        // Every bbox is interpreted against the full screenshot with no offset.
        const toScreenRect = (bbox) => (0, external_common_js_namespaceObject.adaptBboxToRect)(bbox, context.shotSize.width, context.shotSize.height, 0, 0, context.shotSize.width, context.shotSize.height, modelFamily);

        const targetRect = toScreenRect(sectionBbox);
        debugSection('original targetRect %j', targetRect);

        const referenceBboxList = aiResult.content.references_bbox || [];
        debugSection('referenceBboxList %j', referenceBboxList);

        const referenceRects = [];
        for (const candidate of referenceBboxList) {
            if (Array.isArray(candidate)) referenceRects.push(toScreenRect(candidate));
        }
        debugSection('referenceRects %j', referenceRects);

        const mergedRect = (0, external_common_js_namespaceObject.mergeRects)([targetRect].concat(referenceRects));
        debugSection('mergedRect %j', mergedRect);

        // Computed only so the debug log can show the size before scaling.
        const expandedRect = (0, external_common_js_namespaceObject.expandSearchArea)(mergedRect, context.shotSize);
        const { width: originalWidth, height: originalHeight } = expandedRect;
        debugSection('expanded sectionRect %j', expandedRect);

        searchAreaConfig = await buildSearchAreaConfig({
            context,
            baseRect: mergedRect,
            modelFamily
        });
        debugSection('scaled sectionRect from %dx%d to %dx%d (scale=%d)', originalWidth, originalHeight, searchAreaConfig.rect.width, searchAreaConfig.rect.height, searchAreaConfig.scale);
    }

    return {
        rect: searchAreaConfig?.rect,
        imageBase64: searchAreaConfig?.imageBase64,
        scale: searchAreaConfig?.scale,
        error: aiResult.content.error,
        rawResponse: JSON.stringify(aiResult.content),
        usage: aiResult.usage
    };
}
|
|
356
|
+
/**
 * Ask the model to extract data from the current screen and parse its XML answer.
 *
 * @param {object} options
 * @param {*} options.dataQuery - the query forwarded to `extractDataQueryPrompt`.
 * @param {object} options.context - provides `screenshot.base64`.
 * @param {object} [options.extractOption] - the screenshot is attached unless
 *   `screenshotIncluded` is exactly `false`.
 * @param {object} [options.multimodalPrompt] - optional reference images.
 * @param {object} options.modelConfig - forwarded to `callAI`.
 * @param {string} [options.pageDescription] - defaults to an empty string.
 * @returns {Promise<{parseResult: *, rawResponse: *, usage: *, reasoning_content: *}>}
 * @throws {AIResponseParseError} when the response cannot be parsed; the error
 *   carries the raw response and usage.
 */
async function AiExtractElementInfo(options) {
    const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } = options;
    const systemPrompt = (0, extraction_js_namespaceObject.systemPromptToExtract)();
    const screenshotBase64 = context.screenshot.base64;
    const queryText = (0, extraction_js_namespaceObject.extractDataQueryPrompt)(options.pageDescription || '', dataQuery);

    const attachScreenshot = extractOption?.screenshotIncluded !== false;
    const screenshotParts = attachScreenshot
        ? [
            {
                type: 'image_url',
                image_url: {
                    url: screenshotBase64,
                    detail: 'high'
                }
            }
        ]
        : [];
    const userContent = [
        ...screenshotParts,
        {
            type: 'text',
            text: queryText
        }
    ];

    const msgs = [
        { role: 'system', content: systemPrompt },
        { role: 'user', content: userContent }
    ];
    if (multimodalPrompt) {
        const { images, convertHttpImage2Base64 } = multimodalPrompt;
        msgs.push(...await promptsToChatParam({ images, convertHttpImage2Base64 }));
    }

    const aiResponse = await (0, index_js_namespaceObject.callAI)(msgs, modelConfig);
    const { content: rawResponse, usage, reasoning_content } = aiResponse;

    // Re-throw parse failures as AIResponseParseError so the raw text and usage survive.
    const parseOrThrow = () => {
        try {
            return (0, extraction_js_namespaceObject.parseXMLExtractionResponse)(rawResponse);
        } catch (parseError) {
            const errorMessage = parseError instanceof Error ? parseError.message : String(parseError);
            throw new index_js_namespaceObject.AIResponseParseError(`XML parse error: ${errorMessage}`, rawResponse, usage);
        }
    };
    const parseResult = parseOrThrow();

    return {
        parseResult,
        rawResponse,
        usage,
        reasoning_content
    };
}
|
|
405
|
+
/**
 * Ask the model whether a description is order sensitive.
 *
 * @param {string} description - the text to judge.
 * @param {Function} callAIFn - invoked as `callAIFn(msgs, modelConfig, { deepThink: false })`
 *   and expected to resolve to `{ content, usage }`.
 * @param {object} modelConfig - passed through to `callAIFn` untouched.
 * @returns {Promise<{isOrderSensitive: *, usage: *}>} `isOrderSensitive` falls back to
 *   `false` when the response leaves it null or undefined.
 */
async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
    const { systemPromptToJudgeOrderSensitive, orderSensitiveJudgePrompt } = order_sensitive_judge_js_namespaceObject;
    const systemMessage = { role: 'system', content: systemPromptToJudgeOrderSensitive() };
    const userMessage = { role: 'user', content: orderSensitiveJudgePrompt(description) };

    debugInspect("AiJudgeOrderSensitive: deepThink=false, description=%s", description);

    const callOptions = { deepThink: false };
    const verdict = await callAIFn([systemMessage, userMessage], modelConfig, callOptions);
    const flag = verdict.content.isOrderSensitive;

    return {
        isOrderSensitive: flag ?? false,
        usage: verdict.usage
    };
}
|
|
427
|
+
// Publish the module's bindings on the CommonJS `exports` object.
{
    // Names copied eagerly, in this order.
    const namedExports = [
        "AiExtractElementInfo",
        "AiJudgeOrderSensitive",
        "AiLocateElement",
        "AiLocateSection",
        "buildSearchAreaConfig"
    ];
    for (const exportName of namedExports) {
        exports[exportName] = __webpack_exports__[exportName];
    }
    // Copy any further enumerable keys of the export object that are not listed above.
    for (const extraName in __webpack_exports__) {
        if (namedExports.indexOf(extraName) === -1) {
            exports[extraName] = __webpack_exports__[extraName];
        }
    }
}
// Mark the CommonJS exports as originating from an ES module.
Object.defineProperty(exports, '__esModule', {
    value: true
});
|