@rpascene/core 0.30.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +636 -0
- package/dist/es/agent/agent.mjs.map +1 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-cache.mjs +184 -0
- package/dist/es/agent/task-cache.mjs.map +1 -0
- package/dist/es/agent/tasks.mjs +666 -0
- package/dist/es/agent/tasks.mjs.map +1 -0
- package/dist/es/agent/ui-utils.mjs +72 -0
- package/dist/es/agent/ui-utils.mjs.map +1 -0
- package/dist/es/agent/utils.mjs +162 -0
- package/dist/es/agent/utils.mjs.map +1 -0
- package/dist/es/ai-model/action-executor.mjs +129 -0
- package/dist/es/ai-model/action-executor.mjs.map +1 -0
- package/dist/es/ai-model/common.mjs +355 -0
- package/dist/es/ai-model/common.mjs.map +1 -0
- package/dist/es/ai-model/conversation-history.mjs +58 -0
- package/dist/es/ai-model/conversation-history.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +11 -0
- package/dist/es/ai-model/inspect.mjs +286 -0
- package/dist/es/ai-model/inspect.mjs.map +1 -0
- package/dist/es/ai-model/llm-planning.mjs +140 -0
- package/dist/es/ai-model/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/assertion.mjs +31 -0
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
- package/dist/es/ai-model/prompt/common.mjs +7 -0
- package/dist/es/ai-model/prompt/common.mjs.map +1 -0
- package/dist/es/ai-model/prompt/describe.mjs +44 -0
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +140 -0
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +367 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/util.mjs +124 -0
- package/dist/es/ai-model/prompt/util.mjs.map +1 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +537 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +201 -0
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
- package/dist/es/device/index.mjs +152 -0
- package/dist/es/device/index.mjs.map +1 -0
- package/dist/es/image/index.mjs +2 -0
- package/dist/es/index.mjs +11 -0
- package/dist/es/index.mjs.map +1 -0
- package/dist/es/insight/index.mjs +233 -0
- package/dist/es/insight/index.mjs.map +1 -0
- package/dist/es/insight/utils.mjs +15 -0
- package/dist/es/insight/utils.mjs.map +1 -0
- package/dist/es/report.mjs +88 -0
- package/dist/es/report.mjs.map +1 -0
- package/dist/es/tree.mjs +2 -0
- package/dist/es/types.mjs +11 -0
- package/dist/es/types.mjs.map +1 -0
- package/dist/es/utils.mjs +204 -0
- package/dist/es/utils.mjs.map +1 -0
- package/dist/es/yaml/builder.mjs +13 -0
- package/dist/es/yaml/builder.mjs.map +1 -0
- package/dist/es/yaml/index.mjs +3 -0
- package/dist/es/yaml/player.mjs +372 -0
- package/dist/es/yaml/player.mjs.map +1 -0
- package/dist/es/yaml/utils.mjs +73 -0
- package/dist/es/yaml/utils.mjs.map +1 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +683 -0
- package/dist/lib/agent/agent.js.map +1 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/index.js +81 -0
- package/dist/lib/agent/index.js.map +1 -0
- package/dist/lib/agent/task-cache.js +236 -0
- package/dist/lib/agent/task-cache.js.map +1 -0
- package/dist/lib/agent/tasks.js +703 -0
- package/dist/lib/agent/tasks.js.map +1 -0
- package/dist/lib/agent/ui-utils.js +121 -0
- package/dist/lib/agent/ui-utils.js.map +1 -0
- package/dist/lib/agent/utils.js +233 -0
- package/dist/lib/agent/utils.js.map +1 -0
- package/dist/lib/ai-model/action-executor.js +163 -0
- package/dist/lib/ai-model/action-executor.js.map +1 -0
- package/dist/lib/ai-model/common.js +461 -0
- package/dist/lib/ai-model/common.js.map +1 -0
- package/dist/lib/ai-model/conversation-history.js +92 -0
- package/dist/lib/ai-model/conversation-history.js.map +1 -0
- package/dist/lib/ai-model/index.js +131 -0
- package/dist/lib/ai-model/index.js.map +1 -0
- package/dist/lib/ai-model/inspect.js +326 -0
- package/dist/lib/ai-model/inspect.js.map +1 -0
- package/dist/lib/ai-model/llm-planning.js +174 -0
- package/dist/lib/ai-model/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/assertion.js +65 -0
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
- package/dist/lib/ai-model/prompt/common.js +41 -0
- package/dist/lib/ai-model/prompt/common.js.map +1 -0
- package/dist/lib/ai-model/prompt/describe.js +78 -0
- package/dist/lib/ai-model/prompt/describe.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +180 -0
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +407 -0
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/util.js +176 -0
- package/dist/lib/ai-model/prompt/util.js.map +1 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +623 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -0
- package/dist/lib/ai-model/ui-tars-planning.js +238 -0
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
- package/dist/lib/device/index.js +255 -0
- package/dist/lib/device/index.js.map +1 -0
- package/dist/lib/image/index.js +56 -0
- package/dist/lib/image/index.js.map +1 -0
- package/dist/lib/index.js +103 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/insight/index.js +267 -0
- package/dist/lib/insight/index.js.map +1 -0
- package/dist/lib/insight/utils.js +49 -0
- package/dist/lib/insight/utils.js.map +1 -0
- package/dist/lib/report.js +122 -0
- package/dist/lib/report.js.map +1 -0
- package/dist/lib/tree.js +44 -0
- package/dist/lib/tree.js.map +1 -0
- package/dist/lib/types.js +82 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/lib/utils.js +283 -0
- package/dist/lib/utils.js.map +1 -0
- package/dist/lib/yaml/builder.js +57 -0
- package/dist/lib/yaml/builder.js.map +1 -0
- package/dist/lib/yaml/index.js +80 -0
- package/dist/lib/yaml/index.js.map +1 -0
- package/dist/lib/yaml/player.js +406 -0
- package/dist/lib/yaml/player.js.map +1 -0
- package/dist/lib/yaml/utils.js +126 -0
- package/dist/lib/yaml/utils.js.map +1 -0
- package/dist/lib/yaml.js +20 -0
- package/dist/lib/yaml.js.map +1 -0
- package/dist/types/agent/agent.d.ts +156 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/index.d.ts +9 -0
- package/dist/types/agent/task-cache.d.ts +48 -0
- package/dist/types/agent/tasks.d.ts +48 -0
- package/dist/types/agent/ui-utils.d.ts +7 -0
- package/dist/types/agent/utils.d.ts +52 -0
- package/dist/types/ai-model/action-executor.d.ts +19 -0
- package/dist/types/ai-model/common.d.ts +569 -0
- package/dist/types/ai-model/conversation-history.d.ts +18 -0
- package/dist/types/ai-model/index.d.ts +13 -0
- package/dist/types/ai-model/inspect.d.ts +46 -0
- package/dist/types/ai-model/llm-planning.d.ts +11 -0
- package/dist/types/ai-model/prompt/assertion.d.ts +2 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +9 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +47 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
- package/dist/types/ai-model/service-caller/index.d.ts +48 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +59 -0
- package/dist/types/device/index.d.ts +2158 -0
- package/dist/types/image/index.d.ts +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/insight/index.d.ts +31 -0
- package/dist/types/insight/utils.d.ts +2 -0
- package/dist/types/report.d.ts +12 -0
- package/dist/types/tree.d.ts +1 -0
- package/dist/types/types.d.ts +414 -0
- package/dist/types/utils.d.ts +40 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +3 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +178 -0
- package/package.json +108 -0
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
import { assert } from "@rpascene/shared/utils";
|
|
2
|
+
import { NodeType } from "@rpascene/shared/constants";
|
|
3
|
+
import { treeToList } from "@rpascene/shared/extractor";
|
|
4
|
+
import { compositeElementInfoImg } from "@rpascene/shared/img";
|
|
5
|
+
import { getDebug } from "@rpascene/shared/logger";
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
/**
 * Numeric enum of AI action kinds with a reverse mapping:
 * `AIActionType.PLAN === 3` and `AIActionType[3] === 'PLAN'`.
 */
const common_AIActionType = /*#__PURE__*/ (() => {
    // Position in this list is the numeric value of the member.
    const memberNames = [
        'ASSERT',
        'INSPECT_ELEMENT',
        'EXTRACT_DATA',
        'PLAN',
        'DESCRIBE_ELEMENT',
        'TEXT',
    ];
    const table = {};
    memberNames.forEach((memberName, value) => {
        table[memberName] = value;
        table[value] = memberName;
    });
    return table;
})();
|
|
16
|
+
// Side length of the fallback box built around a bare point (see adaptQwenBbox / adaptDoubaoBbox).
// It is halved in adaptDoubaoBbox, so it must stay an even number.
const defaultBboxSize = 20;
|
|
17
|
+
// Debug logger for this module, obtained from getDebug under the 'ai:common' name.
const debugInspectUtils = getDebug('ai:common');
|
|
18
|
+
/**
 * Normalizes the bounding box carried by a locate parameter, in place.
 *
 * Some model answers name the box `bbox_2d` instead of `bbox`; when `bbox` is
 * absent the value is moved over and the `bbox_2d` key is deleted. Whatever box
 * is present afterwards is converted through `adaptBbox` for the given mode.
 *
 * @param {object|null|undefined} locate - Locate parameter; mutated in place.
 * @param {number} width - Forwarded to `adaptBbox`.
 * @param {number} height - Forwarded to `adaptBbox`.
 * @param {number} rightLimit - Upper bound applied to the box's third entry.
 * @param {number} bottomLimit - Upper bound applied to the box's fourth entry.
 * @param {string|undefined} vlMode - Model mode used to pick the bbox adapter.
 * @returns {object|null|undefined} The same `locate` reference that was passed in.
 */
function fillBboxParam(locate, width, height, rightLimit, bottomLimit, vlMode) {
    // A nullish locate is handed back untouched instead of throwing on the
    // `bbox_2d` lookup below.
    if (locate == null) return locate;
    if (locate.bbox_2d && !locate.bbox) {
        locate.bbox = locate.bbox_2d;
        delete locate.bbox_2d;
    }
    if (locate.bbox) {
        locate.bbox = adaptBbox(locate.bbox, width, height, rightLimit, bottomLimit, vlMode);
    }
    return locate;
}
|
|
26
|
+
/**
 * Rounds a bbox to integer `[x1, y1, x2, y2]` for the qwen-vl style modes.
 *
 * When the third or fourth entry is not a number, it is derived from the
 * first or second entry plus `defaultBboxSize`.
 *
 * @param {number[]} bbox - At least two entries.
 * @returns {number[]} Four rounded entries.
 * @throws {Error} When `bbox` has fewer than two entries.
 */
function adaptQwenBbox(bbox) {
    if (bbox.length < 2) {
        throw new Error(`invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `);
    }
    const x1 = Math.round(bbox[0]);
    const y1 = Math.round(bbox[1]);
    let x2;
    if (typeof bbox[2] === 'number') {
        x2 = Math.round(bbox[2]);
    } else {
        x2 = Math.round(bbox[0] + defaultBboxSize);
    }
    let y2;
    if (typeof bbox[3] === 'number') {
        y2 = Math.round(bbox[3]);
    } else {
        y2 = Math.round(bbox[1] + defaultBboxSize);
    }
    return [x1, y1, x2, y2];
}
|
|
39
|
+
/**
 * Converts a doubao-vision / ui-tars style bbox into `[left, top, right, bottom]`.
 *
 * Every incoming coordinate is multiplied by `width / 1000` (x) or
 * `height / 1000` (y) and rounded. Accepted shapes:
 *  - a string of four whitespace-separated integers, e.g. `"100 200 300 400"`;
 *  - an array of numbers, optionally wrapped in one extra array;
 *  - an array of strings, where an item holding a `,` or a space contributes
 *    two numbers and any other item contributes one.
 *
 * After flattening, the number of values selects the interpretation:
 *  - 4 or 5 values: entries 0..3 are the two corners;
 *  - 2, 3, 6 or 7 values: entries 0 and 1 are a point; a box of
 *    `defaultBboxSize` is centred on it and clipped to `[0, width] x [0, height]`;
 *  - 8 values: entries 0,1 and 4,5 are used as the two corners
 *    (presumably a four-corner polygon; confirm against the model output).
 *
 * @param {string|Array} bbox - Raw bbox from the model.
 * @param {number} width - Must be greater than 0.
 * @param {number} height - Must be greater than 0.
 * @returns {number[]} `[left, top, right, bottom]`.
 * @throws {Error} When the size is not positive or the bbox shape is not recognised.
 */
function adaptDoubaoBbox(bbox, width, height) {
    assert(width > 0 && height > 0, 'width and height must be greater than 0 in doubao mode');
    const scaleX = (value) => Math.round(value * width / 1000);
    const scaleY = (value) => Math.round(value * height / 1000);

    if (typeof bbox === 'string') {
        // Validate and split the same (trimmed) text, with the same separator
        // class, so padded or tab-separated input that passes validation also parses.
        const trimmed = bbox.trim();
        assert(/^(\d+)\s(\d+)\s(\d+)\s(\d+)$/.test(trimmed), `invalid bbox data string for doubao-vision mode: ${bbox}`);
        const [x1, y1, x2, y2] = trimmed.split(/\s/).map(Number);
        return [scaleX(x1), scaleY(y1), scaleX(x2), scaleY(y2)];
    }

    // Unwrap a single level of nesting: [[a, b, c, d]] -> [a, b, c, d].
    const raw = Array.isArray(bbox) && Array.isArray(bbox[0]) ? bbox[0] : bbox;

    let values = raw;
    if (Array.isArray(raw) && typeof raw[0] === 'string') {
        values = raw.flatMap((item) => {
            if (typeof item === 'string') {
                const separator = item.includes(',') ? ',' : item.includes(' ') ? ' ' : null;
                if (separator !== null) {
                    const [x, y] = item.split(separator);
                    return [Number(x.trim()), Number(y.trim())];
                }
            }
            return [Number(item)];
        });
    }

    // Dispatch on the flattened value count; a nullish bbox falls through to
    // the descriptive error below.
    const count = values == null ? undefined : values.length;

    if (count === 4 || count === 5) {
        return [scaleX(values[0]), scaleY(values[1]), scaleX(values[2]), scaleY(values[3])];
    }
    if (count === 2 || count === 3 || count === 6 || count === 7) {
        const half = defaultBboxSize / 2;
        const centerX = scaleX(values[0]);
        const centerY = scaleY(values[1]);
        return [
            Math.max(0, centerX - half),
            Math.max(0, centerY - half),
            Math.min(width, centerX + half),
            Math.min(height, centerY + half),
        ];
    }
    if (count === 8) {
        return [scaleX(values[0]), scaleY(values[1]), scaleX(values[4]), scaleY(values[5])];
    }
    throw new Error(`invalid bbox data for doubao-vision mode: ${JSON.stringify(raw)} `);
}
|
|
85
|
+
/**
 * Picks the bbox adapter matching `vlMode`, then caps the resulting right and
 * bottom edges at `rightLimit` / `bottomLimit`.
 *
 * @param {*} bbox - Raw bbox as produced by the model.
 * @param {number} width - Passed to the adapters that scale coordinates.
 * @param {number} height - Passed to the adapters that scale coordinates.
 * @param {number} rightLimit - Maximum allowed value for entry 2.
 * @param {number} bottomLimit - Maximum allowed value for entry 3.
 * @param {string|undefined} vlMode - 'doubao-vision', 'vlm-ui-tars', 'gemini',
 *   'qwen3-vl'; anything else uses the qwen adapter.
 * @returns {number[]} `[left, top, right, bottom]`.
 */
function adaptBbox(bbox, width, height, rightLimit, bottomLimit, vlMode) {
    let box;
    switch (vlMode) {
        case 'doubao-vision':
        case 'vlm-ui-tars':
            box = adaptDoubaoBbox(bbox, width, height);
            break;
        case 'gemini':
            box = adaptGeminiBbox(bbox, width, height);
            break;
        case 'qwen3-vl':
            box = normalized01000(bbox, width, height);
            break;
        default:
            box = adaptQwenBbox(bbox);
            break;
    }
    box[2] = Math.min(box[2], rightLimit);
    box[3] = Math.min(box[3], bottomLimit);
    return box;
}
|
|
97
|
+
/**
 * Scales a four-entry bbox by `width / 1000` (entries 0 and 2) and
 * `height / 1000` (entries 1 and 3), rounding each result.
 *
 * @param {number[]} bbox - `[x1, y1, x2, y2]`.
 * @param {number} width
 * @param {number} height
 * @returns {number[]} Rounded, scaled `[x1, y1, x2, y2]`.
 */
function normalized01000(bbox, width, height) {
    const scale = (value, extent) => Math.round(value * extent / 1000);
    return [
        scale(bbox[0], width),
        scale(bbox[1], height),
        scale(bbox[2], width),
        scale(bbox[3], height),
    ];
}
|
|
105
|
+
/**
 * Converts a gemini-mode bbox to `[left, top, right, bottom]`.
 *
 * The input is read with the vertical coordinate first: entries 0 and 2 are
 * scaled by `height / 1000`, entries 1 and 3 by `width / 1000`.
 *
 * @param {number[]} bbox - Four entries, read as `[y1, x1, y2, x2]`.
 * @param {number} width
 * @param {number} height
 * @returns {number[]} `[left, top, right, bottom]`, rounded.
 */
function adaptGeminiBbox(bbox, width, height) {
    const [yStart, xStart, yEnd, xEnd] = [bbox[0], bbox[1], bbox[2], bbox[3]];
    const horizontal = (value) => Math.round(value * width / 1000);
    const vertical = (value) => Math.round(value * height / 1000);
    return [horizontal(xStart), vertical(yStart), horizontal(xEnd), vertical(yEnd)];
}
|
|
117
|
+
/**
 * Converts a model bbox into a `{ left, top, width, height }` rect.
 *
 * The bbox is first adapted with `adaptBbox`. Each dimension is shortened if it
 * would run past `width` / `height`, then forced to be at least 1. The offsets
 * are added to `left` / `top` only after that.
 *
 * @param {*} bbox - Raw bbox from the model.
 * @param {number} width
 * @param {number} height
 * @param {number} [offsetX=0] - Added to the resulting `left`.
 * @param {number} [offsetY=0] - Added to the resulting `top`.
 * @param {number} [rightLimit=width] - Forwarded to `adaptBbox`.
 * @param {number} [bottomLimit=height] - Forwarded to `adaptBbox`.
 * @param {string} [vlMode] - Forwarded to `adaptBbox`.
 * @returns {{left: number, top: number, width: number, height: number}}
 */
function adaptBboxToRect(bbox, width, height, offsetX = 0, offsetY = 0, rightLimit = width, bottomLimit = height, vlMode) {
    debugInspectUtils('adaptBboxToRect', bbox, width, height, 'offset', offsetX, offsetY, 'limit', rightLimit, bottomLimit, 'vlMode', vlMode);
    const box = adaptBbox(bbox, width, height, rightLimit, bottomLimit, vlMode);
    const x1 = box[0];
    const y1 = box[1];
    // Shorten a span that overshoots the limit, then keep it at least 1.
    const fitSpan = (start, span, limit) => Math.max(1, start + span > limit ? limit - start : span);
    const rect = {
        left: x1 + offsetX,
        top: y1 + offsetY,
        width: fitSpan(x1, box[2] - x1, width),
        height: fitSpan(y1, box[3] - y1, height),
    };
    debugInspectUtils('adaptBboxToRect, result=', rect);
    return rect;
}
|
|
137
|
+
// Set once a size warning has been printed, so it is shown at most once.
let warned = false;

/**
 * Prints a one-time console warning when the image size is likely too large.
 *
 * For model names containing "gpt-4o" (case-insensitive) the warning fires when
 * the longer edge exceeds 2000 or the shorter edge exceeds 768. For any other
 * model it fires when either edge exceeds 1800.
 *
 * @param {{width: number, height: number}} size
 * @param {string} modelName
 * @returns {void}
 */
function warnGPT4oSizeLimit(size, modelName) {
    if (warned) return;
    const isGpt4o = modelName.toLowerCase().includes('gpt-4o');
    const { width, height } = size;
    let message = null;
    if (isGpt4o) {
        const longEdge = Math.max(width, height);
        const shortEdge = Math.min(width, height);
        if (longEdge > 2000 || shortEdge > 768) {
            message = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${width}x${height}. Please set your interface to a smaller resolution. Otherwise, the result may be inaccurate.`;
        }
    } else if (width > 1800 || height > 1800) {
        message = `The image size seems too large (${width}x${height}). It may lead to more token usage, slower response, and inaccurate result.`;
    }
    if (message === null) return;
    console.warn(message);
    warned = true;
}
|
|
151
|
+
/**
 * Computes the smallest rect that contains every rect in `rects`.
 *
 * @param {Array<{left: number, top: number, width: number, height: number}>} rects
 * @returns {{left: number, top: number, width: number, height: number}}
 *   For an empty list the result holds `Infinity` / `-Infinity`.
 */
function mergeRects(rects) {
    let left = Infinity;
    let top = Infinity;
    let right = -Infinity;
    let bottom = -Infinity;
    for (const item of rects) {
        left = Math.min(left, item.left);
        top = Math.min(top, item.top);
        right = Math.max(right, item.left + item.width);
        bottom = Math.max(bottom, item.top + item.height);
    }
    return {
        left,
        top,
        width: right - left,
        height: bottom - top,
    };
}
|
|
163
|
+
/**
 * Grows `rect` into a larger search area that stays inside the screen.
 *
 * Each axis is handled the same way: a side shorter than the minimum edge
 * (500 for 'doubao-vision', otherwise 300) is padded up to that minimum, and
 * any other side is padded by 160 on both ends. The area is then shifted back
 * on screen, clamped to a non-negative origin, and finally shrunk if it still
 * overflows.
 *
 * @param {{left: number, top: number, width: number, height: number}} rect -
 *   Mutated in place and returned.
 * @param {{width: number, height: number}} screenSize
 * @param {string} [vlMode]
 * @returns {{left: number, top: number, width: number, height: number}} The same `rect` object.
 */
function expandSearchArea(rect, screenSize, vlMode) {
    const minEdgeSize = vlMode === 'doubao-vision' ? 500 : 300;
    const defaultPadding = 160;

    // Returns [origin, length] of the expanded span along one axis.
    const expandAxis = (start, length, limit) => {
        const padding = length < minEdgeSize ? Math.ceil((minEdgeSize - length) / 2) : defaultPadding;
        let span = Math.max(minEdgeSize, length + 2 * padding);
        let origin = start - padding;
        if (origin + span > limit) origin = limit - span;
        origin = Math.max(0, origin);
        if (origin + span > limit) span = limit - origin;
        return [origin, span];
    };

    const [nextLeft, nextWidth] = expandAxis(rect.left, rect.width, screenSize.width);
    const [nextTop, nextHeight] = expandAxis(rect.top, rect.height, screenSize.height);
    rect.left = nextLeft;
    rect.top = nextTop;
    rect.width = nextWidth;
    rect.height = nextHeight;
    return rect;
}
|
|
184
|
+
/**
 * Builds the marked-up screenshot payload for the model.
 *
 * The element tree is flattened with `treeToList`, elements whose
 * `attributes.nodeType` is `NodeType.TEXT` are dropped, and the remaining
 * elements are passed to `compositeElementInfoImg` with the screenshot.
 *
 * @param {string} screenshotBase64 - Passed through as `inputImgBase64`.
 * @param {object} tree - Element tree accepted by `treeToList`.
 * @param {object} size - Passed through unchanged.
 * @returns {Promise<*>} Whatever `compositeElementInfoImg` resolves to.
 */
async function markupImageForLLM(screenshotBase64, tree, size) {
    const nonTextElements = treeToList(tree).filter(
        (element) => element.attributes.nodeType !== NodeType.TEXT,
    );
    const payload = await compositeElementInfoImg({
        inputImgBase64: screenshotBase64,
        elementsPositionInfo: nonTextElements,
        size,
    });
    return payload;
}
|
|
197
|
+
/**
 * Turns planned actions into YAML flow items.
 *
 * Each plan is matched by `type` against `actionSpace[].name`. A matched plan
 * becomes `{ [interfaceAlias || type]: '', ...params }`, where `params` comes
 * from `dumpActionParam` when the action declares a `paramSchema`. Plans with
 * no matching action are skipped with a console warning. A truthy `sleep` adds
 * a trailing `{ sleep }` item.
 *
 * @param {Array<{type: string, param?: object}>} plans
 * @param {Array<{name: string, interfaceAlias?: string, paramSchema?: object}>} actionSpace
 * @param {number} [sleep]
 * @returns {object[]} The flow items, in plan order.
 */
function buildYamlFlowFromPlans(plans, actionSpace, sleep) {
    const flow = [];
    for (const plan of plans) {
        const verb = plan.type;
        const matched = actionSpace.find((candidate) => candidate.name === verb);
        if (matched) {
            const params = matched.paramSchema
                ? dumpActionParam(plan.param || {}, matched.paramSchema)
                : {};
            flow.push({
                [matched.interfaceAlias || verb]: '',
                ...params,
            });
        } else {
            console.warn(`Cannot convert action ${verb} to yaml flow. Will ignore it.`);
        }
    }
    if (sleep) {
        flow.push({ sleep });
    }
    return flow;
}
|
|
219
|
+
/** A position: numeric `left` and `top`. */
const PointSchema = z.object({
    left: z.number(),
    top: z.number(),
});

/** A size: numeric `width` and `height`, plus an optional numeric `dpr`. */
const SizeSchema = z.object({
    width: z.number(),
    height: z.number(),
    dpr: z.number().optional(),
});

/** A rect: point and size combined, plus an optional numeric `zoom`. */
const RectSchema = PointSchema
    .and(SizeSchema)
    .and(z.object({
        zoom: z.number().optional(),
    }));
|
|
231
|
+
/**
 * Optional multimodal extras for a prompt: a list of named image URLs and a
 * flag `convertHttpImage2Base64`.
 */
const TMultimodalPromptSchema = z.object({
    images: z
        .array(z.object({
            name: z.string(),
            url: z.string(),
        }))
        .optional(),
    convertHttpImage2Base64: z.boolean().optional(),
});

/**
 * A user prompt: either a plain string, or an object with a string `prompt`
 * combined with the (all-optional) multimodal fields.
 */
const TUserPromptSchema = z.union([
    z.string(),
    z.object({ prompt: z.string() }).and(TMultimodalPromptSchema.partial()),
]);
|
|
244
|
+
// Marker key: loadActionParam sets it to `true` on locator values, and
// ifRpasceneLocatorField looks for it in a schema's shape.
const locateFieldFlagName = 'rpascene_location_field_flag';
|
|
245
|
+
/**
 * Schema for a locator given as input: a user prompt plus optional
 * `deepThink`, `cacheable` and `xpath` (string or boolean). Unknown keys are
 * kept because the object is declared with `passthrough()`.
 */
const RpasceneLocationInput = z
    .object({
        prompt: TUserPromptSchema,
        deepThink: z.boolean().optional(),
        cacheable: z.boolean().optional(),
        xpath: z.union([z.string(), z.boolean()]).optional(),
    })
    .passthrough();
|
|
254
|
+
// Schema for a resolved locator: the flag key, prompt options, a `center`
// pair and a `rect`.
// NOTE(review): the result is not assigned or exported in this file, so it is
// built and discarded. Presumably the bundler dropped the binding; confirm
// before removing.
z
    .object({
        [locateFieldFlagName]: z.literal(true),
        prompt: TUserPromptSchema,
        deepThink: z.boolean().optional(),
        cacheable: z.boolean().optional(),
        xpath: z.boolean().optional(),
        center: z.tuple([z.number(), z.number()]),
        rect: RectSchema,
    })
    .passthrough();
|
|
266
|
+
const getRpasceneLocationSchema = ()=>RpasceneLocationInput;
|
|
267
|
+
/**
 * Tells whether a schema field describes a locator.
 *
 * A `ZodOptional` wrapper is unwrapped one level. The field counts as a locator
 * when it is a `ZodObject` whose shape contains the locator flag key or a
 * truthy `prompt` entry.
 *
 * @param {object} field - Schema-like object exposing `_def`.
 * @returns {boolean}
 */
const ifRpasceneLocatorField = (field) => {
    const candidate = field._def?.typeName === 'ZodOptional' ? field._def.innerType : field;
    if (candidate._def?.typeName !== 'ZodObject') {
        return false;
    }
    const shape = candidate._def.shape();
    const hasFlag = locateFieldFlagName in shape;
    const hasPrompt = 'prompt' in shape && Boolean(shape.prompt);
    return hasFlag || hasPrompt;
};
|
|
278
|
+
/**
 * Reduces a locator field to its prompt text.
 *
 * - A string is returned as-is.
 * - An object with a string `prompt` yields that string.
 * - An object whose `prompt` is itself an object with a truthy `prompt` yields
 *   the inner prompt.
 * - Anything else must pass `ifRpasceneLocatorField` and is then converted
 *   with `String(field)`.
 *
 * The value checks run before the schema assertion. `ifRpasceneLocatorField`
 * inspects `_def.typeName`, which plain strings and `{ prompt }` values do not
 * carry, so asserting first would reject the inputs handled here.
 *
 * @param {*} field - Locator value (string or `{ prompt }` object) or schema field.
 * @returns {string}
 * @throws {Error} When the field is neither a usable value nor a locator schema field.
 */
const dumpRpasceneLocatorField = (field) => {
    if (typeof field === 'string') {
        return field;
    }
    if (field && typeof field === 'object' && field.prompt) {
        const { prompt } = field;
        if (typeof prompt === 'string') {
            return prompt;
        }
        if (typeof prompt === 'object' && prompt.prompt) {
            return prompt.prompt;
        }
    }
    assert(ifRpasceneLocatorField(field), 'field is not a rpascene locator field');
    return String(field);
};
|
|
287
|
+
/**
 * Lists the keys of an object schema whose fields are locator fields.
 *
 * @param {object} [zodType] - Schema to inspect. Anything that is not a
 *   `ZodObject` with a `shape` yields an empty list.
 * @param {boolean} [requiredOnly] - When truthy, fields whose own type is
 *   `ZodOptional` are left out.
 * @returns {string[]} Matching keys, in shape order.
 */
const findAllRpasceneLocatorField = (zodType, requiredOnly) => {
    if (!zodType) {
        return [];
    }
    const isObjectSchema = zodType._def?.typeName === 'ZodObject' && zodType.shape;
    if (!isObjectSchema) {
        return [];
    }
    const locatorKeys = [];
    for (const key of Object.keys(zodType.shape)) {
        const field = zodType.shape[key];
        if (!ifRpasceneLocatorField(field)) continue;
        if (requiredOnly && field._def?.typeName === 'ZodOptional') continue;
        locatorKeys.push(key);
    }
    return locatorKeys;
};
|
|
305
|
+
/**
 * Returns a shallow copy of `jsonObject` in which every locator field (as
 * reported by `findAllRpasceneLocatorField`) that holds a `{ prompt }` object
 * is replaced by its prompt text. String values and objects without a usable
 * prompt are left unchanged.
 *
 * @param {object} jsonObject - Action parameters; not mutated.
 * @param {object} zodSchema - Schema used to find the locator fields.
 * @returns {object} The flattened copy.
 */
const dumpActionParam = (jsonObject, zodSchema) => {
    const locatorKeys = findAllRpasceneLocatorField(zodSchema);
    const dumped = { ...jsonObject };
    locatorKeys.forEach((key) => {
        const value = dumped[key];
        if (!value || typeof value !== 'object' || !value.prompt) {
            return;
        }
        const inner = value.prompt;
        if (typeof inner === 'string') {
            dumped[key] = inner;
        } else if (typeof inner === 'object' && inner.prompt) {
            dumped[key] = inner.prompt;
        }
    });
    return dumped;
};
|
|
324
|
+
/**
 * Returns a shallow copy of `jsonObject` in which every locator field that
 * holds a non-empty string is wrapped as
 * `{ [locateFieldFlagName]: true, prompt: <string> }`.
 *
 * @param {object} jsonObject - Action parameters; not mutated.
 * @param {object} zodSchema - Schema used to find the locator fields.
 * @returns {object} The expanded copy.
 */
const loadActionParam = (jsonObject, zodSchema) => {
    const locatorKeys = findAllRpasceneLocatorField(zodSchema);
    const loaded = { ...jsonObject };
    locatorKeys.forEach((key) => {
        const value = loaded[key];
        if (typeof value === 'string' && value) {
            loaded[key] = {
                [locateFieldFlagName]: true,
                prompt: value,
            };
        }
    });
    return loaded;
};
|
|
338
|
+
/**
 * Validates action parameters with `zodSchema` while leaving locator fields
 * untouched.
 *
 * With no locator fields, the parameters are parsed directly. Otherwise each
 * locator field present in the input is replaced by a `{ prompt: '_dummy_' }`
 * placeholder for validation, and the original values are written back onto
 * the parsed result.
 *
 * @param {object|null|undefined} rawParam - Nullish input is treated as `{}`.
 * @param {object} zodSchema - Schema exposing `parse`.
 * @returns {object} The parsed parameters with the original locator values.
 * @throws Whatever `zodSchema.parse` throws.
 */
const parseActionParam = (rawParam, zodSchema) => {
    const param = rawParam ?? {};
    const locatorKeys = findAllRpasceneLocatorField(zodSchema);
    if (locatorKeys.length === 0) {
        return zodSchema.parse(param);
    }

    // Remember the real locator values before they are swapped for placeholders.
    const originals = {};
    for (const key of locatorKeys) {
        if (key in param) {
            originals[key] = param[key];
        }
    }

    const candidate = {};
    for (const key in param) {
        candidate[key] = locatorKeys.includes(key) ? { prompt: '_dummy_' } : param[key];
    }

    const validated = zodSchema.parse(candidate);
    for (const key in originals) {
        validated[key] = originals[key];
    }
    return validated;
};
|
|
353
|
+
export { common_AIActionType as AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBbox, adaptBboxToRect, adaptDoubaoBbox, adaptGeminiBbox, adaptQwenBbox, buildYamlFlowFromPlans, dumpActionParam, dumpRpasceneLocatorField, expandSearchArea, fillBboxParam, findAllRpasceneLocatorField, getRpasceneLocationSchema, ifRpasceneLocatorField, loadActionParam, markupImageForLLM, mergeRects, normalized01000, parseActionParam, warnGPT4oSizeLimit };
|
|
354
|
+
|
|
355
|
+
//# sourceMappingURL=common.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model\\common.mjs","sources":["webpack://@rpascene/core/./src/ai-model/common.ts"],"sourcesContent":["import type {\n AIUsageInfo,\n BaseElement,\n DeviceAction,\n ElementTreeNode,\n RpasceneYamlFlowItem,\n PlanningAction,\n Rect,\n Size,\n} from '@/types';\nimport { assert } from '@rpascene/shared/utils';\n\nimport type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nimport type { PlanningLocateParam } from '@/types';\nimport { NodeType } from '@rpascene/shared/constants';\nimport type { TVlModeTypes } from '@rpascene/shared/env';\nimport { treeToList } from '@rpascene/shared/extractor';\nimport { compositeElementInfoImg } from '@rpascene/shared/img';\nimport { getDebug } from '@rpascene/shared/logger';\nimport { z } from 'zod';\n\nexport type AIArgs = ChatCompletionMessageParam[];\n\nexport enum AIActionType {\n ASSERT = 0,\n INSPECT_ELEMENT = 1,\n EXTRACT_DATA = 2,\n PLAN = 3,\n DESCRIBE_ELEMENT = 4,\n TEXT = 5,\n}\n\nconst defaultBboxSize = 20; // must be even number\nconst debugInspectUtils = getDebug('ai:common');\n\n// transform the param of locate from qwen mode\nexport function fillBboxParam(\n locate: PlanningLocateParam,\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n) {\n // The Qwen model might have hallucinations of naming bbox as bbox_2d.\n if ((locate as any).bbox_2d && !locate?.bbox) {\n locate.bbox = (locate as any).bbox_2d;\n // biome-ignore lint/performance/noDelete: <explanation>\n delete (locate as any).bbox_2d;\n }\n\n if (locate?.bbox) {\n locate.bbox = adaptBbox(\n locate.bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n }\n\n return locate;\n}\n\nexport function adaptQwenBbox(\n bbox: number[],\n): [number, number, number, number] {\n if (bbox.length < 2) {\n const msg = `invalid bbox data for qwen-vl mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n }\n\n const result: [number, number, number, 
number] = [\n Math.round(bbox[0]),\n Math.round(bbox[1]),\n typeof bbox[2] === 'number'\n ? Math.round(bbox[2])\n : Math.round(bbox[0] + defaultBboxSize),\n typeof bbox[3] === 'number'\n ? Math.round(bbox[3])\n : Math.round(bbox[1] + defaultBboxSize),\n ];\n return result;\n}\n\nexport function adaptDoubaoBbox(\n bbox: string[] | number[] | string,\n width: number,\n height: number,\n): [number, number, number, number] {\n assert(\n width > 0 && height > 0,\n 'width and height must be greater than 0 in doubao mode',\n );\n\n if (typeof bbox === 'string') {\n assert(\n /^(\\d+)\\s(\\d+)\\s(\\d+)\\s(\\d+)$/.test(bbox.trim()),\n `invalid bbox data string for doubao-vision mode: ${bbox}`,\n );\n const splitted = bbox.split(' ');\n if (splitted.length === 4) {\n return [\n Math.round((Number(splitted[0]) * width) / 1000),\n Math.round((Number(splitted[1]) * height) / 1000),\n Math.round((Number(splitted[2]) * width) / 1000),\n Math.round((Number(splitted[3]) * height) / 1000),\n ];\n }\n throw new Error(`invalid bbox data string for doubao-vision mode: ${bbox}`);\n }\n\n if (Array.isArray(bbox) && Array.isArray(bbox[0])) {\n bbox = bbox[0];\n }\n\n let bboxList: number[] = [];\n if (Array.isArray(bbox) && typeof bbox[0] === 'string') {\n bbox.forEach((item) => {\n if (typeof item === 'string' && item.includes(',')) {\n const [x, y] = item.split(',');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else if (typeof item === 'string' && item.includes(' ')) {\n const [x, y] = item.split(' ');\n bboxList.push(Number(x.trim()), Number(y.trim()));\n } else {\n bboxList.push(Number(item));\n }\n });\n } else {\n bboxList = bbox as any;\n }\n\n if (bboxList.length === 4 || bboxList.length === 5) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[2] * width) / 1000),\n Math.round((bboxList[3] * height) / 1000),\n ];\n }\n\n // treat the bbox as a center point\n if (\n bboxList.length === 6 ||\n 
bboxList.length === 2 ||\n bboxList.length === 3 ||\n bboxList.length === 7\n ) {\n return [\n Math.max(\n 0,\n Math.round((bboxList[0] * width) / 1000) - defaultBboxSize / 2,\n ),\n Math.max(\n 0,\n Math.round((bboxList[1] * height) / 1000) - defaultBboxSize / 2,\n ),\n Math.min(\n width,\n Math.round((bboxList[0] * width) / 1000) + defaultBboxSize / 2,\n ),\n Math.min(\n height,\n Math.round((bboxList[1] * height) / 1000) + defaultBboxSize / 2,\n ),\n ];\n }\n\n if (bbox.length === 8) {\n return [\n Math.round((bboxList[0] * width) / 1000),\n Math.round((bboxList[1] * height) / 1000),\n Math.round((bboxList[4] * width) / 1000),\n Math.round((bboxList[5] * height) / 1000),\n ];\n }\n\n const msg = `invalid bbox data for doubao-vision mode: ${JSON.stringify(bbox)} `;\n throw new Error(msg);\n}\n\nexport function adaptBbox(\n bbox: number[],\n width: number,\n height: number,\n rightLimit: number,\n bottomLimit: number,\n vlMode: TVlModeTypes | undefined,\n): [number, number, number, number] {\n let result: [number, number, number, number] = [0, 0, 0, 0];\n if (vlMode === 'doubao-vision' || vlMode === 'vlm-ui-tars') {\n result = adaptDoubaoBbox(bbox, width, height);\n } else if (vlMode === 'gemini') {\n result = adaptGeminiBbox(bbox, width, height);\n } else if (vlMode === 'qwen3-vl') {\n result = normalized01000(bbox, width, height);\n } else {\n result = adaptQwenBbox(bbox);\n }\n\n result[2] = Math.min(result[2], rightLimit);\n result[3] = Math.min(result[3], bottomLimit);\n\n return result;\n}\n\n// x1, y1, x2, y2 -> 0-1000\nexport function normalized01000(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, number, number] {\n return [\n Math.round((bbox[0] * width) / 1000),\n Math.round((bbox[1] * height) / 1000),\n Math.round((bbox[2] * width) / 1000),\n Math.round((bbox[3] * height) / 1000),\n ];\n}\n\n// y1, x1, y2, x2 -> 0-1000\nexport function adaptGeminiBbox(\n bbox: number[],\n width: number,\n height: number,\n): [number, number, 
number, number] {\n const left = Math.round((bbox[1] * width) / 1000);\n const top = Math.round((bbox[0] * height) / 1000);\n const right = Math.round((bbox[3] * width) / 1000);\n const bottom = Math.round((bbox[2] * height) / 1000);\n return [left, top, right, bottom];\n}\n\nexport function adaptBboxToRect(\n bbox: number[],\n width: number,\n height: number,\n offsetX = 0,\n offsetY = 0,\n rightLimit = width,\n bottomLimit = height,\n vlMode?: TVlModeTypes | undefined,\n): Rect {\n debugInspectUtils(\n 'adaptBboxToRect',\n bbox,\n width,\n height,\n 'offset',\n offsetX,\n offsetY,\n 'limit',\n rightLimit,\n bottomLimit,\n 'vlMode',\n vlMode,\n );\n const [left, top, right, bottom] = adaptBbox(\n bbox,\n width,\n height,\n rightLimit,\n bottomLimit,\n vlMode,\n );\n\n // Calculate initial rect dimensions\n const rectLeft = left;\n const rectTop = top;\n let rectWidth = right - left;\n let rectHeight = bottom - top;\n\n // Ensure the rect doesn't exceed image boundaries\n // If right edge exceeds width, adjust the width\n if (rectLeft + rectWidth > width) {\n rectWidth = width - rectLeft;\n }\n\n // If bottom edge exceeds height, adjust the height\n if (rectTop + rectHeight > height) {\n rectHeight = height - rectTop;\n }\n\n // Ensure minimum dimensions (width and height should be at least 1)\n rectWidth = Math.max(1, rectWidth);\n rectHeight = Math.max(1, rectHeight);\n\n const rect = {\n left: rectLeft + offsetX,\n top: rectTop + offsetY,\n width: rectWidth,\n height: rectHeight,\n };\n debugInspectUtils('adaptBboxToRect, result=', rect);\n\n return rect;\n}\n\nlet warned = false;\nexport function warnGPT4oSizeLimit(size: Size, modelName: string) {\n if (warned) return;\n if (modelName.toLowerCase().includes('gpt-4o')) {\n const warningMsg = `GPT-4o has a maximum image input size of 2000x768 or 768x2000, but got ${size.width}x${size.height}. Please set your interface to a smaller resolution. 
Otherwise, the result may be inaccurate.`;\n\n if (\n Math.max(size.width, size.height) > 2000 ||\n Math.min(size.width, size.height) > 768\n ) {\n console.warn(warningMsg);\n warned = true;\n }\n } else if (size.width > 1800 || size.height > 1800) {\n console.warn(\n `The image size seems too large (${size.width}x${size.height}). It may lead to more token usage, slower response, and inaccurate result.`,\n );\n warned = true;\n }\n}\n\nexport function mergeRects(rects: Rect[]) {\n const minLeft = Math.min(...rects.map((r) => r.left));\n const minTop = Math.min(...rects.map((r) => r.top));\n const maxRight = Math.max(...rects.map((r) => r.left + r.width));\n const maxBottom = Math.max(...rects.map((r) => r.top + r.height));\n return {\n left: minLeft,\n top: minTop,\n width: maxRight - minLeft,\n height: maxBottom - minTop,\n };\n}\n\n// expand the search area to at least 300 x 300, or add a default padding\nexport function expandSearchArea(\n rect: Rect,\n screenSize: Size,\n vlMode: TVlModeTypes | undefined,\n) {\n const minEdgeSize = vlMode === 'doubao-vision' ? 500 : 300;\n const defaultPadding = 160;\n\n // Calculate padding needed to reach minimum edge size\n const paddingSizeHorizontal =\n rect.width < minEdgeSize\n ? Math.ceil((minEdgeSize - rect.width) / 2)\n : defaultPadding;\n const paddingSizeVertical =\n rect.height < minEdgeSize\n ? 
Math.ceil((minEdgeSize - rect.height) / 2)\n : defaultPadding;\n\n // Calculate new dimensions (ensure minimum edge size)\n let newWidth = Math.max(minEdgeSize, rect.width + paddingSizeHorizontal * 2);\n let newHeight = Math.max(minEdgeSize, rect.height + paddingSizeVertical * 2);\n\n // Calculate initial position with padding\n let newLeft = rect.left - paddingSizeHorizontal;\n let newTop = rect.top - paddingSizeVertical;\n\n // Ensure the rect doesn't exceed screen boundaries by adjusting position\n // If the rect goes beyond the right edge, shift it left\n if (newLeft + newWidth > screenSize.width) {\n newLeft = screenSize.width - newWidth;\n }\n\n // If the rect goes beyond the bottom edge, shift it up\n if (newTop + newHeight > screenSize.height) {\n newTop = screenSize.height - newHeight;\n }\n\n // Ensure the rect doesn't go beyond the left/top edges\n newLeft = Math.max(0, newLeft);\n newTop = Math.max(0, newTop);\n\n // If after position adjustment, the rect still exceeds screen boundaries,\n // clamp the dimensions to fit within screen\n if (newLeft + newWidth > screenSize.width) {\n newWidth = screenSize.width - newLeft;\n }\n if (newTop + newHeight > screenSize.height) {\n newHeight = screenSize.height - newTop;\n }\n\n rect.left = newLeft;\n rect.top = newTop;\n rect.width = newWidth;\n rect.height = newHeight;\n\n return rect;\n}\n\nexport async function markupImageForLLM(\n screenshotBase64: string,\n tree: ElementTreeNode<BaseElement>,\n size: Size,\n) {\n const elementsInfo = treeToList(tree);\n const elementsPositionInfoWithoutText = elementsInfo!.filter(\n (elementInfo) => {\n if (elementInfo.attributes.nodeType === NodeType.TEXT) {\n return false;\n }\n return true;\n },\n );\n\n const imagePayload = await compositeElementInfoImg({\n inputImgBase64: screenshotBase64,\n elementsPositionInfo: elementsPositionInfoWithoutText,\n size,\n });\n return imagePayload;\n}\n\nexport function buildYamlFlowFromPlans(\n plans: PlanningAction[],\n actionSpace: 
DeviceAction<any>[],\n sleep?: number,\n): RpasceneYamlFlowItem[] {\n const flow: RpasceneYamlFlowItem[] = [];\n\n for (const plan of plans) {\n const verb = plan.type;\n\n const action = actionSpace.find((action) => action.name === verb);\n if (!action) {\n console.warn(\n `Cannot convert action ${verb} to yaml flow. Will ignore it.`,\n );\n continue;\n }\n\n const flowKey = action.interfaceAlias || verb;\n const flowParam = action.paramSchema\n ? dumpActionParam(plan.param || {}, action.paramSchema)\n : {};\n\n const flowItem: RpasceneYamlFlowItem = {\n [flowKey]: '',\n ...flowParam,\n };\n\n flow.push(flowItem);\n }\n\n if (sleep) {\n flow.push({\n sleep,\n });\n }\n\n return flow;\n}\n\n// Zod schemas for shared types\nexport const PointSchema = z.object({\n left: z.number(),\n top: z.number(),\n});\n\nexport const SizeSchema = z.object({\n width: z.number(),\n height: z.number(),\n dpr: z.number().optional(),\n});\n\nexport const RectSchema = PointSchema.and(SizeSchema).and(\n z.object({\n zoom: z.number().optional(),\n }),\n);\n\n// Zod schema for TMultimodalPrompt\nexport const TMultimodalPromptSchema = z.object({\n images: z\n .array(\n z.object({\n name: z.string(),\n url: z.string(),\n }),\n )\n .optional(),\n convertHttpImage2Base64: z.boolean().optional(),\n});\n\n// Zod schema for TUserPrompt\nexport const TUserPromptSchema = z.union([\n z.string(),\n z\n .object({\n prompt: z.string(),\n })\n .and(TMultimodalPromptSchema.partial()),\n]);\n\n// Generate TypeScript types from Zod schemas\nexport type TMultimodalPrompt = z.infer<typeof TMultimodalPromptSchema>;\nexport type TUserPrompt = z.infer<typeof TUserPromptSchema>;\n\nconst locateFieldFlagName = 'rpascene_location_field_flag';\n\n// Schema for locator field input (when users provide locate parameters)\nconst RpasceneLocationInput = z\n .object({\n prompt: TUserPromptSchema,\n deepThink: z.boolean().optional(),\n cacheable: z.boolean().optional(),\n xpath: z.union([z.string(), 
z.boolean()]).optional(),\n })\n .passthrough();\n\n// Schema for locator field result (when AI returns locate results)\nconst RpasceneLocationResult = z\n .object({\n [locateFieldFlagName]: z.literal(true),\n prompt: TUserPromptSchema,\n\n // optional fields\n deepThink: z.boolean().optional(), // only available in vl model\n cacheable: z.boolean().optional(),\n xpath: z.boolean().optional(), // preset result for xpath\n\n // these two fields will only appear in the result\n center: z.tuple([z.number(), z.number()]),\n rect: RectSchema,\n })\n .passthrough();\n\n// Export the result type - this is used for runtime results that include center and rect\nexport type RpasceneLocationResultType = z.infer<typeof RpasceneLocationResult>;\n\n// Export the input type - this is the inferred type from getRpasceneLocationSchema()\nexport type RpasceneLocationInputType = z.infer<typeof RpasceneLocationInput>;\n\n/**\n * Returns the schema for locator fields.\n * This now returns the input schema which is more permissive and suitable for validation.\n */\nexport const getRpasceneLocationSchema = () => {\n return RpasceneLocationInput;\n};\n\nexport const ifRpasceneLocatorField = (field: any): boolean => {\n // Handle optional fields by getting the inner type\n let actualField = field;\n if (actualField._def?.typeName === 'ZodOptional') {\n actualField = actualField._def.innerType;\n }\n\n // Check if this is a ZodObject\n if (actualField._def?.typeName === 'ZodObject') {\n const shape = actualField._def.shape();\n\n // Method 1: Check for the location field flag (for result schema)\n if (locateFieldFlagName in shape) {\n return true;\n }\n\n // Method 2: Check if it's the input schema by checking for 'prompt' field\n // Input schema has 'prompt' as a required field\n if ('prompt' in shape && shape.prompt) {\n return true;\n }\n }\n\n return false;\n};\n\nexport const dumpRpasceneLocatorField = (field: any): string => {\n assert(\n ifRpasceneLocatorField(field),\n 'field is not 
a rpascene locator field',\n );\n\n // If field is a string, return it directly\n if (typeof field === 'string') {\n return field;\n }\n\n // If field is an object with prompt property\n if (field && typeof field === 'object' && field.prompt) {\n // If prompt is a string, return it directly\n if (typeof field.prompt === 'string') {\n return field.prompt;\n }\n // If prompt is a TUserPrompt object, extract the prompt string\n if (typeof field.prompt === 'object' && field.prompt.prompt) {\n return field.prompt.prompt; // TODO: dump images if necessary\n }\n }\n\n // Fallback: try to convert to string\n return String(field);\n};\n\nexport const findAllRpasceneLocatorField = (\n zodType?: z.ZodType<any>,\n requiredOnly?: boolean,\n): string[] => {\n if (!zodType) {\n return [];\n }\n\n // Check if this is a ZodObject by checking if it has a shape property\n const zodObject = zodType as any;\n if (zodObject._def?.typeName === 'ZodObject' && zodObject.shape) {\n const keys = Object.keys(zodObject.shape);\n return keys.filter((key) => {\n const field = zodObject.shape[key];\n if (!ifRpasceneLocatorField(field)) {\n return false;\n }\n\n // If requiredOnly is true, filter out optional fields\n if (requiredOnly) {\n return field._def?.typeName !== 'ZodOptional';\n }\n\n return true;\n });\n }\n\n // For other ZodType instances, we can't extract field names\n return [];\n};\n\nexport const dumpActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllRpasceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue) {\n // If it's already a string, keep it as is\n if (typeof fieldValue === 'string') {\n result[fieldName] = fieldValue;\n } else if (typeof fieldValue === 'object') {\n // Check if this field is actually a RpasceneLocationType object\n if (fieldValue.prompt) {\n // If prompt is 
a string, use it directly\n if (typeof fieldValue.prompt === 'string') {\n result[fieldName] = fieldValue.prompt;\n } else if (\n typeof fieldValue.prompt === 'object' &&\n fieldValue.prompt.prompt\n ) {\n // If prompt is a TUserPrompt object, extract the prompt string\n result[fieldName] = fieldValue.prompt.prompt;\n }\n }\n }\n }\n }\n\n return result;\n};\n\nexport const loadActionParam = (\n jsonObject: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n const locatorFields = findAllRpasceneLocatorField(zodSchema);\n const result = { ...jsonObject };\n\n for (const fieldName of locatorFields) {\n const fieldValue = result[fieldName];\n if (fieldValue && typeof fieldValue === 'string') {\n result[fieldName] = {\n [locateFieldFlagName]: true,\n prompt: fieldValue,\n };\n }\n }\n\n return result;\n};\n\n/**\n * Parse and validate action parameters using Zod schema.\n * All fields are validated through Zod, EXCEPT locator fields which are skipped.\n * Default values defined in the schema are automatically applied.\n *\n * Locator fields are special business logic fields with complex validation requirements,\n * so they are intentionally excluded from Zod parsing and use existing validation logic.\n */\nexport const parseActionParam = (\n rawParam: Record<string, any>,\n zodSchema: z.ZodType<any>,\n): Record<string, any> => {\n // Handle undefined or null rawParam by providing an empty object\n const param = rawParam ?? 
{};\n\n // Find all locate fields in the schema\n const locateFields = findAllRpasceneLocatorField(zodSchema);\n\n // If there are no locate fields, just do normal validation\n if (locateFields.length === 0) {\n return zodSchema.parse(param);\n }\n\n // Extract locate field values to restore later\n const locateFieldValues: Record<string, any> = {};\n for (const fieldName of locateFields) {\n if (fieldName in param) {\n locateFieldValues[fieldName] = param[fieldName];\n }\n }\n\n // Build params for validation - skip locate fields and use dummy values\n const paramsForValidation: Record<string, any> = {};\n for (const key in param) {\n if (locateFields.includes(key)) {\n // Use dummy value to satisfy schema validation\n paramsForValidation[key] = { prompt: '_dummy_' };\n } else {\n paramsForValidation[key] = param[key];\n }\n }\n\n // Validate with dummy locate values\n const validated = zodSchema.parse(paramsForValidation);\n\n // Restore the actual locate field values (unvalidated, as per business requirement)\n for (const fieldName in locateFieldValues) {\n validated[fieldName] = locateFieldValues[fieldName];\n }\n\n return 
validated;\n};\n"],"names":["AIActionType","defaultBboxSize","debugInspectUtils","getDebug","fillBboxParam","locate","width","height","rightLimit","bottomLimit","vlMode","adaptBbox","adaptQwenBbox","bbox","msg","JSON","Error","result","Math","adaptDoubaoBbox","assert","splitted","Number","Array","bboxList","item","x","y","adaptGeminiBbox","normalized01000","left","top","right","bottom","adaptBboxToRect","offsetX","offsetY","rectLeft","rectTop","rectWidth","rectHeight","rect","warned","warnGPT4oSizeLimit","size","modelName","warningMsg","console","mergeRects","rects","minLeft","r","minTop","maxRight","maxBottom","expandSearchArea","screenSize","minEdgeSize","defaultPadding","paddingSizeHorizontal","paddingSizeVertical","newWidth","newHeight","newLeft","newTop","markupImageForLLM","screenshotBase64","tree","elementsInfo","treeToList","elementsPositionInfoWithoutText","elementInfo","NodeType","imagePayload","compositeElementInfoImg","buildYamlFlowFromPlans","plans","actionSpace","sleep","flow","plan","verb","action","flowKey","flowParam","dumpActionParam","flowItem","PointSchema","z","SizeSchema","RectSchema","TMultimodalPromptSchema","TUserPromptSchema","locateFieldFlagName","RpasceneLocationInput","getRpasceneLocationSchema","ifRpasceneLocatorField","field","_actualField__def","_actualField__def1","actualField","shape","dumpRpasceneLocatorField","String","findAllRpasceneLocatorField","zodType","requiredOnly","_zodObject__def","zodObject","keys","Object","key","_field__def","jsonObject","zodSchema","locatorFields","fieldName","fieldValue","loadActionParam","parseActionParam","rawParam","param","locateFields","locateFieldValues","paramsForValidation","validated"],"mappings":";;;;;;AAwBO,IAAKA,sBAAYA,WAAAA,GAAAA,SAAZA,YAAY;;;;;;;WAAZA;;AASZ,MAAMC,kBAAkB;AACxB,MAAMC,oBAAoBC,SAAS;AAG5B,SAASC,cACdC,MAA2B,EAC3BC,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAGhC,IAAKL,OAAe,OAAO,IAAI,CAACA,CAAAA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,AAAD,GAAG;QAC5CA,OAAO,IAAI,GAAIA,OAAe
,OAAO;QAErC,OAAQA,OAAe,OAAO;IAChC;IAEA,IAAIA,QAAAA,SAAAA,KAAAA,IAAAA,OAAQ,IAAI,EACdA,OAAO,IAAI,GAAGM,UACZN,OAAO,IAAI,EACXC,OACAC,QACAC,YACAC,aACAC;IAIJ,OAAOL;AACT;AAEO,SAASO,cACdC,IAAc;IAEd,IAAIA,KAAK,MAAM,GAAG,GAAG;QACnB,MAAMC,MAAM,CAAC,oCAAoC,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;QAC1E,MAAM,IAAIG,MAAMF;IAClB;IAEA,MAAMG,SAA2C;QAC/CC,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE;QACC,YAAnB,OAAOA,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;QACN,YAAnB,OAAOY,IAAI,CAAC,EAAE,GACVK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,IAClBK,KAAK,KAAK,CAACL,IAAI,CAAC,EAAE,GAAGZ;KAC1B;IACD,OAAOgB;AACT;AAEO,SAASE,gBACdN,IAAkC,EAClCP,KAAa,EACbC,MAAc;IAEda,OACEd,QAAQ,KAAKC,SAAS,GACtB;IAGF,IAAI,AAAgB,YAAhB,OAAOM,MAAmB;QAC5BO,OACE,+BAA+B,IAAI,CAACP,KAAK,IAAI,KAC7C,CAAC,iDAAiD,EAAEA,MAAM;QAE5D,MAAMQ,WAAWR,KAAK,KAAK,CAAC;QAC5B,IAAIQ,AAAoB,MAApBA,SAAS,MAAM,EACjB,OAAO;YACLH,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;YAC5CW,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAIf,QAAS;YAC3CY,KAAK,KAAK,CAAEI,OAAOD,QAAQ,CAAC,EAAE,IAAId,SAAU;SAC7C;QAEH,MAAM,IAAIS,MAAM,CAAC,iDAAiD,EAAEH,MAAM;IAC5E;IAEA,IAAIU,MAAM,OAAO,CAACV,SAASU,MAAM,OAAO,CAACV,IAAI,CAAC,EAAE,GAC9CA,OAAOA,IAAI,CAAC,EAAE;IAGhB,IAAIW,WAAqB,EAAE;IAC3B,IAAID,MAAM,OAAO,CAACV,SAAS,AAAmB,YAAnB,OAAOA,IAAI,CAAC,EAAE,EACvCA,KAAK,OAAO,CAAC,CAACY;QACZ,IAAI,AAAgB,YAAhB,OAAOA,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YAClD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OAAO,IAAI,AAAgB,YAAhB,OAAOF,QAAqBA,KAAK,QAAQ,CAAC,MAAM;YACzD,MAAM,CAACC,GAAGC,EAAE,GAAGF,KAAK,KAAK,CAAC;YAC1BD,SAAS,IAAI,CAACF,OAAOI,EAAE,IAAI,KAAKJ,OAAOK,EAAE,IAAI;QAC/C,OACEH,SAAS,IAAI,CAACF,OAAOG;IAEzB;SAEAD,WAAWX;IAGb,IAAIW,AAAoB,MAApBA,SAAS,MAAM,IAAUA,AAAoB,MAApBA,SAAS,MAAM,EAC1C,OAAO;QACLN,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;QACpCW,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,G
AAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;KACrC;IAIH,IACEiB,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,IACfA,AAAoB,MAApBA,SAAS,MAAM,EAEf,OAAO;QACLN,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACN,GACAA,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU,QAAQN,kBAAkB;QAEhEiB,KAAK,GAAG,CACNZ,OACAY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS,QAAQL,kBAAkB;QAE/DiB,KAAK,GAAG,CACNX,QACAW,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU,QAAQN,kBAAkB;KAEjE;IAGH,IAAIY,AAAgB,MAAhBA,KAAK,MAAM,EACb,OAAO;QACLK,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;QACpCW,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGlB,QAAS;QACnCY,KAAK,KAAK,CAAEM,QAAQ,CAAC,EAAE,GAAGjB,SAAU;KACrC;IAGH,MAAMO,MAAM,CAAC,0CAA0C,EAAEC,KAAK,SAAS,CAACF,MAAM,CAAC,CAAC;IAChF,MAAM,IAAIG,MAAMF;AAClB;AAEO,SAASH,UACdE,IAAc,EACdP,KAAa,EACbC,MAAc,EACdC,UAAkB,EAClBC,WAAmB,EACnBC,MAAgC;IAEhC,IAAIO,SAA2C;QAAC;QAAG;QAAG;QAAG;KAAE;IAEzDA,SADEP,AAAW,oBAAXA,UAA8BA,AAAW,kBAAXA,SACvBS,gBAAgBN,MAAMP,OAAOC,UAC7BG,AAAW,aAAXA,SACAkB,gBAAgBf,MAAMP,OAAOC,UAC7BG,AAAW,eAAXA,SACAmB,gBAAgBhB,MAAMP,OAAOC,UAE7BK,cAAcC;IAGzBI,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAET;IAChCS,MAAM,CAAC,EAAE,GAAGC,KAAK,GAAG,CAACD,MAAM,CAAC,EAAE,EAAER;IAEhC,OAAOQ;AACT;AAGO,SAASY,gBACdhB,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,OAAO;QACLW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;QAChCW,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;QAC/BY,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;KACjC;AACH;AAGO,SAASqB,gBACdf,IAAc,EACdP,KAAa,EACbC,MAAc;IAEd,MAAMuB,OAAOZ,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC5C,MAAMyB,MAAMb,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC5C,MAAMyB,QAAQd,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGP,QAAS;IAC7C,MAAM2B,SAASf,KAAK,KAAK,CAAEL,IAAI,CAAC,EAAE,GAAGN,SAAU;IAC/C,OAAO;QAACuB;QAAMC;QAAKC;QAAOC;KAAO;AACnC;AAEO,SAASC,gBACdrB,IAAc,EACdP,KAAa,EACbC,MAAc,EACd4B,UAAU,CAAC,EACX
C,UAAU,CAAC,EACX5B,aAAaF,KAAK,EAClBG,cAAcF,MAAM,EACpBG,MAAiC;IAEjCR,kBACE,mBACAW,MACAP,OACAC,QACA,UACA4B,SACAC,SACA,SACA5B,YACAC,aACA,UACAC;IAEF,MAAM,CAACoB,MAAMC,KAAKC,OAAOC,OAAO,GAAGtB,UACjCE,MACAP,OACAC,QACAC,YACAC,aACAC;IAIF,MAAM2B,WAAWP;IACjB,MAAMQ,UAAUP;IAChB,IAAIQ,YAAYP,QAAQF;IACxB,IAAIU,aAAaP,SAASF;IAI1B,IAAIM,WAAWE,YAAYjC,OACzBiC,YAAYjC,QAAQ+B;IAItB,IAAIC,UAAUE,aAAajC,QACzBiC,aAAajC,SAAS+B;IAIxBC,YAAYrB,KAAK,GAAG,CAAC,GAAGqB;IACxBC,aAAatB,KAAK,GAAG,CAAC,GAAGsB;IAEzB,MAAMC,OAAO;QACX,MAAMJ,WAAWF;QACjB,KAAKG,UAAUF;QACf,OAAOG;QACP,QAAQC;IACV;IACAtC,kBAAkB,4BAA4BuC;IAE9C,OAAOA;AACT;AAEA,IAAIC,SAAS;AACN,SAASC,mBAAmBC,IAAU,EAAEC,SAAiB;IAC9D,IAAIH,QAAQ;IACZ,IAAIG,UAAU,WAAW,GAAG,QAAQ,CAAC,WAAW;QAC9C,MAAMC,aAAa,CAAC,uEAAuE,EAAEF,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,6FAA6F,CAAC;QAErN,IACE1B,KAAK,GAAG,CAAC0B,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,QACpC1B,KAAK,GAAG,CAAC0B,KAAK,KAAK,EAAEA,KAAK,MAAM,IAAI,KACpC;YACAG,QAAQ,IAAI,CAACD;YACbJ,SAAS;QACX;IACF,OAAO,IAAIE,KAAK,KAAK,GAAG,QAAQA,KAAK,MAAM,GAAG,MAAM;QAClDG,QAAQ,IAAI,CACV,CAAC,gCAAgC,EAAEH,KAAK,KAAK,CAAC,CAAC,EAAEA,KAAK,MAAM,CAAC,2EAA2E,CAAC;QAE3IF,SAAS;IACX;AACF;AAEO,SAASM,WAAWC,KAAa;IACtC,MAAMC,UAAUhC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI;IACnD,MAAMC,SAASlC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG;IACjD,MAAME,WAAWnC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,IAAI,GAAGA,EAAE,KAAK;IAC9D,MAAMG,YAAYpC,KAAK,GAAG,IAAI+B,MAAM,GAAG,CAAC,CAACE,IAAMA,EAAE,GAAG,GAAGA,EAAE,MAAM;IAC/D,OAAO;QACL,MAAMD;QACN,KAAKE;QACL,OAAOC,WAAWH;QAClB,QAAQI,YAAYF;IACtB;AACF;AAGO,SAASG,iBACdd,IAAU,EACVe,UAAgB,EAChB9C,MAAgC;IAEhC,MAAM+C,cAAc/C,AAAW,oBAAXA,SAA6B,MAAM;IACvD,MAAMgD,iBAAiB;IAGvB,MAAMC,wBACJlB,KAAK,KAAK,GAAGgB,cACTvC,KAAK,IAAI,CAAEuC,AAAAA,CAAAA,cAAchB,KAAK,KAAI,IAAK,KACvCiB;IACN,MAAME,sBACJnB,KAAK,MAAM,GAAGgB,cACVvC,KAAK,IAAI,CAAEuC,AAAAA,CAAAA,cAAchB,KAAK,MAAK,IAAK,KACxCiB;IAGN,IAAIG,WAAW3C,KAAK,GAAG,CAACuC,aAAahB,KAAK,KAAK,GAAGkB,AAAwB,IAAxBA;IAClD,IAAIG,YAAY5C,KAAK,GAAG,CAACuC,aAAahB,KAAK,MAAM,GAAGmB,AA
AsB,IAAtBA;IAGpD,IAAIG,UAAUtB,KAAK,IAAI,GAAGkB;IAC1B,IAAIK,SAASvB,KAAK,GAAG,GAAGmB;IAIxB,IAAIG,UAAUF,WAAWL,WAAW,KAAK,EACvCO,UAAUP,WAAW,KAAK,GAAGK;IAI/B,IAAIG,SAASF,YAAYN,WAAW,MAAM,EACxCQ,SAASR,WAAW,MAAM,GAAGM;IAI/BC,UAAU7C,KAAK,GAAG,CAAC,GAAG6C;IACtBC,SAAS9C,KAAK,GAAG,CAAC,GAAG8C;IAIrB,IAAID,UAAUF,WAAWL,WAAW,KAAK,EACvCK,WAAWL,WAAW,KAAK,GAAGO;IAEhC,IAAIC,SAASF,YAAYN,WAAW,MAAM,EACxCM,YAAYN,WAAW,MAAM,GAAGQ;IAGlCvB,KAAK,IAAI,GAAGsB;IACZtB,KAAK,GAAG,GAAGuB;IACXvB,KAAK,KAAK,GAAGoB;IACbpB,KAAK,MAAM,GAAGqB;IAEd,OAAOrB;AACT;AAEO,eAAewB,kBACpBC,gBAAwB,EACxBC,IAAkC,EAClCvB,IAAU;IAEV,MAAMwB,eAAeC,WAAWF;IAChC,MAAMG,kCAAkCF,aAAc,MAAM,CAC1D,CAACG;QACC,IAAIA,YAAY,UAAU,CAAC,QAAQ,KAAKC,SAAS,IAAI,EACnD,OAAO;QAET,OAAO;IACT;IAGF,MAAMC,eAAe,MAAMC,wBAAwB;QACjD,gBAAgBR;QAChB,sBAAsBI;QACtB1B;IACF;IACA,OAAO6B;AACT;AAEO,SAASE,uBACdC,KAAuB,EACvBC,WAAgC,EAChCC,KAAc;IAEd,MAAMC,OAA+B,EAAE;IAEvC,KAAK,MAAMC,QAAQJ,MAAO;QACxB,MAAMK,OAAOD,KAAK,IAAI;QAEtB,MAAME,SAASL,YAAY,IAAI,CAAC,CAACK,SAAWA,OAAO,IAAI,KAAKD;QAC5D,IAAI,CAACC,QAAQ;YACXnC,QAAQ,IAAI,CACV,CAAC,sBAAsB,EAAEkC,KAAK,8BAA8B,CAAC;YAE/D;QACF;QAEA,MAAME,UAAUD,OAAO,cAAc,IAAID;QACzC,MAAMG,YAAYF,OAAO,WAAW,GAChCG,gBAAgBL,KAAK,KAAK,IAAI,CAAC,GAAGE,OAAO,WAAW,IACpD,CAAC;QAEL,MAAMI,WAAiC;YACrC,CAACH,QAAQ,EAAE;YACX,GAAGC,SAAS;QACd;QAEAL,KAAK,IAAI,CAACO;IACZ;IAEA,IAAIR,OACFC,KAAK,IAAI,CAAC;QACRD;IACF;IAGF,OAAOC;AACT;AAGO,MAAMQ,cAAcC,EAAE,MAAM,CAAC;IAClC,MAAMA,EAAE,MAAM;IACd,KAAKA,EAAE,MAAM;AACf;AAEO,MAAMC,aAAaD,EAAE,MAAM,CAAC;IACjC,OAAOA,EAAE,MAAM;IACf,QAAQA,EAAE,MAAM;IAChB,KAAKA,EAAE,MAAM,GAAG,QAAQ;AAC1B;AAEO,MAAME,aAAaH,YAAY,GAAG,CAACE,YAAY,GAAG,CACvDD,EAAE,MAAM,CAAC;IACP,MAAMA,EAAE,MAAM,GAAG,QAAQ;AAC3B;AAIK,MAAMG,0BAA0BH,EAAE,MAAM,CAAC;IAC9C,QAAQA,EAAAA,KACA,CACJA,EAAE,MAAM,CAAC;QACP,MAAMA,EAAE,MAAM;QACd,KAAKA,EAAE,MAAM;IACf,IAED,QAAQ;IACX,yBAAyBA,EAAE,OAAO,GAAG,QAAQ;AAC/C;AAGO,MAAMI,oBAAoBJ,EAAE,KAAK,CAAC;IACvCA,EAAE,MAAM;IACRA,EAAAA,MACS,CAAC;QACN,QAAQA,EAAE,MAAM;IAClB,GACC,GAAG,CAACG,wBAAwB,OAAO;CACvC;AAMD,MAAME,sBAAsB;AAG5B,MAAMC,wBAAwBN,EAAAA,MACrB
,CAAC;IACN,QAAQI;IACR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,OAAO;KAAG,EAAE,QAAQ;AACpD,GACC,WAAW;AAGiBA,EAAAA,MACtB,CAAC;IACN,CAACK,oBAAoB,EAAEL,EAAE,OAAO,CAAC;IACjC,QAAQI;IAGR,WAAWJ,EAAE,OAAO,GAAG,QAAQ;IAC/B,WAAWA,EAAE,OAAO,GAAG,QAAQ;IAC/B,OAAOA,EAAE,OAAO,GAAG,QAAQ;IAG3B,QAAQA,EAAE,KAAK,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG;IACxC,MAAME;AACR,GACC,WAAW;AAYP,MAAMK,4BAA4B,IAChCD;AAGF,MAAME,yBAAyB,CAACC;QAGjCC,mBAKAC;IANJ,IAAIC,cAAcH;IAClB,IAAIC,AAAAA,SAAAA,CAAAA,oBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,kBAAkB,QAAQ,AAAD,MAAM,eACjCE,cAAcA,YAAY,IAAI,CAAC,SAAS;IAI1C,IAAID,AAAAA,SAAAA,CAAAA,qBAAAA,YAAY,IAAI,AAAD,IAAfA,KAAAA,IAAAA,mBAAkB,QAAQ,AAAD,MAAM,aAAa;QAC9C,MAAME,QAAQD,YAAY,IAAI,CAAC,KAAK;QAGpC,IAAIP,uBAAuBQ,OACzB,OAAO;QAKT,IAAI,YAAYA,SAASA,MAAM,MAAM,EACnC,OAAO;IAEX;IAEA,OAAO;AACT;AAEO,MAAMC,2BAA2B,CAACL;IACvC7E,OACE4E,uBAAuBC,QACvB;IAIF,IAAI,AAAiB,YAAjB,OAAOA,OACT,OAAOA;IAIT,IAAIA,SAAS,AAAiB,YAAjB,OAAOA,SAAsBA,MAAM,MAAM,EAAE;QAEtD,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,EACrB,OAAOA,MAAM,MAAM;QAGrB,IAAI,AAAwB,YAAxB,OAAOA,MAAM,MAAM,IAAiBA,MAAM,MAAM,CAAC,MAAM,EACzD,OAAOA,MAAM,MAAM,CAAC,MAAM;IAE9B;IAGA,OAAOM,OAAON;AAChB;AAEO,MAAMO,8BAA8B,CACzCC,SACAC;QAQIC;IANJ,IAAI,CAACF,SACH,OAAO,EAAE;IAIX,MAAMG,YAAYH;IAClB,IAAIE,AAAAA,SAAAA,CAAAA,kBAAAA,UAAU,IAAI,AAAD,IAAbA,KAAAA,IAAAA,gBAAgB,QAAQ,AAAD,MAAM,eAAeC,UAAU,KAAK,EAAE;QAC/D,MAAMC,OAAOC,OAAO,IAAI,CAACF,UAAU,KAAK;QACxC,OAAOC,KAAK,MAAM,CAAC,CAACE;YAClB,MAAMd,QAAQW,UAAU,KAAK,CAACG,IAAI;YAClC,IAAI,CAACf,uBAAuBC,QAC1B,OAAO;YAIT,IAAIS,cAAc;oBACTM;gBAAP,OAAOA,AAAAA,SAAAA,CAAAA,cAAAA,MAAM,IAAI,AAAD,IAATA,KAAAA,IAAAA,YAAY,QAAQ,AAAD,MAAM;YAClC;YAEA,OAAO;QACT;IACF;IAGA,OAAO,EAAE;AACX;AAEO,MAAM3B,kBAAkB,CAC7B4B,YACAC;IAEA,MAAMC,gBAAgBX,4BAA4BU;IAClD,MAAMjG,SAAS;QAAE,GAAGgG,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAapG,MAAM,CAACmG,UAAU;QACpC,IAAIC,YAEF;YAAA,IAAI,AAAsB,YAAtB,OAAOA,YACTpG,MAAM,CAACmG,UAAU,GAAGC;iBACf,IAAI,AAAsB,YAAtB,OAAOA,YAEhB;gBAAA,IAAIA,WAAW,
MAAM,EAEnB;oBAAA,IAAI,AAA6B,YAA7B,OAAOA,WAAW,MAAM,EAC1BpG,MAAM,CAACmG,UAAU,GAAGC,WAAW,MAAM;yBAChC,IACL,AAA6B,YAA7B,OAAOA,WAAW,MAAM,IACxBA,WAAW,MAAM,CAAC,MAAM,EAGxBpG,MAAM,CAACmG,UAAU,GAAGC,WAAW,MAAM,CAAC,MAAM;gBAC9C;YACF;QACF;IAEJ;IAEA,OAAOpG;AACT;AAEO,MAAMqG,kBAAkB,CAC7BL,YACAC;IAEA,MAAMC,gBAAgBX,4BAA4BU;IAClD,MAAMjG,SAAS;QAAE,GAAGgG,UAAU;IAAC;IAE/B,KAAK,MAAMG,aAAaD,cAAe;QACrC,MAAME,aAAapG,MAAM,CAACmG,UAAU;QACpC,IAAIC,cAAc,AAAsB,YAAtB,OAAOA,YACvBpG,MAAM,CAACmG,UAAU,GAAG;YAClB,CAACvB,oBAAoB,EAAE;YACvB,QAAQwB;QACV;IAEJ;IAEA,OAAOpG;AACT;AAUO,MAAMsG,mBAAmB,CAC9BC,UACAN;IAGA,MAAMO,QAAQD,YAAY,CAAC;IAG3B,MAAME,eAAelB,4BAA4BU;IAGjD,IAAIQ,AAAwB,MAAxBA,aAAa,MAAM,EACrB,OAAOR,UAAU,KAAK,CAACO;IAIzB,MAAME,oBAAyC,CAAC;IAChD,KAAK,MAAMP,aAAaM,aACtB,IAAIN,aAAaK,OACfE,iBAAiB,CAACP,UAAU,GAAGK,KAAK,CAACL,UAAU;IAKnD,MAAMQ,sBAA2C,CAAC;IAClD,IAAK,MAAMb,OAAOU,MAChB,IAAIC,aAAa,QAAQ,CAACX,MAExBa,mBAAmB,CAACb,IAAI,GAAG;QAAE,QAAQ;IAAU;SAE/Ca,mBAAmB,CAACb,IAAI,GAAGU,KAAK,CAACV,IAAI;IAKzC,MAAMc,YAAYX,UAAU,KAAK,CAACU;IAGlC,IAAK,MAAMR,aAAaO,kBACtBE,SAAS,CAACT,UAAU,GAAGO,iBAAiB,CAACP,UAAU;IAGrD,OAAOS;AACT"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
 * Sets `key` on `obj` to `value` and hands `obj` back.
 *
 * When `key` is not yet reachable on `obj` (own or inherited), a plain
 * assignment is used. Otherwise the property is (re)defined directly on
 * `obj` as an enumerable, configurable, writable data property, so an
 * inherited accessor or a read-only own property cannot intercept the write.
 *
 * @param {object} obj - Object that receives the property.
 * @param {string|symbol} key - Property name.
 * @param {*} value - Value to store.
 * @returns {object} The same `obj` that was passed in.
 */
function _define_property(obj, key, value) {
    if (!(key in obj)) {
        obj[key] = value;
        return obj;
    }
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    Object.defineProperty(obj, key, descriptor);
    return obj;
}
|
|
11
|
+
// Capture the well-known iterator symbol once at module load; `_computedKey1`
// is the binding used as a computed method name further down in this module.
var _computedKey = Symbol.iterator;
let _computedKey1 = _computedKey;
|
|
14
|
+
/**
 * Ordered list of chat messages that caps how many messages with
 * `role === 'user'` are retained.
 *
 * Before a user message is appended, the number of user messages already
 * stored is compared against `maxUserImageMessages`; once that count has
 * reached the limit, the oldest stored user message is removed to make room.
 * Messages with any other role are appended without pruning.
 */
class ConversationHistory {
    /**
     * @param {object} [options]
     * @param {number} [options.maxUserImageMessages] - Limit used by the
     *   pruning step; falls back to 4 when null or undefined.
     * @param {Array} [options.initialMessages] - Messages to seed the history
     *   with; ignored when missing or empty.
     */
    constructor(options){
        _define_property(this, "maxUserImageMessages", void 0);
        _define_property(this, "messages", []);
        const hasOptions = null != options;
        const requestedLimit = hasOptions ? options.maxUserImageMessages : void 0;
        this.maxUserImageMessages = requestedLimit ?? 4;
        const seedMessages = hasOptions ? options.initialMessages : void 0;
        if (null != seedMessages && seedMessages.length) this.seed(seedMessages);
    }
    /**
     * Adds one message to the end of the history, pruning first when the
     * incoming message has the `user` role.
     */
    append(message) {
        const isUserTurn = 'user' === message.role;
        if (isUserTurn) {
            this.pruneOldestUserMessageIfNecessary();
        }
        this.messages.push(message);
    }
    /**
     * Clears the history and re-adds `messages` one by one through `append`,
     * so the user-message limit applies to the seeded content as well.
     */
    seed(messages) {
        this.reset();
        messages.forEach((entry)=>{
            this.append(entry);
        });
    }
    /** Empties the history in place (the backing array is kept). */
    reset() {
        this.messages.length = 0;
    }
    /** Returns a shallow copy of the stored messages. */
    snapshot() {
        return this.messages.slice();
    }
    /** Number of messages currently stored. */
    get length() {
        return this.messages.length;
    }
    /** Iterates over the stored messages in insertion order. */
    [_computedKey1]() {
        return this.messages.values();
    }
    /** Serialises as a shallow copy of the message list. */
    toJSON() {
        return this.snapshot();
    }
    /**
     * Removes the oldest `user` message when the stored count of user
     * messages is not below `maxUserImageMessages`; otherwise does nothing.
     */
    pruneOldestUserMessageIfNecessary() {
        let userCount = 0;
        let oldestUserIndex = -1;
        for(let i = 0; i < this.messages.length; i++){
            if ('user' !== this.messages[i].role) continue;
            userCount += 1;
            // Remember only the first match: that is the oldest user message.
            if (oldestUserIndex < 0) oldestUserIndex = i;
        }
        if (userCount < this.maxUserImageMessages) return;
        if (oldestUserIndex >= 0) this.messages.splice(oldestUserIndex, 1);
    }
}
|
|
56
|
+
export { ConversationHistory };
|
|
57
|
+
|
|
58
|
+
//# sourceMappingURL=conversation-history.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model\\conversation-history.mjs","sources":["webpack://@rpascene/core/./src/ai-model/conversation-history.ts"],"sourcesContent":["import type { ChatCompletionMessageParam } from 'openai/resources/index';\n\nexport interface ConversationHistoryOptions {\n maxUserImageMessages?: number;\n initialMessages?: ChatCompletionMessageParam[];\n}\n\nexport class ConversationHistory {\n private readonly maxUserImageMessages: number;\n private readonly messages: ChatCompletionMessageParam[] = [];\n\n constructor(options?: ConversationHistoryOptions) {\n this.maxUserImageMessages = options?.maxUserImageMessages ?? 4;\n if (options?.initialMessages?.length) {\n this.seed(options.initialMessages);\n }\n }\n\n append(message: ChatCompletionMessageParam) {\n if (message.role === 'user') {\n this.pruneOldestUserMessageIfNecessary();\n }\n\n this.messages.push(message);\n }\n\n seed(messages: ChatCompletionMessageParam[]) {\n this.reset();\n messages.forEach((message) => {\n this.append(message);\n });\n }\n\n reset() {\n this.messages.length = 0;\n }\n\n snapshot(): ChatCompletionMessageParam[] {\n return [...this.messages];\n }\n\n get length(): number {\n return this.messages.length;\n }\n\n [Symbol.iterator](): IterableIterator<ChatCompletionMessageParam> {\n return this.messages[Symbol.iterator]();\n }\n\n toJSON(): ChatCompletionMessageParam[] {\n return this.snapshot();\n }\n\n private pruneOldestUserMessageIfNecessary() {\n const userMessages = this.messages.filter((item) => item.role === 'user');\n if (userMessages.length < this.maxUserImageMessages) {\n return;\n }\n\n const firstUserMessageIndex = this.messages.findIndex(\n (item) => item.role === 'user',\n );\n\n if (firstUserMessageIndex >= 0) {\n this.messages.splice(firstUserMessageIndex, 1);\n }\n 
}\n}\n"],"names":["Symbol","ConversationHistory","message","messages","userMessages","item","firstUserMessageIndex","options","_options_initialMessages"],"mappings":";;;;;;;;;;;eA6CGA,OAAO,QAAQ;;AAtCX,MAAMC;IAWX,OAAOC,OAAmC,EAAE;QAC1C,IAAIA,AAAiB,WAAjBA,QAAQ,IAAI,EACd,IAAI,CAAC,iCAAiC;QAGxC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAACA;IACrB;IAEA,KAAKC,QAAsC,EAAE;QAC3C,IAAI,CAAC,KAAK;QACVA,SAAS,OAAO,CAAC,CAACD;YAChB,IAAI,CAAC,MAAM,CAACA;QACd;IACF;IAEA,QAAQ;QACN,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG;IACzB;IAEA,WAAyC;QACvC,OAAO;eAAI,IAAI,CAAC,QAAQ;SAAC;IAC3B;IAEA,IAAI,SAAiB;QACnB,OAAO,IAAI,CAAC,QAAQ,CAAC,MAAM;IAC7B;IAEA,CAAC,cAAD,GAAkE;QAChE,OAAO,IAAI,CAAC,QAAQ,CAACF,OAAO,QAAQ,CAAC;IACvC;IAEA,SAAuC;QACrC,OAAO,IAAI,CAAC,QAAQ;IACtB;IAEQ,oCAAoC;QAC1C,MAAMI,eAAe,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAACC,OAASA,AAAc,WAAdA,KAAK,IAAI;QAC7D,IAAID,aAAa,MAAM,GAAG,IAAI,CAAC,oBAAoB,EACjD;QAGF,MAAME,wBAAwB,IAAI,CAAC,QAAQ,CAAC,SAAS,CACnD,CAACD,OAASA,AAAc,WAAdA,KAAK,IAAI;QAGrB,IAAIC,yBAAyB,GAC3B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAACA,uBAAuB;IAEhD;IAvDA,YAAYC,OAAoC,CAAE;YAE5CC;QALN,uBAAiB,wBAAjB;QACA,uBAAiB,YAAyC,EAAE;QAG1D,IAAI,CAAC,oBAAoB,GAAGD,AAAAA,CAAAA,QAAAA,UAAAA,KAAAA,IAAAA,QAAS,oBAAoB,AAAD,KAAK;QAC7D,IAAIC,QAAAA,UAAAA,KAAAA,IAAAA,QAAAA,CAAAA,2BAAAA,QAAS,eAAe,AAAD,IAAvBA,KAAAA,IAAAA,yBAA0B,MAAM,EAClC,IAAI,CAAC,IAAI,CAACD,QAAQ,eAAe;IAErC;AAmDF"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { callAI, callAIWithObjectResponse, callAIWithStringResponse } from "./service-caller/index.mjs";
|
|
2
|
+
import { systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
3
|
+
import { describeUserPage, elementByPositionWithElementInfo } from "./prompt/util.mjs";
|
|
4
|
+
import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/playwright-generator.mjs";
|
|
5
|
+
import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
|
|
6
|
+
import { AiExtractElementInfo, AiLocateElement, AiLocateSection } from "./inspect.mjs";
|
|
7
|
+
import { plan } from "./llm-planning.mjs";
|
|
8
|
+
import { AIActionType, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, dumpActionParam, findAllRpasceneLocatorField, getRpasceneLocationSchema, loadActionParam, parseActionParam } from "./common.mjs";
|
|
9
|
+
import { resizeImageForUiTars, uiTarsPlanning } from "./ui-tars-planning.mjs";
|
|
10
|
+
import { ConversationHistory } from "./conversation-history.mjs";
|
|
11
|
+
export { AIActionType, AiExtractElementInfo, AiLocateElement, AiLocateSection, ConversationHistory, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, callAI, callAIWithObjectResponse, callAIWithStringResponse, describeUserPage, dumpActionParam, elementByPositionWithElementInfo, findAllRpasceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getRpasceneLocationSchema, loadActionParam, parseActionParam, plan, resizeImageForUiTars, systemPromptToLocateElement, uiTarsPlanning };
|