@rpascene/core 0.30.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +636 -0
- package/dist/es/agent/agent.mjs.map +1 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-cache.mjs +184 -0
- package/dist/es/agent/task-cache.mjs.map +1 -0
- package/dist/es/agent/tasks.mjs +666 -0
- package/dist/es/agent/tasks.mjs.map +1 -0
- package/dist/es/agent/ui-utils.mjs +72 -0
- package/dist/es/agent/ui-utils.mjs.map +1 -0
- package/dist/es/agent/utils.mjs +162 -0
- package/dist/es/agent/utils.mjs.map +1 -0
- package/dist/es/ai-model/action-executor.mjs +129 -0
- package/dist/es/ai-model/action-executor.mjs.map +1 -0
- package/dist/es/ai-model/common.mjs +355 -0
- package/dist/es/ai-model/common.mjs.map +1 -0
- package/dist/es/ai-model/conversation-history.mjs +58 -0
- package/dist/es/ai-model/conversation-history.mjs.map +1 -0
- package/dist/es/ai-model/index.mjs +11 -0
- package/dist/es/ai-model/inspect.mjs +286 -0
- package/dist/es/ai-model/inspect.mjs.map +1 -0
- package/dist/es/ai-model/llm-planning.mjs +140 -0
- package/dist/es/ai-model/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/assertion.mjs +31 -0
- package/dist/es/ai-model/prompt/assertion.mjs.map +1 -0
- package/dist/es/ai-model/prompt/common.mjs +7 -0
- package/dist/es/ai-model/prompt/common.mjs.map +1 -0
- package/dist/es/ai-model/prompt/describe.mjs +44 -0
- package/dist/es/ai-model/prompt/describe.mjs.map +1 -0
- package/dist/es/ai-model/prompt/extraction.mjs +140 -0
- package/dist/es/ai-model/prompt/extraction.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +275 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +367 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +47 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +117 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs +34 -0
- package/dist/es/ai-model/prompt/ui-tars-locator.mjs.map +1 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +36 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs.map +1 -0
- package/dist/es/ai-model/prompt/util.mjs +124 -0
- package/dist/es/ai-model/prompt/util.mjs.map +1 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +219 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs.map +1 -0
- package/dist/es/ai-model/service-caller/index.mjs +537 -0
- package/dist/es/ai-model/service-caller/index.mjs.map +1 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +201 -0
- package/dist/es/ai-model/ui-tars-planning.mjs.map +1 -0
- package/dist/es/device/index.mjs +152 -0
- package/dist/es/device/index.mjs.map +1 -0
- package/dist/es/image/index.mjs +2 -0
- package/dist/es/index.mjs +11 -0
- package/dist/es/index.mjs.map +1 -0
- package/dist/es/insight/index.mjs +233 -0
- package/dist/es/insight/index.mjs.map +1 -0
- package/dist/es/insight/utils.mjs +15 -0
- package/dist/es/insight/utils.mjs.map +1 -0
- package/dist/es/report.mjs +88 -0
- package/dist/es/report.mjs.map +1 -0
- package/dist/es/tree.mjs +2 -0
- package/dist/es/types.mjs +11 -0
- package/dist/es/types.mjs.map +1 -0
- package/dist/es/utils.mjs +204 -0
- package/dist/es/utils.mjs.map +1 -0
- package/dist/es/yaml/builder.mjs +13 -0
- package/dist/es/yaml/builder.mjs.map +1 -0
- package/dist/es/yaml/index.mjs +3 -0
- package/dist/es/yaml/player.mjs +372 -0
- package/dist/es/yaml/player.mjs.map +1 -0
- package/dist/es/yaml/utils.mjs +73 -0
- package/dist/es/yaml/utils.mjs.map +1 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +683 -0
- package/dist/lib/agent/agent.js.map +1 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/index.js +81 -0
- package/dist/lib/agent/index.js.map +1 -0
- package/dist/lib/agent/task-cache.js +236 -0
- package/dist/lib/agent/task-cache.js.map +1 -0
- package/dist/lib/agent/tasks.js +703 -0
- package/dist/lib/agent/tasks.js.map +1 -0
- package/dist/lib/agent/ui-utils.js +121 -0
- package/dist/lib/agent/ui-utils.js.map +1 -0
- package/dist/lib/agent/utils.js +233 -0
- package/dist/lib/agent/utils.js.map +1 -0
- package/dist/lib/ai-model/action-executor.js +163 -0
- package/dist/lib/ai-model/action-executor.js.map +1 -0
- package/dist/lib/ai-model/common.js +461 -0
- package/dist/lib/ai-model/common.js.map +1 -0
- package/dist/lib/ai-model/conversation-history.js +92 -0
- package/dist/lib/ai-model/conversation-history.js.map +1 -0
- package/dist/lib/ai-model/index.js +131 -0
- package/dist/lib/ai-model/index.js.map +1 -0
- package/dist/lib/ai-model/inspect.js +326 -0
- package/dist/lib/ai-model/inspect.js.map +1 -0
- package/dist/lib/ai-model/llm-planning.js +174 -0
- package/dist/lib/ai-model/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/assertion.js +65 -0
- package/dist/lib/ai-model/prompt/assertion.js.map +1 -0
- package/dist/lib/ai-model/prompt/common.js +41 -0
- package/dist/lib/ai-model/prompt/common.js.map +1 -0
- package/dist/lib/ai-model/prompt/describe.js +78 -0
- package/dist/lib/ai-model/prompt/describe.js.map +1 -0
- package/dist/lib/ai-model/prompt/extraction.js +180 -0
- package/dist/lib/ai-model/prompt/extraction.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +315 -0
- package/dist/lib/ai-model/prompt/llm-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +407 -0
- package/dist/lib/ai-model/prompt/llm-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +84 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +178 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js +68 -0
- package/dist/lib/ai-model/prompt/ui-tars-locator.js.map +1 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +73 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js.map +1 -0
- package/dist/lib/ai-model/prompt/util.js +176 -0
- package/dist/lib/ai-model/prompt/util.js.map +1 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +280 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js.map +1 -0
- package/dist/lib/ai-model/service-caller/index.js +623 -0
- package/dist/lib/ai-model/service-caller/index.js.map +1 -0
- package/dist/lib/ai-model/ui-tars-planning.js +238 -0
- package/dist/lib/ai-model/ui-tars-planning.js.map +1 -0
- package/dist/lib/device/index.js +255 -0
- package/dist/lib/device/index.js.map +1 -0
- package/dist/lib/image/index.js +56 -0
- package/dist/lib/image/index.js.map +1 -0
- package/dist/lib/index.js +103 -0
- package/dist/lib/index.js.map +1 -0
- package/dist/lib/insight/index.js +267 -0
- package/dist/lib/insight/index.js.map +1 -0
- package/dist/lib/insight/utils.js +49 -0
- package/dist/lib/insight/utils.js.map +1 -0
- package/dist/lib/report.js +122 -0
- package/dist/lib/report.js.map +1 -0
- package/dist/lib/tree.js +44 -0
- package/dist/lib/tree.js.map +1 -0
- package/dist/lib/types.js +82 -0
- package/dist/lib/types.js.map +1 -0
- package/dist/lib/utils.js +283 -0
- package/dist/lib/utils.js.map +1 -0
- package/dist/lib/yaml/builder.js +57 -0
- package/dist/lib/yaml/builder.js.map +1 -0
- package/dist/lib/yaml/index.js +80 -0
- package/dist/lib/yaml/index.js.map +1 -0
- package/dist/lib/yaml/player.js +406 -0
- package/dist/lib/yaml/player.js.map +1 -0
- package/dist/lib/yaml/utils.js +126 -0
- package/dist/lib/yaml/utils.js.map +1 -0
- package/dist/lib/yaml.js +20 -0
- package/dist/lib/yaml.js.map +1 -0
- package/dist/types/agent/agent.d.ts +156 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/index.d.ts +9 -0
- package/dist/types/agent/task-cache.d.ts +48 -0
- package/dist/types/agent/tasks.d.ts +48 -0
- package/dist/types/agent/ui-utils.d.ts +7 -0
- package/dist/types/agent/utils.d.ts +52 -0
- package/dist/types/ai-model/action-executor.d.ts +19 -0
- package/dist/types/ai-model/common.d.ts +569 -0
- package/dist/types/ai-model/conversation-history.d.ts +18 -0
- package/dist/types/ai-model/index.d.ts +13 -0
- package/dist/types/ai-model/inspect.d.ts +46 -0
- package/dist/types/ai-model/llm-planning.d.ts +11 -0
- package/dist/types/ai-model/prompt/assertion.d.ts +2 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +4 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +9 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +9 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +6 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-locator.d.ts +1 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +47 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +100 -0
- package/dist/types/ai-model/service-caller/index.d.ts +48 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +59 -0
- package/dist/types/device/index.d.ts +2158 -0
- package/dist/types/image/index.d.ts +1 -0
- package/dist/types/index.d.ts +12 -0
- package/dist/types/insight/index.d.ts +31 -0
- package/dist/types/insight/utils.d.ts +2 -0
- package/dist/types/report.d.ts +12 -0
- package/dist/types/tree.d.ts +1 -0
- package/dist/types/types.d.ts +414 -0
- package/dist/types/utils.d.ts +40 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +3 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +178 -0
- package/package.json +108 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
import { UITarsModelVersion } from "@rpascene/shared/env";
|
|
2
|
+
import { resizeImgBase64 } from "@rpascene/shared/img";
|
|
3
|
+
import { getDebug } from "@rpascene/shared/logger";
|
|
4
|
+
import { transformHotkeyInput } from "@rpascene/shared/us-keyboard-layout";
|
|
5
|
+
import { assert } from "@rpascene/shared/utils";
|
|
6
|
+
import { actionParser } from "@ui-tars/action-parser";
|
|
7
|
+
import { AIActionType } from "./common.mjs";
|
|
8
|
+
import { getSummary, getUiTarsPlanningPrompt } from "./prompt/ui-tars-planning.mjs";
|
|
9
|
+
import { callAIWithStringResponse } from "./service-caller/index.mjs";
|
|
10
|
+
// Module-level logging function obtained from getDebug with the name
// 'ui-tars-planning'; the functions in this module call it to emit diagnostics.
const debug = getDebug('ui-tars-planning');
|
|
11
|
+
/** Edge length of the square box built around a point (same units as width/height). */
const bboxSize = 10;

/**
 * Expands a point into a small `[left, top, right, bottom]` box with integer edges.
 *
 * The box is `bboxSize` wide and high, centred on the point, and cut off at the
 * `0..width` / `0..height` bounds. The point is clamped into those bounds first,
 * so a point lying outside them produces a box hugging the nearest edge instead
 * of an inverted box (left > right or top > bottom). Points already inside the
 * bounds give exactly the same result as before.
 *
 * @param {{x: number, y: number}} point - Centre of the box.
 * @param {number} width - Upper bound for the horizontal edges.
 * @param {number} height - Upper bound for the vertical edges.
 * @returns {[number, number, number, number]} `[left, top, right, bottom]`.
 */
const pointToBbox = (point, width, height) => {
    const half = bboxSize / 2;
    const clamp = (value, upper) => Math.min(Math.max(value, 0), upper);
    const centerX = clamp(point.x, width);
    const centerY = clamp(point.y, height);
    return [
        Math.round(Math.max(centerX - half, 0)),
        Math.round(Math.max(centerY - half, 0)),
        Math.round(Math.min(centerX + half, width)),
        Math.round(Math.min(centerY + half, height))
    ];
};
|
|
18
|
+
/**
 * Builds the `{ prompt, bbox }` locate descriptor for one box of a parsed action.
 *
 * @param {string} box - Box string from the parsed action's inputs.
 * @param {string} thought - Text stored as the locate prompt.
 * @param {{width: number, height: number}} size - Size the point is scaled to.
 * @returns {{prompt: string, bbox: number[]}} Locate descriptor.
 */
function toLocateParam(box, thought, size) {
    const [x, y] = getPoint(box, size);
    return {
        prompt: thought,
        bbox: pointToBbox({ x, y }, size.width, size.height)
    };
}

/**
 * Asks the model for the next step(s) and converts its answer into planning actions.
 *
 * Flow: the (possibly resized) screenshot is appended to the conversation history,
 * the planning prompt plus the history snapshot is sent through
 * `callAIWithStringResponse`, the reply is parsed with `actionParser`, and each
 * parsed action is mapped to one of: Tap, DragAndDrop, Input, Scroll, Finished,
 * KeyboardPress, Sleep. A summary of the reply is appended to the history as the
 * assistant turn.
 *
 * @param {string} userInstruction - Instruction appended to the planning prompt.
 * @param {object} options
 * @param {object} options.conversationHistory - Must provide `append()` and `snapshot()`.
 * @param {object} options.context - Must provide `screenshotBase64` and `size` ({width, height}).
 * @param {object} options.modelConfig - Forwarded to the AI call; `uiTarsModelVersion` is read from it.
 * @returns {Promise<{actions: object[], log: *, usage: *, rawResponse: string, more_actions_needed_by_instruction: boolean}>}
 *   `more_actions_needed_by_instruction` is false once a `finished` action was seen.
 * @throws {Error} When no parsed action could be converted (the raw reply and
 *   parsed actions are attached as `cause`).
 */
async function uiTarsPlanning(userInstruction, options) {
    const { conversationHistory, context, modelConfig } = options;
    const { uiTarsModelVersion } = modelConfig;
    const { size } = context;
    const systemPrompt = `${getUiTarsPlanningPrompt()}${userInstruction}`;
    const imagePayload = await resizeImageForUiTars(context.screenshotBase64, size, uiTarsModelVersion);

    // The screenshot goes into the history before the request so that it is
    // part of the snapshot sent below.
    conversationHistory.append({
        role: 'user',
        content: [
            {
                type: 'image_url',
                image_url: {
                    url: imagePayload
                }
            }
        ]
    });

    const res = await callAIWithStringResponse([
        {
            role: 'user',
            content: systemPrompt
        },
        ...conversationHistory.snapshot()
    ], AIActionType.INSPECT_ELEMENT, modelConfig);

    const convertedText = convertBboxToCoordinates(res.content);
    const { parsed } = actionParser({
        prediction: convertedText,
        // NOTE(review): presumably the model's coordinate scale; confirm against
        // the @ui-tars/action-parser documentation.
        factor: [
            1000,
            1000
        ],
        screenContext: {
            width: size.width,
            height: size.height
        },
        modelVer: uiTarsModelVersion
    });
    debug('ui-tars modelVer', uiTarsModelVersion, ', parsed', JSON.stringify(parsed));

    const transformActions = [];
    let shouldContinue = true;
    for (const action of parsed) {
        const actionType = (action.action_type || '').toLowerCase();
        const thought = action.thought || '';
        const inputs = action.action_inputs;
        switch (actionType) {
            case 'click':
                assert(inputs.start_box, 'start_box is required');
                transformActions.push({
                    type: 'Tap',
                    param: {
                        locate: toLocateParam(inputs.start_box, thought, size)
                    },
                    // Carried like on every other action type so consumers can
                    // read the model's reasoning uniformly.
                    thought
                });
                break;
            case 'drag': {
                assert(inputs.start_box, 'start_box is required');
                assert(inputs.end_box, 'end_box is required');
                const from = toLocateParam(inputs.start_box, thought, size);
                const to = toLocateParam(inputs.end_box, thought, size);
                transformActions.push({
                    type: 'DragAndDrop',
                    param: {
                        from,
                        to
                    },
                    thought
                });
                break;
            }
            case 'type':
                transformActions.push({
                    type: 'Input',
                    param: {
                        value: inputs.content
                    },
                    thought
                });
                break;
            case 'scroll':
                transformActions.push({
                    type: 'Scroll',
                    param: {
                        direction: inputs.direction
                    },
                    thought
                });
                break;
            case 'finished':
                shouldContinue = false;
                transformActions.push({
                    type: 'Finished',
                    param: {},
                    thought
                });
                break;
            case 'hotkey':
                if (!inputs.key) {
                    console.warn('No key found in action: hotkey. Will not perform action.');
                    break;
                }
                transformActions.push({
                    type: 'KeyboardPress',
                    param: {
                        keyName: transformHotkeyInput(inputs.key)
                    },
                    thought
                });
                break;
            case 'wait':
                transformActions.push({
                    type: 'Sleep',
                    param: {
                        timeMs: 1000
                    },
                    thought
                });
                break;
            default:
                // Previously dropped without a trace; leave a breadcrumb so a
                // partially ignored model reply can be diagnosed.
                debug('unsupported ui-tars action type, skipped: %s', action.action_type);
        }
    }

    if (0 === transformActions.length) {
        throw new Error(`No actions found, response: ${res.content}`, {
            cause: {
                prediction: res.content,
                parsed
            }
        });
    }
    debug('transformActions', JSON.stringify(transformActions, null, 2));

    const log = getSummary(res.content);
    conversationHistory.append({
        role: 'assistant',
        content: log
    });

    return {
        actions: transformActions,
        log,
        usage: res.usage,
        rawResponse: JSON.stringify(res.content, void 0, 2),
        more_actions_needed_by_instruction: shouldContinue
    };
}
|
|
159
|
+
/**
 * Rewrites `<bbox>x1 y1 x2 y2</bbox>` tags in a model reply as `(x,y)` centre points.
 *
 * Every `[EOS]` marker is stripped first. Each bbox tag holding four
 * whitespace-separated integers is then replaced by the floored midpoint of its
 * two corners, and the final text is trimmed.
 *
 * @param {string} text - Raw text that may contain bbox tags.
 * @returns {string} Text with bbox tags replaced by coordinate pairs.
 */
function convertBboxToCoordinates(text) {
    // Floored average of two decimal strings.
    const midpoint = (a, b) => Math.floor((Number.parseInt(a, 10) + Number.parseInt(b, 10)) / 2);
    return text
        .replace(/\[EOS\]/g, '')
        .replace(
            /<bbox>(\d+)\s+(\d+)\s+(\d+)\s+(\d+)<\/bbox>/g,
            (_tag, left, top, right, bottom) => `(${midpoint(left, right)},${midpoint(top, bottom)})`
        )
        .trim();
}
|
|
173
|
+
/**
 * Parses a JSON box string and scales its first two entries by the given size.
 *
 * Only the first two array entries are used: the first is multiplied by
 * `size.width`, the second by `size.height`. Any further entries are ignored.
 *
 * @param {string} startBox - JSON text of an array, e.g. `"[0.5, 0.25]"`.
 * @param {{width: number, height: number}} size - Scale factors for x and y.
 * @returns {[number, number]} The scaled `[x, y]` pair.
 * @throws {SyntaxError} When `startBox` is not valid JSON (raised by `JSON.parse`).
 * @throws {TypeError} When the parsed value is not an array.
 * @throws {Error} When either scaled coordinate is not a finite number, so a
 *   malformed box fails here instead of leaking `NaN` into later geometry.
 */
function getPoint(startBox, size) {
    const parsed = JSON.parse(startBox);
    if (!Array.isArray(parsed)) {
        throw new TypeError(`Expected box to be a JSON array, got: ${startBox}`);
    }
    const [x, y] = parsed;
    const point = [
        x * size.width,
        y * size.height
    ];
    if (!point.every((value) => Number.isFinite(value))) {
        throw new Error(`Invalid box coordinates: ${startBox}`);
    }
    return point;
}
|
|
180
|
+
/**
 * Shrinks a screenshot when it exceeds the pixel budget applied for UI-TARS 1.5.
 *
 * For any other model version, or when `size.width * size.height` does not
 * exceed the budget, the input image is returned untouched. Otherwise both
 * dimensions are scaled by the same factor (floored) so the pixel count fits the
 * budget, and the image is resized via `resizeImgBase64`.
 *
 * @param {string} imageBase64 - Image to check.
 * @param {{width: number, height: number}} size - Current image dimensions.
 * @param {*} uiTarsVersion - Compared against `UITarsModelVersion.V1_5`.
 * @returns {Promise<string>} The original or the resized image.
 */
async function resizeImageForUiTars(imageBase64, size, uiTarsVersion) {
    if (uiTarsVersion !== UITarsModelVersion.V1_5) {
        return imageBase64;
    }
    debug('ui-tars-v1.5, will check image size', size);

    const pixelCount = size.width * size.height;
    const pixelBudget = 16384 * 28 * 28; // = 12845056
    const overBudget = pixelCount > pixelBudget;
    if (!overBudget) {
        return imageBase64;
    }

    // Same factor on both axes keeps the aspect ratio.
    const scale = Math.sqrt(pixelBudget / pixelCount);
    const targetSize = {
        width: Math.floor(size.width * scale),
        height: Math.floor(size.height * scale)
    };
    debug('resize image for ui-tars, new width: %s, new height: %s', targetSize.width, targetSize.height);
    return await resizeImgBase64(imageBase64, targetSize);
}
|
|
199
|
+
export { resizeImageForUiTars, uiTarsPlanning };
|
|
200
|
+
|
|
201
|
+
//# sourceMappingURL=ui-tars-planning.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ai-model\\ui-tars-planning.mjs","sources":["webpack://@rpascene/core/./src/ai-model/ui-tars-planning.ts"],"sourcesContent":["import type {\n PlanningAIResponse,\n PlanningAction,\n Size,\n UIContext,\n} from '@/types';\nimport { type IModelConfig, UITarsModelVersion } from '@rpascene/shared/env';\nimport { resizeImgBase64 } from '@rpascene/shared/img';\nimport { getDebug } from '@rpascene/shared/logger';\nimport { transformHotkeyInput } from '@rpascene/shared/us-keyboard-layout';\nimport { assert } from '@rpascene/shared/utils';\nimport { actionParser } from '@ui-tars/action-parser';\nimport { AIActionType } from './common';\nimport type { ConversationHistory } from './conversation-history';\nimport { getSummary, getUiTarsPlanningPrompt } from './prompt/ui-tars-planning';\nimport { callAIWithStringResponse } from './service-caller/index';\ntype ActionType =\n | 'click'\n | 'drag'\n | 'type'\n | 'hotkey'\n | 'finished'\n | 'scroll'\n | 'wait';\n\nconst debug = getDebug('ui-tars-planning');\nconst bboxSize = 10;\nconst pointToBbox = (\n point: { x: number; y: number },\n width: number,\n height: number,\n): [number, number, number, number] => {\n return [\n Math.round(Math.max(point.x - bboxSize / 2, 0)),\n Math.round(Math.max(point.y - bboxSize / 2, 0)),\n Math.round(Math.min(point.x + bboxSize / 2, width)),\n Math.round(Math.min(point.y + bboxSize / 2, height)),\n ];\n};\n\nexport async function uiTarsPlanning(\n userInstruction: string,\n options: {\n conversationHistory: ConversationHistory;\n context: UIContext;\n modelConfig: IModelConfig;\n },\n): Promise<PlanningAIResponse> {\n const { conversationHistory, context, modelConfig } = options;\n const { uiTarsModelVersion } = modelConfig;\n const systemPrompt = getUiTarsPlanningPrompt() + userInstruction;\n\n const imagePayload = await resizeImageForUiTars(\n context.screenshotBase64,\n context.size,\n uiTarsModelVersion,\n );\n\n conversationHistory.append({\n role: 'user',\n content: [\n 
{\n type: 'image_url',\n image_url: {\n url: imagePayload,\n },\n },\n ],\n });\n\n const res = await callAIWithStringResponse(\n [\n {\n role: 'user',\n content: systemPrompt,\n },\n ...conversationHistory.snapshot(),\n ],\n AIActionType.INSPECT_ELEMENT,\n modelConfig,\n );\n const convertedText = convertBboxToCoordinates(res.content);\n\n const { size } = context;\n const { parsed } = actionParser({\n prediction: convertedText,\n factor: [1000, 1000],\n screenContext: {\n width: size.width,\n height: size.height,\n },\n modelVer: uiTarsModelVersion,\n });\n\n debug(\n 'ui-tars modelVer',\n uiTarsModelVersion,\n ', parsed',\n JSON.stringify(parsed),\n );\n\n const transformActions: PlanningAction[] = [];\n let shouldContinue = true;\n parsed.forEach((action) => {\n const actionType = (action.action_type || '').toLowerCase();\n if (actionType === 'click') {\n assert(action.action_inputs.start_box, 'start_box is required');\n const point = getPoint(action.action_inputs.start_box, size);\n transformActions.push({\n type: 'Tap',\n param: {\n locate: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: point[0], y: point[1] },\n size.width,\n size.height,\n ),\n },\n },\n });\n } else if (actionType === 'drag') {\n assert(action.action_inputs.start_box, 'start_box is required');\n assert(action.action_inputs.end_box, 'end_box is required');\n const startPoint = getPoint(action.action_inputs.start_box, size);\n const endPoint = getPoint(action.action_inputs.end_box, size);\n transformActions.push({\n type: 'DragAndDrop',\n param: {\n from: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: startPoint[0], y: startPoint[1] },\n size.width,\n size.height,\n ),\n },\n to: {\n prompt: action.thought || '',\n bbox: pointToBbox(\n { x: endPoint[0], y: endPoint[1] },\n size.width,\n size.height,\n ),\n },\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'type') {\n transformActions.push({\n type: 'Input',\n param: {\n value: 
action.action_inputs.content,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'scroll') {\n transformActions.push({\n type: 'Scroll',\n param: {\n direction: action.action_inputs.direction,\n },\n thought: action.thought || '',\n });\n } else if (actionType === 'finished') {\n shouldContinue = false;\n transformActions.push({\n type: 'Finished',\n param: {},\n thought: action.thought || '',\n });\n } else if (actionType === 'hotkey') {\n if (!action.action_inputs.key) {\n console.warn(\n 'No key found in action: hotkey. Will not perform action.',\n );\n } else {\n const keys = transformHotkeyInput(action.action_inputs.key);\n\n transformActions.push({\n type: 'KeyboardPress',\n param: {\n keyName: keys,\n },\n thought: action.thought || '',\n });\n }\n } else if (actionType === 'wait') {\n transformActions.push({\n type: 'Sleep',\n param: {\n timeMs: 1000,\n },\n thought: action.thought || '',\n });\n }\n });\n\n if (transformActions.length === 0) {\n throw new Error(`No actions found, response: ${res.content}`, {\n cause: {\n prediction: res.content,\n parsed,\n },\n });\n }\n\n debug('transformActions', JSON.stringify(transformActions, null, 2));\n const log = getSummary(res.content);\n\n conversationHistory.append({\n role: 'assistant',\n content: log,\n });\n\n return {\n actions: transformActions,\n log,\n usage: res.usage,\n rawResponse: JSON.stringify(res.content, undefined, 2),\n more_actions_needed_by_instruction: shouldContinue,\n };\n}\n\n/**\n * Converts bounding box notation to coordinate points\n * @param text - The text containing bbox tags to be converted\n * @returns The text with bbox tags replaced by coordinate points\n */\nfunction convertBboxToCoordinates(text: string): string {\n // Match the four numbers after <bbox>\n const pattern = /<bbox>(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)<\\/bbox>/g;\n\n function replaceMatch(\n match: string,\n x1: string,\n y1: string,\n x2: string,\n y2: string,\n ): string {\n // Convert 
strings to numbers and calculate center point\n const x1Num = Number.parseInt(x1, 10);\n const y1Num = Number.parseInt(y1, 10);\n const x2Num = Number.parseInt(x2, 10);\n const y2Num = Number.parseInt(y2, 10);\n\n // Use Math.floor to truncate and calculate center point\n const x = Math.floor((x1Num + x2Num) / 2);\n const y = Math.floor((y1Num + y2Num) / 2);\n\n // Return formatted coordinate string\n return `(${x},${y})`;\n }\n\n // Remove [EOS] and replace <bbox> coordinates\n const cleanedText = text.replace(/\\[EOS\\]/g, '');\n return cleanedText.replace(pattern, replaceMatch).trim();\n}\n\nfunction getPoint(startBox: string, size: { width: number; height: number }) {\n const [x, y] = JSON.parse(startBox);\n return [x * size.width, y * size.height];\n}\n\ninterface BaseAction {\n action_type: ActionType;\n action_inputs: Record<string, any>;\n reflection: string | null;\n thought: string | null;\n}\n\ninterface ClickAction extends BaseAction {\n action_type: 'click';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface DragAction extends BaseAction {\n action_type: 'drag';\n action_inputs: {\n start_box: string; // JSON string of [x, y] coordinates\n end_box: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface WaitAction extends BaseAction {\n action_type: 'wait';\n action_inputs: {\n time: string; // JSON string of [x, y] coordinates\n };\n}\n\ninterface TypeAction extends BaseAction {\n action_type: 'type';\n action_inputs: {\n content: string;\n };\n}\n\ninterface HotkeyAction extends BaseAction {\n action_type: 'hotkey';\n action_inputs: {\n key: string;\n };\n}\n\ninterface ScrollAction extends BaseAction {\n action_type: 'scroll';\n action_inputs: {\n direction: 'up' | 'down';\n };\n}\n\ninterface FinishedAction extends BaseAction {\n action_type: 'finished';\n action_inputs: Record<string, never>;\n}\n\nexport type Action =\n | ClickAction\n | DragAction\n | TypeAction\n | HotkeyAction\n | 
ScrollAction\n | FinishedAction\n | WaitAction;\n\nexport async function resizeImageForUiTars(\n imageBase64: string,\n size: Size,\n uiTarsVersion: UITarsModelVersion | undefined,\n) {\n if (uiTarsVersion === UITarsModelVersion.V1_5) {\n debug('ui-tars-v1.5, will check image size', size);\n const currentPixels = size.width * size.height;\n const maxPixels = 16384 * 28 * 28; //\n if (currentPixels > maxPixels) {\n const resizeFactor = Math.sqrt(maxPixels / currentPixels);\n const newWidth = Math.floor(size.width * resizeFactor);\n const newHeight = Math.floor(size.height * resizeFactor);\n debug(\n 'resize image for ui-tars, new width: %s, new height: %s',\n newWidth,\n newHeight,\n );\n const resizedImage = await resizeImgBase64(imageBase64, {\n width: newWidth,\n height: newHeight,\n });\n return resizedImage;\n }\n }\n return imageBase64;\n}\n"],"names":["debug","getDebug","bboxSize","pointToBbox","point","width","height","Math","uiTarsPlanning","userInstruction","options","conversationHistory","context","modelConfig","uiTarsModelVersion","systemPrompt","getUiTarsPlanningPrompt","imagePayload","resizeImageForUiTars","res","callAIWithStringResponse","AIActionType","convertedText","convertBboxToCoordinates","size","parsed","actionParser","JSON","transformActions","shouldContinue","action","actionType","assert","getPoint","startPoint","endPoint","keys","transformHotkeyInput","console","Error","log","getSummary","undefined","text","pattern","replaceMatch","match","x1","y1","x2","y2","x1Num","Number","y1Num","x2Num","y2Num","x","y","cleanedText","startBox","imageBase64","uiTarsVersion","UITarsModelVersion","currentPixels","maxPixels","resizeFactor","newWidth","newHeight","resizedImage","resizeImgBase64"],"mappings":";;;;;;;;;AAyBA,MAAMA,QAAQC,SAAS;AACvB,MAAMC,WAAW;AACjB,MAAMC,cAAc,CAClBC,OACAC,OACAC,SAEO;QACLC,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAG;QAC5CK,KAAK,KAAK,CAACA,KAAK,GAAG,CAAC
H,MAAM,CAAC,GAAGF,WAAW,GAAGG;QAC5CE,KAAK,KAAK,CAACA,KAAK,GAAG,CAACH,MAAM,CAAC,GAAGF,WAAW,GAAGI;KAC7C;AAGI,eAAeE,eACpBC,eAAuB,EACvBC,OAIC;IAED,MAAM,EAAEC,mBAAmB,EAAEC,OAAO,EAAEC,WAAW,EAAE,GAAGH;IACtD,MAAM,EAAEI,kBAAkB,EAAE,GAAGD;IAC/B,MAAME,eAAeC,4BAA4BP;IAEjD,MAAMQ,eAAe,MAAMC,qBACzBN,QAAQ,gBAAgB,EACxBA,QAAQ,IAAI,EACZE;IAGFH,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS;YACP;gBACE,MAAM;gBACN,WAAW;oBACT,KAAKM;gBACP;YACF;SACD;IACH;IAEA,MAAME,MAAM,MAAMC,yBAChB;QACE;YACE,MAAM;YACN,SAASL;QACX;WACGJ,oBAAoB,QAAQ;KAChC,EACDU,aAAa,eAAe,EAC5BR;IAEF,MAAMS,gBAAgBC,yBAAyBJ,IAAI,OAAO;IAE1D,MAAM,EAAEK,IAAI,EAAE,GAAGZ;IACjB,MAAM,EAAEa,MAAM,EAAE,GAAGC,aAAa;QAC9B,YAAYJ;QACZ,QAAQ;YAAC;YAAM;SAAK;QACpB,eAAe;YACb,OAAOE,KAAK,KAAK;YACjB,QAAQA,KAAK,MAAM;QACrB;QACA,UAAUV;IACZ;IAEAd,MACE,oBACAc,oBACA,YACAa,KAAK,SAAS,CAACF;IAGjB,MAAMG,mBAAqC,EAAE;IAC7C,IAAIC,iBAAiB;IACrBJ,OAAO,OAAO,CAAC,CAACK;QACd,MAAMC,aAAcD,AAAAA,CAAAA,OAAO,WAAW,IAAI,EAAC,EAAG,WAAW;QACzD,IAAIC,AAAe,YAAfA,YAAwB;YAC1BC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvC,MAAM1B,QAAQ6B,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEN;YACvDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,QAAQ;wBACN,QAAQE,OAAO,OAAO,IAAI;wBAC1B,MAAM3B,YACJ;4BAAE,GAAGC,KAAK,CAAC,EAAE;4BAAE,GAAGA,KAAK,CAAC,EAAE;wBAAC,GAC3BoB,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;YACF;QACF,OAAO,IAAIO,AAAe,WAAfA,YAAuB;YAChCC,OAAOF,OAAO,aAAa,CAAC,SAAS,EAAE;YACvCE,OAAOF,OAAO,aAAa,CAAC,OAAO,EAAE;YACrC,MAAMI,aAAaD,SAASH,OAAO,aAAa,CAAC,SAAS,EAAEN;YAC5D,MAAMW,WAAWF,SAASH,OAAO,aAAa,CAAC,OAAO,EAAEN;YACxDI,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,MAAM;wBACJ,QAAQE,OAAO,OAAO,IAAI;wBAC1B,MAAM3B,YACJ;4BAAE,GAAG+B,UAAU,CAAC,EAAE;4BAAE,GAAGA,UAAU,CAAC,EAAE;wBAAC,GACrCV,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;oBACA,IAAI;wBACF,QAAQM,OAAO,OAAO,IAAI;wBAC1B,MAAM3B,YACJ;4BAAE,GAAGgC,QAAQ,CAAC,EAAE;4BAAE,GAAGA,QAAQ,CAAC,EAAE;wBAAC,GACjCX,KAAK,KAAK,EACVA,KAAK,MAAM;oBAEf;gBACF;gBACA,SAASM,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,WAAfA,YACTH,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,OAAOE,OAAO,aAAa,CAAC,OAAO;YACrC;YACA,SAASA,OAAO,OAAO
,IAAI;QAC7B;aACK,IAAIC,AAAe,aAAfA,YACTH,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,WAAWE,OAAO,aAAa,CAAC,SAAS;YAC3C;YACA,SAASA,OAAO,OAAO,IAAI;QAC7B;aACK,IAAIC,AAAe,eAAfA,YAA2B;YACpCF,iBAAiB;YACjBD,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO,CAAC;gBACR,SAASE,OAAO,OAAO,IAAI;YAC7B;QACF,OAAO,IAAIC,AAAe,aAAfA,YACT,IAAKD,OAAO,aAAa,CAAC,GAAG,EAItB;YACL,MAAMM,OAAOC,qBAAqBP,OAAO,aAAa,CAAC,GAAG;YAE1DF,iBAAiB,IAAI,CAAC;gBACpB,MAAM;gBACN,OAAO;oBACL,SAASQ;gBACX;gBACA,SAASN,OAAO,OAAO,IAAI;YAC7B;QACF,OAbEQ,QAAQ,IAAI,CACV;aAaC,IAAIP,AAAe,WAAfA,YACTH,iBAAiB,IAAI,CAAC;YACpB,MAAM;YACN,OAAO;gBACL,QAAQ;YACV;YACA,SAASE,OAAO,OAAO,IAAI;QAC7B;IAEJ;IAEA,IAAIF,AAA4B,MAA5BA,iBAAiB,MAAM,EACzB,MAAM,IAAIW,MAAM,CAAC,4BAA4B,EAAEpB,IAAI,OAAO,EAAE,EAAE;QAC5D,OAAO;YACL,YAAYA,IAAI,OAAO;YACvBM;QACF;IACF;IAGFzB,MAAM,oBAAoB2B,KAAK,SAAS,CAACC,kBAAkB,MAAM;IACjE,MAAMY,MAAMC,WAAWtB,IAAI,OAAO;IAElCR,oBAAoB,MAAM,CAAC;QACzB,MAAM;QACN,SAAS6B;IACX;IAEA,OAAO;QACL,SAASZ;QACTY;QACA,OAAOrB,IAAI,KAAK;QAChB,aAAaQ,KAAK,SAAS,CAACR,IAAI,OAAO,EAAEuB,QAAW;QACpD,oCAAoCb;IACtC;AACF;AAOA,SAASN,yBAAyBoB,IAAY;IAE5C,MAAMC,UAAU;IAEhB,SAASC,aACPC,KAAa,EACbC,EAAU,EACVC,EAAU,EACVC,EAAU,EACVC,EAAU;QAGV,MAAMC,QAAQC,OAAO,QAAQ,CAACL,IAAI;QAClC,MAAMM,QAAQD,OAAO,QAAQ,CAACJ,IAAI;QAClC,MAAMM,QAAQF,OAAO,QAAQ,CAACH,IAAI;QAClC,MAAMM,QAAQH,OAAO,QAAQ,CAACF,IAAI;QAGlC,MAAMM,IAAIjD,KAAK,KAAK,CAAE4C,AAAAA,CAAAA,QAAQG,KAAI,IAAK;QACvC,MAAMG,IAAIlD,KAAK,KAAK,CAAE8C,AAAAA,CAAAA,QAAQE,KAAI,IAAK;QAGvC,OAAO,CAAC,CAAC,EAAEC,EAAE,CAAC,EAAEC,EAAE,CAAC,CAAC;IACtB;IAGA,MAAMC,cAAcf,KAAK,OAAO,CAAC,YAAY;IAC7C,OAAOe,YAAY,OAAO,CAACd,SAASC,cAAc,IAAI;AACxD;AAEA,SAASZ,SAAS0B,QAAgB,EAAEnC,IAAuC;IACzE,MAAM,CAACgC,GAAGC,EAAE,GAAG9B,KAAK,KAAK,CAACgC;IAC1B,OAAO;QAACH,IAAIhC,KAAK,KAAK;QAAEiC,IAAIjC,KAAK,MAAM;KAAC;AAC1C;AAkEO,eAAeN,qBACpB0C,WAAmB,EACnBpC,IAAU,EACVqC,aAA6C;IAE7C,IAAIA,kBAAkBC,mBAAmB,IAAI,EAAE;QAC7C9D,MAAM,uCAAuCwB;QAC7C,MAAMuC,gBAAgBvC,KAAK,KAAK,GAAGA,KAAK,MAAM;QAC9C,MAAMwC,YAAY;QAClB,IAAID,gBAAgBC,WAAW;YAC7B,MAAMC,eAAe1D,KAAK,IAAI,CAACyD,YAAYD;YAC3C,MAAMG,WAAW3D,K
AAK,KAAK,CAACiB,KAAK,KAAK,GAAGyC;YACzC,MAAME,YAAY5D,KAAK,KAAK,CAACiB,KAAK,MAAM,GAAGyC;YAC3CjE,MACE,2DACAkE,UACAC;YAEF,MAAMC,eAAe,MAAMC,gBAAgBT,aAAa;gBACtD,OAAOM;gBACP,QAAQC;YACV;YACA,OAAOC;QACT;IACF;IACA,OAAOR;AACT"}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { getRpasceneLocationSchema } from "../ai-model/index.mjs";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
// Runtime shell of the device-interface base class. The emitted class body is
// empty: the TypeScript source embedded in this package's source map declares
// only abstract members, and none of them appear in this compiled output.
class AbstractInterface {
|
|
4
|
+
}
|
|
5
|
+
/**
 * Identity helper used to declare a device action.
 *
 * Hands back the very same `config` object it receives; nothing is validated,
 * copied or wrapped at runtime.
 *
 * @param {object} config - Action definition. The helpers in this module pass
 *   `name`, `description`, `paramSchema`, `call` and usually `interfaceAlias`.
 * @returns {object} The same `config` reference that was passed in.
 */
const defineAction = (config) => {
  return config;
};
|
|
6
|
+
/**
 * Zod object schema for the parameters of the `Tap` action.
 *
 * - `locate`: the element to tap, using the schema returned by
 *   `getRpasceneLocationSchema()` (no `.optional()` is applied here).
 */
const actionTapParamSchema = z.object({
  locate: getRpasceneLocationSchema().describe('The element to be tapped'),
});
|
|
9
|
+
/**
 * Build the `Tap` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionTap = (call) => {
  const tapAction = {
    name: 'Tap',
    description: 'Tap the element',
    interfaceAlias: 'aiTap',
    paramSchema: actionTapParamSchema,
    call,
  };
  return defineAction(tapAction);
};
|
|
16
|
+
/**
 * Zod object schema for the parameters of the `RightClick` action.
 *
 * - `locate`: the element to right click, using the schema returned by
 *   `getRpasceneLocationSchema()` (no `.optional()` is applied here).
 */
const actionRightClickParamSchema = z.object({
  locate: getRpasceneLocationSchema().describe('The element to be right clicked'),
});
|
|
19
|
+
/**
 * Build the `RightClick` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionRightClick = (call) => {
  const rightClickAction = {
    name: 'RightClick',
    description: 'Right click the element',
    interfaceAlias: 'aiRightClick',
    paramSchema: actionRightClickParamSchema,
    call,
  };
  return defineAction(rightClickAction);
};
|
|
26
|
+
/**
 * Zod object schema for the parameters of the `DoubleClick` action.
 *
 * - `locate`: the element to double click, using the schema returned by
 *   `getRpasceneLocationSchema()` (no `.optional()` is applied here).
 */
const actionDoubleClickParamSchema = z.object({
  locate: getRpasceneLocationSchema().describe('The element to be double clicked'),
});
|
|
29
|
+
/**
 * Build the `DoubleClick` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionDoubleClick = (call) => {
  const doubleClickAction = {
    name: 'DoubleClick',
    description: 'Double click the element',
    interfaceAlias: 'aiDoubleClick',
    paramSchema: actionDoubleClickParamSchema,
    call,
  };
  return defineAction(doubleClickAction);
};
|
|
36
|
+
/**
 * Zod object schema for the parameters of the `Hover` action.
 *
 * - `locate`: the element to hover, using the schema returned by
 *   `getRpasceneLocationSchema()` (no `.optional()` is applied here).
 */
const actionHoverParamSchema = z.object({
  locate: getRpasceneLocationSchema().describe('The element to be hovered'),
});
|
|
39
|
+
/**
 * Build the `Hover` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionHover = (call) => {
  const hoverAction = {
    name: 'Hover',
    description: 'Move the mouse to the element',
    interfaceAlias: 'aiHover',
    paramSchema: actionHoverParamSchema,
    call,
  };
  return defineAction(hoverAction);
};
|
|
46
|
+
/**
 * Zod object schema for the parameters of the `Input` action.
 *
 * - `value`: accepts a string or a number and converts it with `String(...)`,
 *   so the parsed value is always a string.
 * - `locate`: optional target element.
 * - `mode`: one of 'replace' | 'clear' | 'append'.
 *   NOTE(review): `.optional()` wraps the `.default('replace')` schema here;
 *   whether an omitted `mode` parses to 'replace' or to `undefined` depends on
 *   how the installed zod version resolves that wrapper order. Confirm before
 *   relying on the default being filled in.
 */
const actionInputParamSchema = z.object({
  value: z
    .union([z.string(), z.number()])
    .transform((raw) => String(raw))
    .describe('The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.'),
  locate: getRpasceneLocationSchema()
    .describe('The element to be input')
    .optional(),
  mode: z
    .enum(['replace', 'clear', 'append'])
    .default('replace')
    .optional()
    .describe('Input mode: "replace" (default) - clear the field and input the value; "append" - append the value to existing content; "clear" - clear the field without inputting new text.'),
});
|
|
58
|
+
/**
 * Build the `Input` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionInput = (call) => {
  const inputAction = {
    name: 'Input',
    description: 'Input the value into the element',
    interfaceAlias: 'aiInput',
    paramSchema: actionInputParamSchema,
    call,
  };
  return defineAction(inputAction);
};
|
|
65
|
+
/**
 * Zod object schema for the parameters of the `KeyboardPress` action.
 *
 * - `locate`: optional element to click before the key is pressed.
 * - `keyName`: string naming the key; combinations are joined with '+'.
 */
const actionKeyboardPressParamSchema = z.object({
  locate: getRpasceneLocationSchema()
    .describe('The element to be clicked before pressing the key')
    .optional(),
  keyName: z
    .string()
    .describe("The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'"),
});
|
|
69
|
+
/**
 * Build the `KeyboardPress` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionKeyboardPress = (call) => {
  const keyboardPressAction = {
    name: 'KeyboardPress',
    description: 'Press a key or key combination, like "Enter", "Tab", "Escape", or "Control+A", "Shift+Enter". Do not use this to type text.',
    interfaceAlias: 'aiKeyboardPress',
    paramSchema: actionKeyboardPressParamSchema,
    call,
  };
  return defineAction(keyboardPressAction);
};
|
|
76
|
+
/**
 * Zod object schema for the parameters of the `Scroll` action.
 *
 * - `direction`: 'down' | 'up' | 'right' | 'left', defaulting to 'down'.
 * - `scrollType`: 'once' or one of the `until*` variants, defaulting to 'once'.
 * - `distance`: optional number of pixels; `null` is also accepted.
 * - `locate`: optional element to scroll.
 */
const actionScrollParamSchema = z.object({
  direction: z
    .enum(['down', 'up', 'right', 'left'])
    .default('down')
    .describe('The direction to scroll'),
  scrollType: z
    .enum(['once', 'untilBottom', 'untilTop', 'untilRight', 'untilLeft'])
    .default('once')
    .describe('The scroll type'),
  distance: z
    .number()
    .nullable()
    .optional()
    .describe('The distance in pixels to scroll'),
  locate: getRpasceneLocationSchema()
    .optional()
    .describe('The element to be scrolled'),
});
|
|
93
|
+
/**
 * Build the `Scroll` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionScroll = (call) => {
  const scrollAction = {
    name: 'Scroll',
    description: 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',
    interfaceAlias: 'aiScroll',
    paramSchema: actionScrollParamSchema,
    call,
  };
  return defineAction(scrollAction);
};
|
|
100
|
+
/**
 * Zod object schema for the parameters of the `DragAndDrop` action.
 *
 * - `from`: position where the drag starts.
 * - `to`: position where the item is dropped.
 * Both use the schema returned by `getRpasceneLocationSchema()`; neither is
 * marked optional here.
 */
const actionDragAndDropParamSchema = z.object({
  from: getRpasceneLocationSchema().describe('The position to be dragged'),
  to: getRpasceneLocationSchema().describe('The position to be dropped'),
});
|
|
104
|
+
/**
 * Build the `DragAndDrop` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionDragAndDrop = (call) => {
  const dragAndDropAction = {
    name: 'DragAndDrop',
    description: 'Drag and drop the element',
    interfaceAlias: 'aiDragAndDrop',
    paramSchema: actionDragAndDropParamSchema,
    call,
  };
  return defineAction(dragAndDropAction);
};
|
|
111
|
+
/**
 * Zod object schema for the parameters of the `LongPress` action.
 *
 * - `locate`: the element to long press (no `.optional()` is applied here).
 * - `duration`: press length in milliseconds.
 *   NOTE(review): `.optional()` wraps the `.default(500)` schema here; whether
 *   an omitted `duration` parses to 500 or to `undefined` depends on how the
 *   installed zod version resolves that wrapper order. Confirm before relying
 *   on the default being filled in.
 */
const ActionLongPressParamSchema = z.object({
  locate: getRpasceneLocationSchema().describe('The element to be long pressed'),
  duration: z
    .number()
    .default(500)
    .optional()
    .describe('Long press duration in milliseconds'),
});
|
|
115
|
+
/**
 * Build the `LongPress` action definition around a caller-supplied handler.
 *
 * Unlike most sibling helpers in this module, no `interfaceAlias` is set.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionLongPress = (call) => {
  const longPressAction = {
    name: 'LongPress',
    description: 'Long press the element',
    paramSchema: ActionLongPressParamSchema,
    call,
  };
  return defineAction(longPressAction);
};
|
|
121
|
+
/**
 * Zod object schema for the parameters of the `Swipe` action.
 *
 * - `start`: optional starting point.
 * - `direction`: optional 'up' | 'down' | 'left' | 'right'.
 * - `distance`: optional number of pixels.
 * - `end`: optional ending point.
 * - `duration`: number of milliseconds, defaulting to 300.
 * - `repeat`: optional number; no default is declared on the schema itself.
 *
 * The field descriptions call `end` and `distance` mutually exclusive, but
 * this schema does not enforce that: each of them is independently optional.
 */
const ActionSwipeParamSchema = z.object({
  start: getRpasceneLocationSchema()
    .optional()
    .describe('Starting point of the swipe gesture, if not specified, the center of the page will be used'),
  direction: z
    .enum(['up', 'down', 'left', 'right'])
    .optional()
    .describe('The direction to swipe (required when using distance). The direction means the direction of the finger swipe.'),
  distance: z
    .number()
    .optional()
    .describe('The distance in pixels to swipe (mutually exclusive with end)'),
  end: getRpasceneLocationSchema()
    .optional()
    .describe('Ending point of the swipe gesture (mutually exclusive with distance)'),
  duration: z
    .number()
    .default(300)
    .describe('Duration of the swipe gesture in milliseconds'),
  repeat: z
    .number()
    .optional()
    .describe('The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)'),
});
|
|
134
|
+
/**
 * Build the `Swipe` action definition around a caller-supplied handler.
 *
 * Unlike most sibling helpers in this module, no `interfaceAlias` is set.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionSwipe = (call) => {
  const swipeAction = {
    name: 'Swipe',
    description: 'Perform a swipe gesture. You must specify either "end" (target location) or "distance" + "direction" - they are mutually exclusive. Use "end" for precise location-based swipes, or "distance" + "direction" for relative movement.',
    paramSchema: ActionSwipeParamSchema,
    call,
  };
  return defineAction(swipeAction);
};
|
|
140
|
+
/**
 * Zod object schema for the parameters of the `ClearInput` action.
 *
 * - `locate`: the input field to clear, using the schema returned by
 *   `getRpasceneLocationSchema()` (no `.optional()` is applied here).
 */
const actionClearInputParamSchema = z.object({
  locate: getRpasceneLocationSchema().describe('The input field to be cleared'),
});
|
|
143
|
+
/**
 * Build the `ClearInput` action definition around a caller-supplied handler.
 *
 * @param {Function} call - Handler stored on the definition as `call`.
 * @returns {object} The definition object, passed through `defineAction`.
 */
const defineActionClearInput = (call) => {
  const clearInputAction = {
    name: 'ClearInput',
    description: 'Clear the text content of an input field',
    interfaceAlias: 'aiClearInput',
    paramSchema: actionClearInputParamSchema,
    call,
  };
  return defineAction(clearInputAction);
};
|
|
150
|
+
export { AbstractInterface, ActionLongPressParamSchema, ActionSwipeParamSchema, actionClearInputParamSchema, actionDoubleClickParamSchema, actionDragAndDropParamSchema, actionHoverParamSchema, actionInputParamSchema, actionKeyboardPressParamSchema, actionRightClickParamSchema, actionScrollParamSchema, actionTapParamSchema, defineAction, defineActionClearInput, defineActionDoubleClick, defineActionDragAndDrop, defineActionHover, defineActionInput, defineActionKeyboardPress, defineActionLongPress, defineActionRightClick, defineActionScroll, defineActionSwipe, defineActionTap };
|
|
151
|
+
|
|
152
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"device\\index.mjs","sources":["webpack://@rpascene/core/./src/device/index.ts"],"sourcesContent":["import { getRpasceneLocationSchema } from '@/ai-model';\nimport type { DeviceAction, LocateResultElement } from '@/types';\nimport type { ElementNode } from '@rpascene/shared/extractor';\nimport { _keyDefinitions } from '@rpascene/shared/us-keyboard-layout';\nimport { z } from 'zod';\nimport type { ElementCacheFeature, Rect, Size, UIContext } from '../types';\n\nexport abstract class AbstractInterface {\n abstract interfaceType: string;\n\n abstract screenshotBase64(): Promise<string>;\n abstract size(): Promise<Size>;\n abstract actionSpace(): DeviceAction[] | Promise<DeviceAction[]>;\n\n abstract cacheFeatureForRect?(\n rect: Rect,\n opt?: { _orderSensitive: boolean },\n ): Promise<ElementCacheFeature>;\n abstract rectMatchesCacheFeature?(\n feature: ElementCacheFeature,\n ): Promise<Rect>;\n\n abstract destroy?(): Promise<void>;\n\n abstract describe?(): string;\n abstract beforeInvokeAction?(actionName: string, param: any): Promise<void>;\n abstract afterInvokeAction?(actionName: string, param: any): Promise<void>;\n\n // @deprecated do NOT extend this method\n abstract getElementsNodeTree?: () => Promise<ElementNode>;\n\n // @deprecated do NOT extend this method\n abstract url?: () => string | Promise<string>;\n\n // @deprecated do NOT extend this method\n abstract evaluateJavaScript?<T = any>(script: string): Promise<T>;\n\n // @deprecated do NOT extend this method\n abstract getContext?(): Promise<UIContext>;\n}\n\n// Generic function to define actions with proper type inference\n// TRuntime allows specifying a different type for the runtime parameter (after location resolution)\nexport const defineAction = <\n TSchema extends z.ZodType,\n TRuntime = z.infer<TSchema>,\n>(\n config: {\n name: string;\n description: string;\n interfaceAlias?: string;\n paramSchema: TSchema;\n call: (param: TRuntime) => Promise<void>;\n } & Partial<\n Omit<\n 
DeviceAction<TRuntime>,\n 'name' | 'description' | 'interfaceAlias' | 'paramSchema' | 'call'\n >\n >,\n): DeviceAction<TRuntime> => {\n return config as any; // Type assertion needed because schema validation type differs from runtime type\n};\n\n// Tap\nexport const actionTapParamSchema = z.object({\n locate: getRpasceneLocationSchema().describe('The element to be tapped'),\n});\n// Override the inferred type to use LocateResultElement for the runtime locate field\nexport type ActionTapParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionTap = (\n call: (param: ActionTapParam) => Promise<void>,\n): DeviceAction<ActionTapParam> => {\n return defineAction<typeof actionTapParamSchema, ActionTapParam>({\n name: 'Tap',\n description: 'Tap the element',\n interfaceAlias: 'aiTap',\n paramSchema: actionTapParamSchema,\n call,\n });\n};\n\n// RightClick\nexport const actionRightClickParamSchema = z.object({\n locate: getRpasceneLocationSchema().describe(\n 'The element to be right clicked',\n ),\n});\nexport type ActionRightClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionRightClick = (\n call: (param: ActionRightClickParam) => Promise<void>,\n): DeviceAction<ActionRightClickParam> => {\n return defineAction<\n typeof actionRightClickParamSchema,\n ActionRightClickParam\n >({\n name: 'RightClick',\n description: 'Right click the element',\n interfaceAlias: 'aiRightClick',\n paramSchema: actionRightClickParamSchema,\n call,\n });\n};\n\n// DoubleClick\nexport const actionDoubleClickParamSchema = z.object({\n locate: getRpasceneLocationSchema().describe(\n 'The element to be double clicked',\n ),\n});\nexport type ActionDoubleClickParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionDoubleClick = (\n call: (param: ActionDoubleClickParam) => Promise<void>,\n): DeviceAction<ActionDoubleClickParam> => {\n return defineAction<\n typeof actionDoubleClickParamSchema,\n ActionDoubleClickParam\n >({\n name: 
'DoubleClick',\n description: 'Double click the element',\n interfaceAlias: 'aiDoubleClick',\n paramSchema: actionDoubleClickParamSchema,\n call,\n });\n};\n\n// Hover\nexport const actionHoverParamSchema = z.object({\n locate: getRpasceneLocationSchema().describe('The element to be hovered'),\n});\nexport type ActionHoverParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionHover = (\n call: (param: ActionHoverParam) => Promise<void>,\n): DeviceAction<ActionHoverParam> => {\n return defineAction<typeof actionHoverParamSchema, ActionHoverParam>({\n name: 'Hover',\n description: 'Move the mouse to the element',\n interfaceAlias: 'aiHover',\n paramSchema: actionHoverParamSchema,\n call,\n });\n};\n\n// Input\nexport const actionInputParamSchema = z.object({\n value: z\n .union([z.string(), z.number()])\n .transform((val) => String(val))\n .describe(\n 'The text to input. Provide the final content for replace/append modes, or an empty string when using clear mode to remove existing text.',\n ),\n locate: getRpasceneLocationSchema()\n .describe('The element to be input')\n .optional(),\n mode: z\n .enum(['replace', 'clear', 'append'])\n .default('replace')\n .optional()\n .describe(\n 'Input mode: \"replace\" (default) - clear the field and input the value; \"append\" - append the value to existing content; \"clear\" - clear the field without inputting new text.',\n ),\n});\nexport type ActionInputParam = {\n value: string;\n locate?: LocateResultElement;\n mode?: 'replace' | 'clear' | 'append';\n};\n\nexport const defineActionInput = (\n call: (param: ActionInputParam) => Promise<void>,\n): DeviceAction<ActionInputParam> => {\n return defineAction<typeof actionInputParamSchema, ActionInputParam>({\n name: 'Input',\n description: 'Input the value into the element',\n interfaceAlias: 'aiInput',\n paramSchema: actionInputParamSchema,\n call,\n });\n};\n\n// KeyboardPress\nexport const actionKeyboardPressParamSchema = z.object({\n locate: 
getRpasceneLocationSchema()\n .describe('The element to be clicked before pressing the key')\n .optional(),\n keyName: z\n .string()\n .describe(\n \"The key to be pressed. Use '+' for key combinations, e.g., 'Control+A', 'Shift+Enter'\",\n ),\n});\nexport type ActionKeyboardPressParam = {\n locate?: LocateResultElement;\n keyName: string;\n};\n\nexport const defineActionKeyboardPress = (\n call: (param: ActionKeyboardPressParam) => Promise<void>,\n): DeviceAction<ActionKeyboardPressParam> => {\n return defineAction<\n typeof actionKeyboardPressParamSchema,\n ActionKeyboardPressParam\n >({\n name: 'KeyboardPress',\n description:\n 'Press a key or key combination, like \"Enter\", \"Tab\", \"Escape\", or \"Control+A\", \"Shift+Enter\". Do not use this to type text.',\n interfaceAlias: 'aiKeyboardPress',\n paramSchema: actionKeyboardPressParamSchema,\n call,\n });\n};\n\n// Scroll\nexport const actionScrollParamSchema = z.object({\n direction: z\n .enum(['down', 'up', 'right', 'left'])\n .default('down')\n .describe('The direction to scroll'),\n scrollType: z\n .enum(['once', 'untilBottom', 'untilTop', 'untilRight', 'untilLeft'])\n .default('once')\n .describe('The scroll type'),\n distance: z\n .number()\n .nullable()\n .optional()\n .describe('The distance in pixels to scroll'),\n locate: getRpasceneLocationSchema()\n .optional()\n .describe('The element to be scrolled'),\n});\nexport type ActionScrollParam = {\n direction?: 'down' | 'up' | 'right' | 'left';\n scrollType?: 'once' | 'untilBottom' | 'untilTop' | 'untilRight' | 'untilLeft';\n distance?: number | null;\n locate?: LocateResultElement;\n};\n\nexport const defineActionScroll = (\n call: (param: ActionScrollParam) => Promise<void>,\n): DeviceAction<ActionScrollParam> => {\n return defineAction<typeof actionScrollParamSchema, ActionScrollParam>({\n name: 'Scroll',\n description:\n 'Scroll the page or an element. The direction to scroll, the scroll type, and the distance to scroll. 
The distance is the number of pixels to scroll. If not specified, use `down` direction, `once` scroll type, and `null` distance.',\n interfaceAlias: 'aiScroll',\n paramSchema: actionScrollParamSchema,\n call,\n });\n};\n\n// DragAndDrop\nexport const actionDragAndDropParamSchema = z.object({\n from: getRpasceneLocationSchema().describe('The position to be dragged'),\n to: getRpasceneLocationSchema().describe('The position to be dropped'),\n});\nexport type ActionDragAndDropParam = {\n from: LocateResultElement;\n to: LocateResultElement;\n};\n\nexport const defineActionDragAndDrop = (\n call: (param: ActionDragAndDropParam) => Promise<void>,\n): DeviceAction<ActionDragAndDropParam> => {\n return defineAction<\n typeof actionDragAndDropParamSchema,\n ActionDragAndDropParam\n >({\n name: 'DragAndDrop',\n description: 'Drag and drop the element',\n interfaceAlias: 'aiDragAndDrop',\n paramSchema: actionDragAndDropParamSchema,\n call,\n });\n};\n\nexport const ActionLongPressParamSchema = z.object({\n locate: getRpasceneLocationSchema().describe(\n 'The element to be long pressed',\n ),\n duration: z\n .number()\n .default(500)\n .optional()\n .describe('Long press duration in milliseconds'),\n});\n\nexport type ActionLongPressParam = {\n locate: LocateResultElement;\n duration?: number;\n};\nexport const defineActionLongPress = (\n call: (param: ActionLongPressParam) => Promise<void>,\n): DeviceAction<ActionLongPressParam> => {\n return defineAction<typeof ActionLongPressParamSchema, ActionLongPressParam>({\n name: 'LongPress',\n description: 'Long press the element',\n paramSchema: ActionLongPressParamSchema,\n call,\n });\n};\n\nexport const ActionSwipeParamSchema = z.object({\n start: getRpasceneLocationSchema()\n .optional()\n .describe(\n 'Starting point of the swipe gesture, if not specified, the center of the page will be used',\n ),\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .optional()\n .describe(\n 'The direction to swipe (required when using 
distance). The direction means the direction of the finger swipe.',\n ),\n distance: z\n .number()\n .optional()\n .describe('The distance in pixels to swipe (mutually exclusive with end)'),\n end: getRpasceneLocationSchema()\n .optional()\n .describe(\n 'Ending point of the swipe gesture (mutually exclusive with distance)',\n ),\n duration: z\n .number()\n .default(300)\n .describe('Duration of the swipe gesture in milliseconds'),\n repeat: z\n .number()\n .optional()\n .describe(\n 'The number of times to repeat the swipe gesture. 1 for default, 0 for infinite (e.g. endless swipe until the end of the page)',\n ),\n});\n\nexport type ActionSwipeParam = {\n start?: LocateResultElement;\n direction?: 'up' | 'down' | 'left' | 'right';\n distance?: number;\n end?: LocateResultElement;\n duration?: number;\n repeat?: number;\n};\n\nexport const defineActionSwipe = (\n call: (param: ActionSwipeParam) => Promise<void>,\n): DeviceAction<ActionSwipeParam> => {\n return defineAction<typeof ActionSwipeParamSchema, ActionSwipeParam>({\n name: 'Swipe',\n description:\n 'Perform a swipe gesture. You must specify either \"end\" (target location) or \"distance\" + \"direction\" - they are mutually exclusive. 
Use \"end\" for precise location-based swipes, or \"distance\" + \"direction\" for relative movement.',\n paramSchema: ActionSwipeParamSchema,\n call,\n });\n};\n\n// ClearInput\nexport const actionClearInputParamSchema = z.object({\n locate: getRpasceneLocationSchema().describe('The input field to be cleared'),\n});\nexport type ActionClearInputParam = {\n locate: LocateResultElement;\n};\n\nexport const defineActionClearInput = (\n call: (param: ActionClearInputParam) => Promise<void>,\n): DeviceAction<ActionClearInputParam> => {\n return defineAction<\n typeof actionClearInputParamSchema,\n ActionClearInputParam\n >({\n name: 'ClearInput',\n description: 'Clear the text content of an input field',\n interfaceAlias: 'aiClearInput',\n paramSchema: actionClearInputParamSchema,\n call,\n });\n};\n\nexport type { DeviceAction } from '../types';\n"],"names":["AbstractInterface","defineAction","config","actionTapParamSchema","z","getRpasceneLocationSchema","defineActionTap","call","actionRightClickParamSchema","defineActionRightClick","actionDoubleClickParamSchema","defineActionDoubleClick","actionHoverParamSchema","defineActionHover","actionInputParamSchema","val","String","defineActionInput","actionKeyboardPressParamSchema","defineActionKeyboardPress","actionScrollParamSchema","defineActionScroll","actionDragAndDropParamSchema","defineActionDragAndDrop","ActionLongPressParamSchema","defineActionLongPress","ActionSwipeParamSchema","defineActionSwipe","actionClearInputParamSchema","defineActionClearInput"],"mappings":";;AAOO,MAAeA;AAgCtB;AAIO,MAAMC,eAAe,CAI1BC,SAaOA;AAIF,MAAMC,uBAAuBC,EAAE,MAAM,CAAC;IAC3C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAMO,MAAMC,kBAAkB,CAC7BC,OAEON,aAA0D;QAC/D,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaE;QACbI;IACF;AAIK,MAAMC,8BAA8BJ,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMI,yBAAyB,CACpCF,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaO;QACbD;IACF;AAIK,MAAMG,+BAA+BN,EAAE,MAAM,CAAC;IACnD,QAAQC,4BAA4B,QAAQ,CAC1C;AAEJ;AAKO,MAAMM,0BAA0B,CA
CrCJ,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaS;QACbH;IACF;AAIK,MAAMK,yBAAyBR,EAAE,MAAM,CAAC;IAC7C,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMQ,oBAAoB,CAC/BN,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaW;QACbL;IACF;AAIK,MAAMO,yBAAyBV,EAAE,MAAM,CAAC;IAC7C,OAAOA,EAAAA,KACC,CAAC;QAACA,EAAE,MAAM;QAAIA,EAAE,MAAM;KAAG,EAC9B,SAAS,CAAC,CAACW,MAAQC,OAAOD,MAC1B,QAAQ,CACP;IAEJ,QAAQV,4BACL,QAAQ,CAAC,2BACT,QAAQ;IACX,MAAMD,CAAC,CAADA,OACC,CAAC;QAAC;QAAW;QAAS;KAAS,EACnC,OAAO,CAAC,WACR,QAAQ,GACR,QAAQ,CACP;AAEN;AAOO,MAAMa,oBAAoB,CAC/BV,OAEON,aAA8D;QACnE,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaa;QACbP;IACF;AAIK,MAAMW,iCAAiCd,EAAE,MAAM,CAAC;IACrD,QAAQC,4BACL,QAAQ,CAAC,qDACT,QAAQ;IACX,SAASD,EAAAA,MACA,GACN,QAAQ,CACP;AAEN;AAMO,MAAMe,4BAA4B,CACvCZ,OAEON,aAGL;QACA,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAaiB;QACbX;IACF;AAIK,MAAMa,0BAA0BhB,EAAE,MAAM,CAAC;IAC9C,WAAWA,CAAC,CAADA,OACJ,CAAC;QAAC;QAAQ;QAAM;QAAS;KAAO,EACpC,OAAO,CAAC,QACR,QAAQ,CAAC;IACZ,YAAYA,CAAC,CAADA,OACL,CAAC;QAAC;QAAQ;QAAe;QAAY;QAAc;KAAY,EACnE,OAAO,CAAC,QACR,QAAQ,CAAC;IACZ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,GACR,QAAQ,CAAC;IACZ,QAAQC,4BACL,QAAQ,GACR,QAAQ,CAAC;AACd;AAQO,MAAMgB,qBAAqB,CAChCd,OAEON,aAAgE;QACrE,MAAM;QACN,aACE;QACF,gBAAgB;QAChB,aAAamB;QACbb;IACF;AAIK,MAAMe,+BAA+BlB,EAAE,MAAM,CAAC;IACnD,MAAMC,4BAA4B,QAAQ,CAAC;IAC3C,IAAIA,4BAA4B,QAAQ,CAAC;AAC3C;AAMO,MAAMkB,0BAA0B,CACrChB,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAaqB;QACbf;IACF;AAGK,MAAMiB,6BAA6BpB,EAAE,MAAM,CAAC;IACjD,QAAQC,4BAA4B,QAAQ,CAC1C;IAEF,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,GACR,QAAQ,CAAC;AACd;AAMO,MAAMqB,wBAAwB,CACnClB,OAEON,aAAsE;QAC3E,MAAM;QACN,aAAa;QACb,aAAauB;QACbjB;IACF;AAGK,MAAMmB,yBAAyBtB,EAAE,MAAM,CAAC;IAC7C,OAAOC,4BACJ,QAAQ,GACR,QAAQ,CACP;IAEJ,WAAWD,CAAC,CAADA,OACJ,CAAC;QAAC;QAAM;QAAQ;QAAQ;KAAQ,EACpC,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUA,EAAAA,MACD,GACN,QAAQ,GACR,QAAQ,CAAC;IACZ,KAAKC,4BACF,QAAQ,GACR,QAAQ,CACP;IAEJ,UAAUD,EAAAA,MACD,GACN,OAAO,CAAC,KACR,QAAQ,CAAC;IACZ,QAAQA,EAAAA,MACC,GACN,QAAQ,GACR,QAAQ,CACP;AAEN;AAWO,MAAMuB,oBAA
oB,CAC/BpB,OAEON,aAA8D;QACnE,MAAM;QACN,aACE;QACF,aAAayB;QACbnB;IACF;AAIK,MAAMqB,8BAA8BxB,EAAE,MAAM,CAAC;IAClD,QAAQC,4BAA4B,QAAQ,CAAC;AAC/C;AAKO,MAAMwB,yBAAyB,CACpCtB,OAEON,aAGL;QACA,MAAM;QACN,aAAa;QACb,gBAAgB;QAChB,aAAa2B;QACbrB;IACF"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
import { httpImg2Base64, imageInfo, imageInfoOfBase64, localImg2Base64, resizeAndConvertImgBuffer, saveBase64Image, zoomForGPT4o } from "@rpascene/shared/img";
|
|
2
|
+
export { httpImg2Base64, imageInfo, imageInfoOfBase64, localImg2Base64, resizeAndConvertImgBuffer, saveBase64Image, zoomForGPT4o };
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { Executor } from "./ai-model/action-executor.mjs";
|
|
3
|
+
import insight from "./insight/index.mjs";
|
|
4
|
+
import { getVersion } from "./utils.mjs";
|
|
5
|
+
import { AiLocateElement, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, describeUserPage, getRpasceneLocationSchema, plan } from "./ai-model/index.mjs";
|
|
6
|
+
import { RPASCENE_MODEL_NAME } from "@rpascene/shared/env";
|
|
7
|
+
import { Agent, createAgent } from "./agent/index.mjs";
|
|
8
|
+
// Alias for the imported `insight` binding. The export list that follows
// publishes this alias as the module's default export (`src as default`),
// next to the named `Insight` export of the same value.
const src = insight;
|
|
9
|
+
export { Agent, AiLocateElement, Executor, insight as Insight, PointSchema, RPASCENE_MODEL_NAME, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, createAgent, src as default, describeUserPage, getRpasceneLocationSchema, getVersion, plan, z };
|
|
10
|
+
|
|
11
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.mjs","sources":["webpack://@rpascene/core/./src/index.ts"],"sourcesContent":["import { z } from 'zod';\nimport { Executor } from './ai-model/action-executor';\nimport Insight from './insight/index';\nimport { getVersion } from './utils';\n\nexport {\n plan,\n describeUserPage,\n AiLocateElement,\n getRpasceneLocationSchema,\n type RpasceneLocationResultType,\n PointSchema,\n SizeSchema,\n RectSchema,\n TMultimodalPromptSchema,\n TUserPromptSchema,\n type TMultimodalPrompt,\n type TUserPrompt,\n} from './ai-model/index';\n\nexport { RPASCENE_MODEL_NAME } from '@rpascene/shared/env';\n\nexport type * from './types';\n\nexport { z };\n\nexport default Insight;\nexport { Executor, Insight, getVersion };\n\nexport type {\n RpasceneYamlScript,\n RpasceneYamlTask,\n RpasceneYamlFlowItem,\n RpasceneYamlConfigResult,\n RpasceneYamlConfig,\n RpasceneYamlScriptWebEnv,\n RpasceneYamlScriptAndroidEnv,\n RpasceneYamlScriptIOSEnv,\n RpasceneYamlScriptEnv,\n LocateOption,\n DetailedLocateParam,\n} from './yaml';\n\nexport { Agent, type AgentOpt, createAgent } from './agent';\n"],"names":["Insight"],"mappings":";;;;;;;AA0BA,YAAeA"}
|