@godscene/core 1.7.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/es/agent/agent.mjs +767 -0
- package/dist/es/agent/common.mjs +0 -0
- package/dist/es/agent/execution-session.mjs +39 -0
- package/dist/es/agent/index.mjs +6 -0
- package/dist/es/agent/task-builder.mjs +343 -0
- package/dist/es/agent/task-cache.mjs +212 -0
- package/dist/es/agent/tasks.mjs +428 -0
- package/dist/es/agent/ui-utils.mjs +101 -0
- package/dist/es/agent/utils.mjs +167 -0
- package/dist/es/ai-model/auto-glm/actions.mjs +237 -0
- package/dist/es/ai-model/auto-glm/index.mjs +6 -0
- package/dist/es/ai-model/auto-glm/parser.mjs +237 -0
- package/dist/es/ai-model/auto-glm/planning.mjs +69 -0
- package/dist/es/ai-model/auto-glm/prompt.mjs +220 -0
- package/dist/es/ai-model/auto-glm/util.mjs +7 -0
- package/dist/es/ai-model/connectivity.mjs +136 -0
- package/dist/es/ai-model/conversation-history.mjs +193 -0
- package/dist/es/ai-model/index.mjs +12 -0
- package/dist/es/ai-model/inspect.mjs +395 -0
- package/dist/es/ai-model/llm-planning.mjs +231 -0
- package/dist/es/ai-model/prompt/common.mjs +5 -0
- package/dist/es/ai-model/prompt/describe.mjs +64 -0
- package/dist/es/ai-model/prompt/extraction.mjs +129 -0
- package/dist/es/ai-model/prompt/llm-locator.mjs +49 -0
- package/dist/es/ai-model/prompt/llm-planning.mjs +584 -0
- package/dist/es/ai-model/prompt/llm-section-locator.mjs +42 -0
- package/dist/es/ai-model/prompt/order-sensitive-judge.mjs +33 -0
- package/dist/es/ai-model/prompt/playwright-generator.mjs +115 -0
- package/dist/es/ai-model/prompt/ui-tars-planning.mjs +34 -0
- package/dist/es/ai-model/prompt/util.mjs +57 -0
- package/dist/es/ai-model/prompt/yaml-generator.mjs +201 -0
- package/dist/es/ai-model/service-caller/codex-app-server.mjs +573 -0
- package/dist/es/ai-model/service-caller/image-detail.mjs +4 -0
- package/dist/es/ai-model/service-caller/index.mjs +648 -0
- package/dist/es/ai-model/service-caller/request-timeout.mjs +47 -0
- package/dist/es/ai-model/ui-tars-planning.mjs +247 -0
- package/dist/es/common.mjs +382 -0
- package/dist/es/device/device-options.mjs +0 -0
- package/dist/es/device/index.mjs +340 -0
- package/dist/es/dump/html-utils.mjs +290 -0
- package/dist/es/dump/index.mjs +3 -0
- package/dist/es/dump/screenshot-restoration.mjs +30 -0
- package/dist/es/dump/screenshot-store.mjs +125 -0
- package/dist/es/index.mjs +17 -0
- package/dist/es/report-cli.mjs +149 -0
- package/dist/es/report-generator.mjs +203 -0
- package/dist/es/report-markdown.mjs +216 -0
- package/dist/es/report.mjs +287 -0
- package/dist/es/screenshot-item.mjs +120 -0
- package/dist/es/service/index.mjs +272 -0
- package/dist/es/service/utils.mjs +13 -0
- package/dist/es/skill/index.mjs +35 -0
- package/dist/es/task-runner.mjs +261 -0
- package/dist/es/task-timing.mjs +10 -0
- package/dist/es/tree.mjs +11 -0
- package/dist/es/types.mjs +202 -0
- package/dist/es/utils.mjs +232 -0
- package/dist/es/yaml/builder.mjs +11 -0
- package/dist/es/yaml/index.mjs +4 -0
- package/dist/es/yaml/player.mjs +425 -0
- package/dist/es/yaml/utils.mjs +100 -0
- package/dist/es/yaml.mjs +0 -0
- package/dist/lib/agent/agent.js +815 -0
- package/dist/lib/agent/common.js +5 -0
- package/dist/lib/agent/execution-session.js +73 -0
- package/dist/lib/agent/index.js +76 -0
- package/dist/lib/agent/task-builder.js +380 -0
- package/dist/lib/agent/task-cache.js +264 -0
- package/dist/lib/agent/tasks.js +471 -0
- package/dist/lib/agent/ui-utils.js +153 -0
- package/dist/lib/agent/utils.js +238 -0
- package/dist/lib/ai-model/auto-glm/actions.js +271 -0
- package/dist/lib/ai-model/auto-glm/index.js +64 -0
- package/dist/lib/ai-model/auto-glm/parser.js +280 -0
- package/dist/lib/ai-model/auto-glm/planning.js +103 -0
- package/dist/lib/ai-model/auto-glm/prompt.js +257 -0
- package/dist/lib/ai-model/auto-glm/util.js +44 -0
- package/dist/lib/ai-model/connectivity.js +180 -0
- package/dist/lib/ai-model/conversation-history.js +227 -0
- package/dist/lib/ai-model/index.js +127 -0
- package/dist/lib/ai-model/inspect.js +441 -0
- package/dist/lib/ai-model/llm-planning.js +268 -0
- package/dist/lib/ai-model/prompt/common.js +39 -0
- package/dist/lib/ai-model/prompt/describe.js +98 -0
- package/dist/lib/ai-model/prompt/extraction.js +169 -0
- package/dist/lib/ai-model/prompt/llm-locator.js +86 -0
- package/dist/lib/ai-model/prompt/llm-planning.js +621 -0
- package/dist/lib/ai-model/prompt/llm-section-locator.js +79 -0
- package/dist/lib/ai-model/prompt/order-sensitive-judge.js +70 -0
- package/dist/lib/ai-model/prompt/playwright-generator.js +176 -0
- package/dist/lib/ai-model/prompt/ui-tars-planning.js +71 -0
- package/dist/lib/ai-model/prompt/util.js +103 -0
- package/dist/lib/ai-model/prompt/yaml-generator.js +262 -0
- package/dist/lib/ai-model/service-caller/codex-app-server.js +622 -0
- package/dist/lib/ai-model/service-caller/image-detail.js +38 -0
- package/dist/lib/ai-model/service-caller/index.js +716 -0
- package/dist/lib/ai-model/service-caller/request-timeout.js +93 -0
- package/dist/lib/ai-model/ui-tars-planning.js +281 -0
- package/dist/lib/common.js +491 -0
- package/dist/lib/device/device-options.js +18 -0
- package/dist/lib/device/index.js +467 -0
- package/dist/lib/dump/html-utils.js +366 -0
- package/dist/lib/dump/index.js +58 -0
- package/dist/lib/dump/screenshot-restoration.js +64 -0
- package/dist/lib/dump/screenshot-store.js +165 -0
- package/dist/lib/index.js +184 -0
- package/dist/lib/report-cli.js +189 -0
- package/dist/lib/report-generator.js +244 -0
- package/dist/lib/report-markdown.js +253 -0
- package/dist/lib/report.js +333 -0
- package/dist/lib/screenshot-item.js +154 -0
- package/dist/lib/service/index.js +306 -0
- package/dist/lib/service/utils.js +47 -0
- package/dist/lib/skill/index.js +69 -0
- package/dist/lib/task-runner.js +298 -0
- package/dist/lib/task-timing.js +44 -0
- package/dist/lib/tree.js +51 -0
- package/dist/lib/types.js +298 -0
- package/dist/lib/utils.js +314 -0
- package/dist/lib/yaml/builder.js +55 -0
- package/dist/lib/yaml/index.js +79 -0
- package/dist/lib/yaml/player.js +459 -0
- package/dist/lib/yaml/utils.js +153 -0
- package/dist/lib/yaml.js +18 -0
- package/dist/types/agent/agent.d.ts +220 -0
- package/dist/types/agent/common.d.ts +0 -0
- package/dist/types/agent/execution-session.d.ts +36 -0
- package/dist/types/agent/index.d.ts +9 -0
- package/dist/types/agent/task-builder.d.ts +34 -0
- package/dist/types/agent/task-cache.d.ts +49 -0
- package/dist/types/agent/tasks.d.ts +70 -0
- package/dist/types/agent/ui-utils.d.ts +14 -0
- package/dist/types/agent/utils.d.ts +25 -0
- package/dist/types/ai-model/auto-glm/actions.d.ts +78 -0
- package/dist/types/ai-model/auto-glm/index.d.ts +6 -0
- package/dist/types/ai-model/auto-glm/parser.d.ts +18 -0
- package/dist/types/ai-model/auto-glm/planning.d.ts +12 -0
- package/dist/types/ai-model/auto-glm/prompt.d.ts +27 -0
- package/dist/types/ai-model/auto-glm/util.d.ts +13 -0
- package/dist/types/ai-model/connectivity.d.ts +20 -0
- package/dist/types/ai-model/conversation-history.d.ts +105 -0
- package/dist/types/ai-model/index.d.ts +16 -0
- package/dist/types/ai-model/inspect.d.ts +67 -0
- package/dist/types/ai-model/llm-planning.d.ts +19 -0
- package/dist/types/ai-model/prompt/common.d.ts +2 -0
- package/dist/types/ai-model/prompt/describe.d.ts +1 -0
- package/dist/types/ai-model/prompt/extraction.d.ts +7 -0
- package/dist/types/ai-model/prompt/llm-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/llm-planning.d.ts +10 -0
- package/dist/types/ai-model/prompt/llm-section-locator.d.ts +3 -0
- package/dist/types/ai-model/prompt/order-sensitive-judge.d.ts +2 -0
- package/dist/types/ai-model/prompt/playwright-generator.d.ts +26 -0
- package/dist/types/ai-model/prompt/ui-tars-planning.d.ts +2 -0
- package/dist/types/ai-model/prompt/util.d.ts +33 -0
- package/dist/types/ai-model/prompt/yaml-generator.d.ts +102 -0
- package/dist/types/ai-model/service-caller/codex-app-server.d.ts +42 -0
- package/dist/types/ai-model/service-caller/image-detail.d.ts +2 -0
- package/dist/types/ai-model/service-caller/index.d.ts +60 -0
- package/dist/types/ai-model/service-caller/request-timeout.d.ts +32 -0
- package/dist/types/ai-model/ui-tars-planning.d.ts +72 -0
- package/dist/types/common.d.ts +288 -0
- package/dist/types/device/device-options.d.ts +155 -0
- package/dist/types/device/index.d.ts +2565 -0
- package/dist/types/dump/html-utils.d.ts +75 -0
- package/dist/types/dump/index.d.ts +5 -0
- package/dist/types/dump/screenshot-restoration.d.ts +8 -0
- package/dist/types/dump/screenshot-store.d.ts +49 -0
- package/dist/types/index.d.ts +21 -0
- package/dist/types/report-cli.d.ts +36 -0
- package/dist/types/report-generator.d.ts +88 -0
- package/dist/types/report-markdown.d.ts +24 -0
- package/dist/types/report.d.ts +52 -0
- package/dist/types/screenshot-item.d.ts +67 -0
- package/dist/types/service/index.d.ts +24 -0
- package/dist/types/service/utils.d.ts +2 -0
- package/dist/types/skill/index.d.ts +25 -0
- package/dist/types/task-runner.d.ts +50 -0
- package/dist/types/task-timing.d.ts +8 -0
- package/dist/types/tree.d.ts +4 -0
- package/dist/types/types.d.ts +684 -0
- package/dist/types/utils.d.ts +45 -0
- package/dist/types/yaml/builder.d.ts +2 -0
- package/dist/types/yaml/index.d.ts +4 -0
- package/dist/types/yaml/player.d.ts +34 -0
- package/dist/types/yaml/utils.d.ts +9 -0
- package/dist/types/yaml.d.ts +215 -0
- package/package.json +130 -0
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
/**
 * Transpiler helper: install `value` under `key` on `obj` and return `obj`.
 *
 * If `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as a plain enumerable/writable/configurable data property so a
 * prototype accessor cannot intercept the write; otherwise a simple
 * assignment is sufficient and produces the same descriptor.
 *
 * @param {object} obj - target object (mutated in place).
 * @param {string|symbol} key - property key to define.
 * @param {*} value - value to store.
 * @returns {object} the same `obj`, for chaining.
 */
function _define_property(obj, key, value) {
    const descriptor = {
        value,
        enumerable: true,
        configurable: true,
        writable: true
    };
    if (key in obj) {
        Object.defineProperty(obj, key, descriptor);
    } else {
        obj[key] = value;
    }
    return obj;
}
|
|
11
|
+
// Transpiler scaffolding: computed method name for the iterator protocol,
// kept as module-level bindings so the class literal can reference it.
var _computedKey = Symbol.iterator;
let _computedKey1 = _computedKey;

/**
 * Mutable conversation state shared across planning steps.
 *
 * Holds the raw chat messages, free-form "memories", a sub-goal plan with
 * per-goal progress logs, historical step logs, and a pending feedback
 * message. Iterable over its messages and JSON-serializable via snapshot().
 */
class ConversationHistory {
    // Raw chat messages accumulated so far.
    messages = [];
    // Sub-goal plan entries: { index, description, status, logs? }.
    subGoals = [];
    // Free-form memory strings collected from previous steps.
    memories = [];
    // Text lines describing steps that have already been executed.
    historicalLogs = [];
    // Feedback text waiting to be delivered with the next prompt.
    pendingFeedbackMessage = '';

    /**
     * @param {{initialMessages?: Array}} [options] - optional seed messages.
     */
    constructor(options) {
        if (options?.initialMessages?.length) {
            this.seed(options.initialMessages);
        }
        this.pendingFeedbackMessage = '';
    }

    /** Clear the pending feedback message if one is set. */
    resetPendingFeedbackMessageIfExists() {
        if (this.pendingFeedbackMessage) {
            this.pendingFeedbackMessage = '';
        }
    }

    /** Append a single chat message. */
    append(message) {
        this.messages.push(message);
    }

    /** Replace all state with the given messages (full reset first). */
    seed(messages) {
        this.reset();
        for (const message of messages) {
            this.append(message);
        }
    }

    /** Clear messages, memories, sub-goals, logs and pending feedback. */
    reset() {
        this.messages.length = 0;
        this.memories.length = 0;
        this.subGoals.length = 0;
        this.historicalLogs.length = 0;
        this.pendingFeedbackMessage = '';
    }

    /**
     * Return a copy of the messages. With no argument this is a shallow
     * copy; with `maxImages`, a deep clone where all but the most recent
     * `maxImages` image parts are replaced by a text placeholder.
     */
    snapshot(maxImages) {
        if (maxImages === undefined) {
            return [...this.messages];
        }
        // Deep-clone so replacing image parts never mutates live history.
        const cloned = structuredClone(this.messages);
        let seenImages = 0;
        // Walk backwards so the NEWEST images are the ones kept.
        for (let i = cloned.length - 1; i >= 0; i--) {
            const { content } = cloned[i];
            if (!Array.isArray(content)) {
                continue;
            }
            for (let j = 0; j < content.length; j++) {
                if (content[j].type === 'image_url') {
                    seenImages++;
                    if (seenImages > maxImages) {
                        content[j] = {
                            type: 'text',
                            text: '(image ignored due to size optimization)'
                        };
                    }
                }
            }
        }
        return cloned;
    }

    /** Number of stored messages. */
    get length() {
        return this.messages.length;
    }

    /** Iterate over the stored messages. */
    [_computedKey1]() {
        return this.messages[Symbol.iterator]();
    }

    /** JSON form is a plain snapshot of the messages. */
    toJSON() {
        return this.snapshot();
    }

    /** Replace the sub-goal plan (goals are shallow-copied). */
    setSubGoals(subGoals) {
        this.subGoals = subGoals.map((goal) => ({ ...goal }));
        this.markFirstPendingAsRunning();
    }

    /**
     * Merge an incoming sub-goal plan with the existing one, matching by
     * `index`. Blank incoming descriptions keep the existing description;
     * brand-new goals with blank descriptions are dropped entirely.
     */
    mergeSubGoals(subGoals) {
        if (this.subGoals.length === 0) {
            this.setSubGoals(subGoals);
            return;
        }
        const existingByIndex = new Map(
            this.subGoals.map((goal) => [goal.index, goal])
        );
        const merged = [];
        for (const goal of subGoals) {
            const existing = existingByIndex.get(goal.index);
            const hasDescription = goal.description.trim().length > 0;
            if (!existing && !hasDescription) {
                continue;
            }
            merged.push({
                ...goal,
                description: hasDescription || !existing
                    ? goal.description
                    : existing.description
            });
        }
        if (merged.length === 0) {
            return;
        }
        this.setSubGoals(merged);
    }

    /**
     * Update status/description of the goal with the given index.
     * Returns whether the goal was found; any actual change clears its logs.
     */
    updateSubGoal(index, updates) {
        const goal = this.subGoals.find((g) => g.index === index);
        if (!goal) {
            return false;
        }
        let changed = false;
        if (updates.status !== undefined && updates.status !== goal.status) {
            goal.status = updates.status;
            changed = true;
        }
        if (
            updates.description !== undefined &&
            updates.description !== goal.description
        ) {
            goal.description = updates.description;
            changed = true;
        }
        if (changed) {
            goal.logs = [];
        }
        return true;
    }

    /** Promote the first 'pending' goal to 'running' with a fresh log. */
    markFirstPendingAsRunning() {
        const firstPending = this.subGoals.find((g) => g.status === 'pending');
        if (firstPending) {
            firstPending.status = 'running';
            firstPending.logs = [];
        }
    }

    /**
     * Mark the goal with `index` as finished and advance the next pending
     * goal to running. Returns whether the goal was found.
     */
    markSubGoalFinished(index) {
        const found = this.updateSubGoal(index, { status: 'finished' });
        if (found) {
            this.markFirstPendingAsRunning();
        }
        return found;
    }

    /** Mark every goal finished (clearing logs of not-yet-finished ones). */
    markAllSubGoalsFinished() {
        for (const goal of this.subGoals) {
            if (goal.status !== 'finished') {
                goal.logs = [];
            }
            goal.status = 'finished';
        }
    }

    /** Append a log line to the currently running goal, if any. */
    appendSubGoalLog(log) {
        if (!log) {
            return;
        }
        const running = this.subGoals.find((g) => g.status === 'running');
        if (!running) {
            return;
        }
        if (!running.logs) {
            running.logs = [];
        }
        running.logs.push(log);
    }

    /**
     * Render the sub-goal plan as prompt text: the numbered goal list, the
     * current (running, else first pending) goal, and its action log.
     */
    subGoalsToText() {
        if (this.subGoals.length === 0) {
            return '';
        }
        const lines = this.subGoals.map(
            (goal) => `${goal.index}. ${goal.description} (${goal.status})`
        );
        const current =
            this.subGoals.find((goal) => goal.status === 'running') ||
            this.subGoals.find((goal) => goal.status === 'pending');
        let currentText = '';
        if (current) {
            currentText = `\nCurrent sub-goal is: ${current.description}`;
            if (current.logs && current.logs.length > 0) {
                const logLines = current.logs.map((log) => `- ${log}`).join('\n');
                currentText += `\nActions performed for current sub-goal:\n${logLines}`;
            }
        }
        return `Sub-goals:\n${lines.join('\n')}${currentText}`;
    }

    /** Record a historical step log (falsy values are ignored). */
    appendHistoricalLog(log) {
        if (log) {
            this.historicalLogs.push(log);
        }
    }

    /** Render the historical step logs as prompt text. */
    historicalLogsToText() {
        if (this.historicalLogs.length === 0) {
            return '';
        }
        const logLines = this.historicalLogs.map((log) => `- ${log}`).join('\n');
        return `Here are the steps that have been executed:\n${logLines}`;
    }

    /** Store a memory string (falsy values are ignored). */
    appendMemory(memory) {
        if (memory) {
            this.memories.push(memory);
        }
    }

    /** Return a copy of the stored memories. */
    getMemories() {
        return [...this.memories];
    }

    /** Render the memories as prompt text. */
    memoriesToText() {
        if (this.memories.length === 0) {
            return '';
        }
        return `Memories from previous steps:\n---\n${this.memories.join('\n---\n')}\n`;
    }

    /** Drop all stored memories. */
    clearMemories() {
        this.memories.length = 0;
    }

    /**
     * When more than `threshold` messages exist, keep only the last
     * `keepCount` and prepend a single placeholder describing how many
     * were dropped. Returns whether compression happened.
     */
    compressHistory(threshold, keepCount) {
        if (this.messages.length <= threshold) {
            return false;
        }
        const omittedCount = this.messages.length - keepCount;
        const placeholder = {
            role: 'user',
            content: `(${omittedCount} previous conversation messages have been omitted)`
        };
        const recent = this.messages.slice(-keepCount);
        this.messages.length = 0;
        this.messages.push(placeholder, ...recent);
        return true;
    }
}
|
|
193
|
+
export { ConversationHistory };
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { AIResponseParseError, callAI, callAIWithObjectResponse, callAIWithStringResponse } from "./service-caller/index.mjs";
|
|
2
|
+
import { runConnectivityTest } from "./connectivity.mjs";
|
|
3
|
+
import { systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
4
|
+
import { generatePlaywrightTest, generatePlaywrightTestStream } from "./prompt/playwright-generator.mjs";
|
|
5
|
+
import { generateYamlTest, generateYamlTestStream } from "./prompt/yaml-generator.mjs";
|
|
6
|
+
import { AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection } from "./inspect.mjs";
|
|
7
|
+
import { plan } from "./llm-planning.mjs";
|
|
8
|
+
import { autoGLMPlanning } from "./auto-glm/planning.mjs";
|
|
9
|
+
import { PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, dumpActionParam, findAllMidsceneLocatorField, getMidsceneLocationSchema, parseActionParam } from "../common.mjs";
|
|
10
|
+
import { uiTarsPlanning } from "./ui-tars-planning.mjs";
|
|
11
|
+
import { ConversationHistory } from "./conversation-history.mjs";
|
|
12
|
+
export { AIResponseParseError, AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection, ConversationHistory, PointSchema, RectSchema, SizeSchema, TMultimodalPromptSchema, TUserPromptSchema, adaptBboxToRect, autoGLMPlanning, callAI, callAIWithObjectResponse, callAIWithStringResponse, dumpActionParam, findAllMidsceneLocatorField, generatePlaywrightTest, generatePlaywrightTestStream, generateYamlTest, generateYamlTestStream, getMidsceneLocationSchema, parseActionParam, plan, runConnectivityTest, systemPromptToLocateElement, uiTarsPlanning };
|
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
import { generateElementByPoint, generateElementByRect } from "@godscene/shared/extractor/dom-util";
|
|
2
|
+
import { cropByRect, paddingToMatchBlockByBase64, preProcessImageUrl, scaleImage } from "@godscene/shared/img";
|
|
3
|
+
import { getDebug } from "@godscene/shared/logger";
|
|
4
|
+
import { assert } from "@godscene/shared/utils";
|
|
5
|
+
import { adaptBboxToRect, expandSearchArea, mergeRects } from "../common.mjs";
|
|
6
|
+
import { parseAutoGLMLocateResponse } from "./auto-glm/parser.mjs";
|
|
7
|
+
import { getAutoGLMLocatePrompt } from "./auto-glm/prompt.mjs";
|
|
8
|
+
import { isAutoGLM } from "./auto-glm/util.mjs";
|
|
9
|
+
import { extractDataQueryPrompt, parseXMLExtractionResponse, systemPromptToExtract } from "./prompt/extraction.mjs";
|
|
10
|
+
import { findElementPrompt, systemPromptToLocateElement } from "./prompt/llm-locator.mjs";
|
|
11
|
+
import { sectionLocatorInstruction, systemPromptToLocateSection } from "./prompt/llm-section-locator.mjs";
|
|
12
|
+
import { orderSensitiveJudgePrompt, systemPromptToJudgeOrderSensitive } from "./prompt/order-sensitive-judge.mjs";
|
|
13
|
+
import { AIResponseParseError, callAI, callAIWithObjectResponse, callAIWithStringResponse } from "./service-caller/index.mjs";
|
|
14
|
+
// Namespaced debug loggers for this module, created by the shared
// `getDebug` factory (imported from @godscene/shared/logger):
// 'ai:inspect' for element location, 'ai:section' for section location.
const debugInspect = getDebug('ai:inspect');
const debugSection = getDebug('ai:section');
|
|
16
|
+
/**
 * Build the cropped-and-upscaled search-area image used for a follow-up
 * locate call.
 *
 * The base rect is expanded via `expandSearchArea` so the crop keeps some
 * surrounding context, cropped out of the full screenshot, then upscaled so
 * small targets stay legible to the model. The returned rect keeps the
 * crop's left/top in full-screenshot coordinates but carries the SCALED
 * width/height; callers pass `scale` along (e.g. to `adaptBboxToRect`) to
 * map model output back to screenshot coordinates.
 *
 * @param {object} options
 * @param {object} options.context - UI context with `screenshot.base64` and `shotSize`.
 * @param {object} options.baseRect - rect (full-screenshot coords) to search around.
 * @param {string} options.modelFamily - model family id; 'qwen2.5-vl' gets a block-aligned crop.
 * @param {number} [options.scaleRatio=2] - upscale factor applied to the crop
 *   (previously hard-coded; default preserves the original behavior).
 * @returns {Promise<{rect: object, imageBase64: string, scale: number}>}
 */
async function buildSearchAreaConfig(options) {
    const { context, baseRect, modelFamily, scaleRatio = 2 } = options;
    const sectionRect = expandSearchArea(baseRect, context.shotSize);
    // qwen2.5-vl expects image dimensions aligned to its patch blocks, so the
    // crop helper is asked to pad in that case.
    const croppedResult = await cropByRect(
        context.screenshot.base64,
        sectionRect,
        'qwen2.5-vl' === modelFamily
    );
    const scaledResult = await scaleImage(croppedResult.imageBase64, scaleRatio);
    // NOTE: width/height are overwritten with the SCALED dimensions while
    // left/top remain in original screenshot coordinates.
    sectionRect.width = scaledResult.width;
    sectionRect.height = scaledResult.height;
    return {
        rect: sectionRect,
        imageBase64: scaledResult.imageBase64,
        scale: scaleRatio
    };
}
|
|
30
|
+
/**
 * Extract the plain prompt text from a user prompt, which may be either a
 * bare string or a multimodal object carrying a `prompt` field.
 *
 * @param {string|{prompt: string}} prompt
 * @returns {string}
 */
const extraTextFromUserPrompt = (prompt) =>
    typeof prompt === 'string' ? prompt : prompt.prompt;
|
|
34
|
+
/**
 * Convert a multimodal prompt's reference images into extra chat messages:
 * one intro message, then — per image — a caption message followed by the
 * image itself (URL pre-processed, optionally converting http images to
 * base64). Returns an empty list when there are no images.
 *
 * @param {{images?: Array<{name: string, url: string}>, convertHttpImage2Base64?: boolean}} [multimodalPrompt]
 * @returns {Promise<Array>} chat messages to append to the request.
 */
const promptsToChatParam = async (multimodalPrompt) => {
    const messages = [];
    const images = multimodalPrompt?.images;
    if (!images?.length) {
        return messages;
    }
    // Small helper: a user message holding a single text part.
    const textMessage = (text) => ({
        role: 'user',
        content: [
            {
                type: 'text',
                text
            }
        ]
    });
    messages.push(textMessage('Next, I will provide all the reference images.'));
    for (const item of images) {
        const base64 = await preProcessImageUrl(
            item.url,
            !!multimodalPrompt.convertHttpImage2Base64
        );
        messages.push(textMessage(`this is the reference image named '${item.name}':`));
        messages.push({
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: base64,
                        detail: 'high'
                    }
                }
            ]
        });
    }
    return messages;
};
|
|
73
|
+
/**
 * Locate a single UI element described by `targetElementDescription` in the
 * current screenshot.
 *
 * Three input-image modes:
 *  - `options.searchConfig` set: use the pre-cropped search-area image and
 *    its rect (coordinates are mapped back to the full screenshot below);
 *  - model family 'qwen2.5-vl': pad the full screenshot to block-aligned
 *    dimensions first;
 *  - otherwise: send the full screenshot as-is.
 *
 * Two response protocols:
 *  - auto-GLM families: a string response parsed for point coordinates
 *    normalized to a 0–999 grid;
 *  - everything else: a structured object response carrying a `bbox`.
 *
 * @param {object} options - { context, targetElementDescription, modelConfig,
 *   searchConfig?, abortSignal? }.
 * @returns {Promise<{rect, parseResult: {elements, errors}, rawResponse, usage, reasoning_content}>}
 *   Never rejects on AI-call failure: errors are reported via
 *   `parseResult.errors` instead.
 */
async function AiLocateElement(options) {
    const { context, targetElementDescription, modelConfig } = options;
    const { modelFamily } = modelConfig;
    const screenshotBase64 = context.screenshot.base64;
    assert(targetElementDescription, "cannot find the target element description");
    const targetElementDescriptionText = extraTextFromUserPrompt(targetElementDescription);
    const userInstructionPrompt = findElementPrompt(targetElementDescriptionText);
    // auto-GLM uses its own locate prompt; other families share the generic one.
    const systemPrompt = isAutoGLM(modelFamily) ? getAutoGLMLocatePrompt(modelFamily) : systemPromptToLocateElement(modelFamily);
    // Image actually sent to the model, plus its dimensions. The "original"
    // dimensions are kept separately because qwen padding changes the payload
    // size while bbox mapping still needs the pre-padding size.
    let imagePayload = screenshotBase64;
    let imageWidth = context.shotSize.width;
    let imageHeight = context.shotSize.height;
    let originalImageWidth = imageWidth;
    let originalImageHeight = imageHeight;
    if (options.searchConfig) {
        assert(options.searchConfig.rect, 'searchArea is provided but its rect cannot be found. Failed to locate element');
        assert(options.searchConfig.imageBase64, 'searchArea is provided but its imageBase64 cannot be found. Failed to locate element');
        // Use the cropped search-area image; its rect carries the crop's size.
        imagePayload = options.searchConfig.imageBase64;
        imageWidth = options.searchConfig.rect?.width;
        imageHeight = options.searchConfig.rect?.height;
        originalImageWidth = imageWidth;
        originalImageHeight = imageHeight;
    } else if ('qwen2.5-vl' === modelFamily) {
        // qwen2.5-vl needs block-aligned image dimensions.
        const paddedResult = await paddingToMatchBlockByBase64(imagePayload);
        imageWidth = paddedResult.width;
        imageHeight = paddedResult.height;
        imagePayload = paddedResult.imageBase64;
    }
    const msgs = [
        {
            role: 'system',
            content: systemPrompt
        },
        {
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: imagePayload,
                        detail: 'high'
                    }
                },
                {
                    // auto-GLM expects an action-prefixed instruction.
                    type: 'text',
                    text: isAutoGLM(modelFamily) ? `Tap: ${userInstructionPrompt}` : userInstructionPrompt
                }
            ]
        }
    ];
    // A non-string description may carry reference images to append.
    if ('string' != typeof targetElementDescription) {
        const addOns = await promptsToChatParam({
            images: targetElementDescription.images,
            convertHttpImage2Base64: targetElementDescription.convertHttpImage2Base64
        });
        msgs.push(...addOns);
    }
    if (isAutoGLM(modelFamily)) {
        // --- auto-GLM path: free-text response parsed for a tap point. ---
        const { content: rawResponseContent, usage } = await callAIWithStringResponse(msgs, modelConfig, {
            abortSignal: options.abortSignal
        });
        debugInspect('auto-glm rawResponse:', rawResponseContent);
        const parsed = parseAutoGLMLocateResponse(rawResponseContent);
        debugInspect('auto-glm thinking:', parsed.think);
        debugInspect('auto-glm coordinates:', parsed.coordinates);
        let resRect;
        let matchedElements = [];
        let errors = [];
        if (parsed.error || !parsed.coordinates) {
            errors = [
                parsed.error || 'Failed to parse auto-glm response'
            ];
            debugInspect('auto-glm parse error:', errors[0]);
        } else {
            // Coordinates are normalized to a 0–999 grid; convert to pixels
            // of the image actually sent.
            const { x, y } = parsed.coordinates;
            debugInspect('auto-glm coordinates [0-999]:', {
                x,
                y
            });
            const pixelX = Math.round(x * imageWidth / 1000);
            const pixelY = Math.round(y * imageHeight / 1000);
            debugInspect('auto-glm pixel coordinates:', {
                pixelX,
                pixelY
            });
            let finalX = pixelX;
            let finalY = pixelY;
            // When a cropped search area was used, shift back into
            // full-screenshot coordinates.
            if (options.searchConfig?.rect) {
                finalX += options.searchConfig.rect.left;
                finalY += options.searchConfig.rect.top;
            }
            const element = generateElementByPoint([
                finalX,
                finalY
            ], targetElementDescriptionText);
            // NOTE(review): `element.rect` is read before the `if (element)`
            // check below — presumably generateElementByPoint never returns a
            // falsy value; confirm against its implementation.
            resRect = element.rect;
            debugInspect('auto-glm resRect:', resRect);
            if (element) matchedElements = [
                element
            ];
        }
        return {
            rect: resRect,
            parseResult: {
                elements: matchedElements,
                errors
            },
            rawResponse: rawResponseContent,
            usage,
            reasoning_content: parsed.think
        };
    }
    // --- default path: structured object response with a bbox. ---
    let res;
    try {
        res = await callAIWithObjectResponse(msgs, modelConfig, {
            abortSignal: options.abortSignal
        });
    } catch (callError) {
        // Convert call/parse failures into an error result instead of throwing;
        // AIResponseParseError still carries the raw response and usage.
        const errorMessage = callError instanceof Error ? callError.message : String(callError);
        const rawResponse = callError instanceof AIResponseParseError ? callError.rawResponse : errorMessage;
        const usage = callError instanceof AIResponseParseError ? callError.usage : void 0;
        return {
            rect: void 0,
            parseResult: {
                elements: [],
                errors: [
                    `AI call error: ${errorMessage}`
                ]
            },
            rawResponse,
            usage,
            reasoning_content: void 0
        };
    }
    const rawResponse = JSON.stringify(res.content);
    let resRect;
    let matchedElements = [];
    // Start from any model-reported errors; a successfully parsed bbox
    // clears them below.
    let errors = 'errors' in res.content ? res.content.errors : [];
    try {
        if ('bbox' in res.content && Array.isArray(res.content.bbox) && res.content.bbox.length >= 1) {
            // Map the bbox from payload-image space back to full-screenshot
            // space (crop offset + scale + qwen padding handled inside).
            resRect = adaptBboxToRect(res.content.bbox, imageWidth, imageHeight, options.searchConfig?.rect?.left, options.searchConfig?.rect?.top, originalImageWidth, originalImageHeight, modelFamily, options.searchConfig?.scale);
            debugInspect('resRect', resRect);
            const element = generateElementByRect(resRect, targetElementDescriptionText);
            errors = [];
            if (element) matchedElements = [
                element
            ];
        }
    } catch (e) {
        const msg = e instanceof Error ? `Failed to parse bbox: ${e.message}` : 'unknown error in locate';
        if (errors && errors?.length !== 0) errors.push(`(${msg})`);
        else errors = [
            msg
        ];
    }
    return {
        rect: resRect,
        parseResult: {
            elements: matchedElements,
            errors: errors
        },
        rawResponse,
        usage: res.usage,
        reasoning_content: res.reasoning_content
    };
}
|
|
238
|
+
/**
 * Locate a screen SECTION matching `sectionDescription` and return a
 * cropped, upscaled image of it suitable for a follow-up element-locate call
 * (see AiLocateElement's `searchConfig`).
 *
 * The model returns a target bbox plus optional reference bboxes; these are
 * merged into one rect, expanded for context, then cropped and scaled by
 * `buildSearchAreaConfig`.
 *
 * @param {object} options - { context, sectionDescription, modelConfig, abortSignal? }.
 * @returns {Promise<{rect?, imageBase64?, scale?, error?, rawResponse, usage}>}
 *   Never rejects on AI-call failure: failures are reported via `error`.
 */
async function AiLocateSection(options) {
    const { context, sectionDescription, modelConfig } = options;
    const { modelFamily } = modelConfig;
    const screenshotBase64 = context.screenshot.base64;
    const systemPrompt = systemPromptToLocateSection(modelFamily);
    const sectionLocatorInstructionText = sectionLocatorInstruction(extraTextFromUserPrompt(sectionDescription));
    const msgs = [
        {
            role: 'system',
            content: systemPrompt
        },
        {
            role: 'user',
            content: [
                {
                    type: 'image_url',
                    image_url: {
                        url: screenshotBase64,
                        detail: 'high'
                    }
                },
                {
                    type: 'text',
                    text: sectionLocatorInstructionText
                }
            ]
        }
    ];
    // A non-string description may carry reference images to append.
    if ('string' != typeof sectionDescription) {
        const addOns = await promptsToChatParam({
            images: sectionDescription.images,
            convertHttpImage2Base64: sectionDescription.convertHttpImage2Base64
        });
        msgs.push(...addOns);
    }
    let result;
    try {
        result = await callAIWithObjectResponse(msgs, modelConfig, {
            abortSignal: options.abortSignal
        });
    } catch (callError) {
        // Convert call/parse failures into an error result instead of throwing;
        // AIResponseParseError still carries the raw response and usage.
        const errorMessage = callError instanceof Error ? callError.message : String(callError);
        const rawResponse = callError instanceof AIResponseParseError ? callError.rawResponse : errorMessage;
        const usage = callError instanceof AIResponseParseError ? callError.usage : void 0;
        return {
            rect: void 0,
            imageBase64: void 0,
            error: `AI call error: ${errorMessage}`,
            rawResponse,
            usage
        };
    }
    let searchAreaConfig;
    const sectionBbox = result.content.bbox;
    if (sectionBbox) {
        // Convert the target bbox to a screenshot-space rect (no crop offset,
        // so left/top offsets are 0 and image size equals shot size).
        const targetRect = adaptBboxToRect(sectionBbox, context.shotSize.width, context.shotSize.height, 0, 0, context.shotSize.width, context.shotSize.height, modelFamily);
        debugSection('original targetRect %j', targetRect);
        // Optional reference bboxes broaden the area; malformed (non-array)
        // entries are dropped.
        const referenceBboxList = result.content.references_bbox || [];
        debugSection('referenceBboxList %j', referenceBboxList);
        const referenceRects = referenceBboxList.filter((bbox)=>Array.isArray(bbox)).map((bbox)=>adaptBboxToRect(bbox, context.shotSize.width, context.shotSize.height, 0, 0, context.shotSize.width, context.shotSize.height, modelFamily));
        debugSection('referenceRects %j', referenceRects);
        const mergedRect = mergeRects([
            targetRect,
            ...referenceRects
        ]);
        debugSection('mergedRect %j', mergedRect);
        // NOTE(review): `expandedRect` is only used for the debug logs below —
        // buildSearchAreaConfig expands `mergedRect` again internally; confirm
        // the two expansions are intended to match.
        const expandedRect = expandSearchArea(mergedRect, context.shotSize);
        const originalWidth = expandedRect.width;
        const originalHeight = expandedRect.height;
        debugSection('expanded sectionRect %j', expandedRect);
        searchAreaConfig = await buildSearchAreaConfig({
            context,
            baseRect: mergedRect,
            modelFamily
        });
        debugSection('scaled sectionRect from %dx%d to %dx%d (scale=%d)', originalWidth, originalHeight, searchAreaConfig.rect.width, searchAreaConfig.rect.height, searchAreaConfig.scale);
    }
    return {
        rect: searchAreaConfig?.rect,
        imageBase64: searchAreaConfig?.imageBase64,
        scale: searchAreaConfig?.scale,
        error: result.content.error,
        rawResponse: JSON.stringify(result.content),
        usage: result.usage
    };
}
|
|
324
|
+
/**
 * Runs a data-extraction query against the current page context via the model.
 *
 * Builds a two-message chat (system extraction prompt + user content), where
 * the user content is the screenshot (unless the caller disabled it) followed
 * by the rendered data-query prompt. Optional multimodal prompt images are
 * appended as extra messages. The raw model response is parsed as XML; a parse
 * failure is rethrown as AIResponseParseError carrying the raw response and
 * token usage so callers can surface both.
 *
 * @param {object} options - { dataQuery, context, extractOption, multimodalPrompt, modelConfig, pageDescription }
 * @returns {Promise<object>} { parseResult, rawResponse, usage, reasoning_content }
 * @throws {AIResponseParseError} when the model output cannot be parsed as XML
 */
async function AiExtractElementInfo(options) {
    const { dataQuery, context, extractOption, multimodalPrompt, modelConfig } = options;

    // Screenshot is attached unless the caller explicitly opted out.
    const userContent = [];
    if (extractOption?.screenshotIncluded !== false) {
        userContent.push({
            type: 'image_url',
            image_url: {
                url: context.screenshot.base64,
                detail: 'high',
            },
        });
    }
    userContent.push({
        type: 'text',
        text: extractDataQueryPrompt(options.pageDescription || '', dataQuery),
    });

    const msgs = [
        { role: 'system', content: systemPromptToExtract() },
        { role: 'user', content: userContent },
    ];

    // Extra caller-supplied images become additional chat messages.
    if (multimodalPrompt) {
        const addOns = await promptsToChatParam({
            images: multimodalPrompt.images,
            convertHttpImage2Base64: multimodalPrompt.convertHttpImage2Base64,
        });
        msgs.push(...addOns);
    }

    const { content: rawResponse, usage, reasoning_content } = await callAI(msgs, modelConfig);

    let parseResult;
    try {
        parseResult = parseXMLExtractionResponse(rawResponse);
    } catch (parseError) {
        // Preserve raw response + usage on parse failure so callers can report both.
        const reason = parseError instanceof Error ? parseError.message : String(parseError);
        throw new AIResponseParseError(`XML parse error: ${reason}`, rawResponse, usage);
    }

    return {
        parseResult,
        rawResponse,
        usage,
        reasoning_content,
    };
}
|
|
373
|
+
/**
 * Asks the model whether a locate/extract description is order-sensitive
 * (i.e. depends on the ordering of matched elements).
 *
 * Deep-think is always disabled for this lightweight judgement call. A missing
 * `isOrderSensitive` field in the model output defaults to `false` (only
 * null/undefined fall back — an explicit `false` is preserved as-is).
 *
 * @param {string} description - the natural-language description to judge
 * @param {Function} callAIFn - object-response AI caller: (msgs, modelConfig, opts) => { content, usage }
 * @param {object} modelConfig - model configuration forwarded to callAIFn
 * @returns {Promise<{isOrderSensitive: boolean, usage: object}>}
 */
async function AiJudgeOrderSensitive(description, callAIFn, modelConfig) {
    const msgs = [
        { role: 'system', content: systemPromptToJudgeOrderSensitive() },
        { role: 'user', content: orderSensitiveJudgePrompt(description) },
    ];
    debugInspect("AiJudgeOrderSensitive: deepThink=false, description=%s", description);

    const { content, usage } = await callAIFn(msgs, modelConfig, {
        deepThink: false,
    });

    return {
        isOrderSensitive: content.isOrderSensitive ?? false,
        usage,
    };
}
|
|
395
|
+
export { AiExtractElementInfo, AiJudgeOrderSensitive, AiLocateElement, AiLocateSection, buildSearchAreaConfig };
|