rl-simulator-core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +3 -0
- package/dist/index.d.mts +565 -0
- package/dist/index.d.ts +565 -0
- package/dist/index.js +512 -0
- package/dist/index.mjs +476 -0
- package/package.json +21 -0
- package/src/actions.js +145 -0
- package/src/ai.js +86 -0
- package/src/config.js +83 -0
- package/src/index.js +155 -0
- package/src/runner.js +138 -0
package/.env
ADDED
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { chromium } from 'playwright';
|
|
4
|
+
import { createOpenAI } from '@ai-sdk/openai';
|
|
5
|
+
import { generateText } from 'ai';
|
|
6
|
+
import dotenv from 'dotenv';
|
|
7
|
+
import AdmZip from 'adm-zip';
|
|
8
|
+
|
|
9
|
+
// Populate process.env from a local .env file before any setting is read.
dotenv.config();

// Model backend settings, all taken from the environment:
//   ARK_API_KEY -> API_KEY, EP -> ENDPOINT, BASE_URL -> BASE_URL.
const {
  ARK_API_KEY: API_KEY,
  EP: ENDPOINT,
  BASE_URL,
} = process.env;

// Only warn (never abort) when a setting is absent or empty.
if ([API_KEY, ENDPOINT, BASE_URL].some((setting) => !setting)) {
  console.warn("Missing environment variables (ARK_API_KEY, EP, BASE_URL). Agent may fail if not provided later.");
}
|
|
18
|
+
|
|
19
|
+
const SYSTEM_PROMPT = `
|
|
20
|
+
Intro to Think Mode
|
|
21
|
+
To explore the performance of our model with different styles of thinking, we introduce four modes of thinking: budgted, efficient, no think, and unrestricted. The model is default to unrestricted think mode.
|
|
22
|
+
- Budgeted think mode: Limit model's internal reasoning and reflections to stay within the specified token budget.
|
|
23
|
+
- Efficient think mode: Model provides a concise internal thinking process with efficient reasoning and reflections. It doesn't have a strict token budget, but should be less verbose and more direct in your thinking.
|
|
24
|
+
- No think mode: Model responds directly to the question without any internal reasoning process or extra thinking tokens. Still follows the template with the minimum required thinking tokens to justify the answer.
|
|
25
|
+
- Unrestricted think mode: Model engages in an internal thinking process with thorough reasoning and reflections. There's an unlimited budget for thinking tokens and can continue thinking until you fully solve the problem.
|
|
26
|
+
Think Mode Assignment
|
|
27
|
+
As the "think mode" sp is not encapsulated yet, you need to manually sew in different sp for different modes of thinking. Here are the sp's:
|
|
28
|
+
NOTE: You should disable "thinking" first: (I know it's a bit counterintuitive.)
|
|
29
|
+
"thinking": {
|
|
30
|
+
"type": "disabled"
|
|
31
|
+
}
|
|
32
|
+
- Unrestricted think mode
|
|
33
|
+
You should begin by detailing the internal reasoning process, and then present the answer to the user. The reasoning process should be enclosed within <think_never_used_51bce0c785ca2f68081bfa7d91973934> and </think_never_used_51bce0c785ca2f68081bfa7d91973934> tags, as follows:
|
|
34
|
+
<think_never_used_51bce0c785ca2f68081bfa7d91973934> reasoning process here </think_never_used_51bce0c785ca2f68081bfa7d91973934> answer here.
|
|
35
|
+
You have different modes of thinking:
|
|
36
|
+
Unrestricted think mode: Engage in an internal thinking process with thorough reasoning and reflections. You have an unlimited budget for thinking tokens and can continue thinking until you fully solve the problem.
|
|
37
|
+
Efficient think mode: Provide a concise internal thinking process with efficient reasoning and reflections. You don't have a strict token budget but be less verbose and more direct in your thinking.
|
|
38
|
+
No think mode: Respond directly to the question without any internal reasoning process or extra thinking tokens. Still follow the template with the minimum required thinking tokens to justify the answer.
|
|
39
|
+
Budgeted think mode: Limit your internal reasoning and reflections to stay within the specified token budget
|
|
40
|
+
Based on the complexity of the problem, select the appropriate mode for reasoning among the provided options listed below.
|
|
41
|
+
Provided Mode(s):
|
|
42
|
+
Unrestricted think
|
|
43
|
+
Action Space
|
|
44
|
+
"""
|
|
45
|
+
## Function Definition
|
|
46
|
+
- You have access to the following functions:
|
|
47
|
+
{"type": "function", "name": "click", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Click coordinates. The format is: x y"}}, "required": ["point"]}, "description": "Mouse left single click action."}
|
|
48
|
+
{"type": "function", "name": "left_double", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Click coordinates. The format is: x y"}}, "required": ["point"]}, "description": "Mouse left double click action."}
|
|
49
|
+
{"type": "function", "name": "left_triple", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Click coordinates. The format is: x y"}}, "required": ["point"]}, "description": "Mouse left triple click action."}
|
|
50
|
+
{"type": "function", "name": "right_single", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Click coordinates. The format is: x y"}}, "required": ["point"]}, "description": "Mouse right single click action."}
|
|
51
|
+
{"type": "function", "name": "drag", "parameters": {"type": "object", "properties": {"start_point": {"type": "string", "description": "Drag start point. The format is: x y"}, "end_point": {"type": "string", "description": "Drag end point. The format is: x y"}}, "required": ["start_point", "end_point"]}, "description": "Mouse left button drag action."}
|
|
52
|
+
{"type": "function", "name": "scroll", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Scroll start position. If not specified, default to execute on the current mouse position. The format is: x y"}, "direction": {"type": "string", "description": "Scroll direction.", "enum": ["up", "down", "left", "right"]}}, "required": ["direction"]}, "description": "Scroll action."}
|
|
53
|
+
{"type": "function", "name": "move_to", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Target coordinates. The format is: x y"}}, "required": ["point"]}, "description": "Mouse move action."}
|
|
54
|
+
{"type": "function", "name": "mouse_down", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Mouse down position. If not specified, default to execute on the current mouse position. The format is: x y"}, "button": {"type": "string", "description": "Down button. Default to left.", "enum": ["left", "right"]}}, "required": []}, "description": "Mouse down action."}
|
|
55
|
+
{"type": "function", "name": "mouse_up", "parameters": {"type": "object", "properties": {"point": {"type": "string", "description": "Mouse up position. If not specified, default to execute on the current mouse position. The format is: x y"}, "button": {"type": "string", "description": "Up button. Default to left.", "enum": ["left", "right"]}}, "required": []}, "description": "Mouse up action."}
|
|
56
|
+
{"type": "function", "name": "type", "parameters": {"type": "object", "properties": {"content": {"type": "string", "description": "Type content. If you want to submit your input, use \\n at the end of content."}}, "required": ["content"]}, "description": "Type content."}
|
|
57
|
+
{"type": "function", "name": "hotkey", "parameters": {"type": "object", "properties": {"key": {"type": "string", "description": "Hotkeys you want to press. Split keys with a space and use lowercase."}}, "required": ["key"]}, "description": "Press hotkey."}
|
|
58
|
+
{"type": "function", "name": "press", "parameters": {"type": "object", "properties": {"key": {"type": "string", "description": "Key you want to press. Only one key can be pressed at one time."}}, "required": ["key"]}, "description": "Press key."}
|
|
59
|
+
{"type": "function", "name": "call_user", "parameters": {"type": "object", "properties": {"content": {"type": "string", "description": "Message or information displayed to the user to request their input, feedback, or guidance."}}, "required": []}, "description": "This function is used to interact with the user by displaying a message and requesting their input, feedback, or guidance."}
|
|
60
|
+
{"type": "function", "name": "wait", "parameters": {"type": "object", "properties": {"time": {"type": "string", "description": "Wait time in seconds."}}, "required": []}, "description": "Wait for a while."}
|
|
61
|
+
{"type": "function", "name": "finished", "parameters": {"type": "object", "properties": {"content": {"type": "string", "description": "Provide the final answer or response to complete the task."}}, "required": []}, "description": "This function is used to indicate the completion of a task by providing the final answer or response."}
|
|
62
|
+
|
|
63
|
+
- To call a function, use the following structure without any suffix:
|
|
64
|
+
|
|
65
|
+
<gui_think> reasoning process </gui_think>
|
|
66
|
+
<seed:tool_call_never_used_51bce0c785ca2f68081bfa7d91973934><function_never_used_51bce0c785ca2f68081bfa7d91973934=example_function_name><parameter_never_used_51bce0c785ca2f68081bfa7d91973934=example_parameter_1>value_1</parameter_never_used_51bce0c785ca2f68081bfa7d91973934><parameter_never_used_51bce0c785ca2f68081bfa7d91973934=example_parameter_2>
|
|
67
|
+
This is the value for the second parameter
|
|
68
|
+
that can span
|
|
69
|
+
multiple lines
|
|
70
|
+
</parameter_never_used_51bce0c785ca2f68081bfa7d91973934></function_never_used_51bce0c785ca2f68081bfa7d91973934></seed:tool_call_never_used_51bce0c785ca2f68081bfa7d91973934>
|
|
71
|
+
|
|
72
|
+
## Important Notes
|
|
73
|
+
- Function calls must begin with <function_never_used_51bce0c785ca2f68081bfa7d91973934= and end with </function_never_used_51bce0c785ca2f68081bfa7d91973934>.
|
|
74
|
+
- All required parameters must be explicitly provided.
|
|
75
|
+
|
|
76
|
+
## Additional Notes
|
|
77
|
+
- You can execute multiple actions within a single tool call. For example:
|
|
78
|
+
<seed:tool_call_never_used_51bce0c785ca2f68081bfa7d91973934><function_never_used_51bce0c785ca2f68081bfa7d91973934=example_function_1><parameter_never_used_51bce0c785ca2f68081bfa7d91973934=example_parameter_1>value_1</parameter_never_used_51bce0c785ca2f68081bfa7d91973934><parameter_never_used_51bce0c785ca2f68081bfa7d91973934=example_parameter_2>
|
|
79
|
+
This is the value for the second parameter
|
|
80
|
+
that can span
|
|
81
|
+
multiple lines
|
|
82
|
+
</parameter_never_used_51bce0c785ca2f68081bfa7d91973934></function_never_used_51bce0c785ca2f68081bfa7d91973934><function_never_used_51bce0c785ca2f68081bfa7d91973934=example_function_2><parameter_never_used_51bce0c785ca2f68081bfa7d91973934=example_parameter_3>value_4</parameter_never_used_51bce0c785ca2f68081bfa7d91973934></function_never_used_51bce0c785ca2f68081bfa7d91973934></seed:tool_call_never_used_51bce0c785ca2f68081bfa7d91973934>
|
|
83
|
+
"""`;
|
|
84
|
+
|
|
85
|
+
// OpenAI-compatible client used by queryAI. The base URL and API key come
// from the environment-derived constants; `compatibility: 'compatible'`
// is passed through to createOpenAI unchanged.
// (A leftover `path.join(process.cwd(), 'ai_traffic.log')` statement whose
// result was discarded has been dropped; it had no effect.)
const openai = createOpenAI({
  baseURL: BASE_URL,
  apiKey: API_KEY,
  compatibility: 'compatible',
});
|
|
92
|
+
|
|
93
|
+
/**
 * Send the conversation so far to the model and return its text reply.
 *
 * Any failure (request error or unusable response) is logged and reported
 * to the caller as `null` rather than thrown.
 *
 * @param {Array<object>} messages - Conversation history passed to generateText.
 * @returns {Promise<string|null>} The reply text, or null when the request failed.
 */
async function queryAI(messages) {
  console.log("Querying AI with history length:", messages.length);

  try {
    console.log("⏳ 发送请求中...");
    const requestedAt = Date.now();

    const request = {
      model: openai.chat(ENDPOINT),
      system: SYSTEM_PROMPT,
      messages,
      temperature: 0.1,
      topP: 0.7,
    };
    const response = await generateText(request);
    const reply = response.text;

    const elapsedSeconds = (Date.now() - requestedAt) / 1000;
    console.log("✅ 请求成功! 耗时:", elapsedSeconds, "秒");
    // Log only the first 500 characters of the reply.
    console.log("📥 响应内容:", reply.substring(0, 500) + "...");

    return reply;
  } catch (failure) {
    console.error("❌ AI 请求失败!", failure);
    return null;
  }
}
|
|
119
|
+
|
|
120
|
+
/**
 * Extract function calls from a model reply.
 *
 * Looks for `<function…=NAME> … </function…>` spans and, inside each one,
 * `<parameter…=KEY> VALUE </parameter…>` spans. Each value is trimmed, has
 * any remaining `<…>` tags removed, and is trimmed again.
 *
 * @param {string} aiContent - Raw text returned by the model.
 * @returns {Array<{name: string, params: Object<string, string>}>} Calls in order of appearance.
 */
function parseActions(aiContent) {
  const callPattern = /<function[^>]*=([^>]+)>([\s\S]*?)<\/function[^>]*>/g;
  const argumentPattern = /<parameter[^>]*=([^>]+)>([\s\S]*?)<\/parameter[^>]*>/g;

  return Array.from(String(aiContent).matchAll(callPattern), ([, name, body]) => {
    const params = {};
    for (const [, key, rawValue] of body.matchAll(argumentPattern)) {
      params[key] = rawValue.trim().replace(/<[^>]+>/g, '').trim();
    }
    return { name, params };
  });
}
|
|
147
|
+
|
|
148
|
+
/**
 * Execute one parsed model action against a Playwright page.
 *
 * Points arrive as "x y" strings on a 0-1000 grid and are scaled to the
 * page viewport when one is set. Any error raised while performing the
 * action is logged and swallowed so the agent loop can carry on.
 *
 * @param {object} page - Page object exposing viewportSize(), mouse, keyboard,
 *   evaluate() and waitForTimeout().
 * @param {{name: string, params: Object<string, string>}} action - Parsed call.
 * @returns {Promise<'FINISHED'|'CONTINUE'>} 'FINISHED' for `finished` and
 *   `call_user`, 'CONTINUE' for everything else (including failures).
 */
async function executeAction(page, action) {
  console.log(`Executing action: ${action.name}`, action.params);

  const { name, params } = action;
  const viewport = page.viewportSize();

  // The prompt asks the model for lowercase key names, but Playwright key
  // names are case-sensitive ("Control", "Enter", ...), so translate them.
  const KEY_ALIASES = new Map([
    ['ctrl', 'Control'], ['control', 'Control'],
    ['alt', 'Alt'], ['option', 'Alt'],
    ['shift', 'Shift'],
    ['meta', 'Meta'], ['cmd', 'Meta'], ['command', 'Meta'], ['win', 'Meta'],
    ['enter', 'Enter'], ['return', 'Enter'],
    ['esc', 'Escape'], ['escape', 'Escape'],
    ['tab', 'Tab'], ['space', 'Space'],
    ['backspace', 'Backspace'], ['delete', 'Delete'], ['del', 'Delete'],
    ['insert', 'Insert'], ['home', 'Home'], ['end', 'End'],
    ['pageup', 'PageUp'], ['pagedown', 'PageDown'],
    ['up', 'ArrowUp'], ['down', 'ArrowDown'],
    ['left', 'ArrowLeft'], ['right', 'ArrowRight'],
    ['arrowup', 'ArrowUp'], ['arrowdown', 'ArrowDown'],
    ['arrowleft', 'ArrowLeft'], ['arrowright', 'ArrowRight'],
  ]);
  const toPlaywrightKey = (token) => {
    const lowered = token.toLowerCase();
    if (KEY_ALIASES.has(lowered)) return KEY_ALIASES.get(lowered);
    if (/^f\d{1,2}$/.test(lowered)) return lowered.toUpperCase();
    return token; // single characters and already-valid names pass through
  };

  // Scroll offsets (pixels) for every direction the action space declares.
  const SCROLL_DELTAS = new Map([
    ['down', [0, 500]],
    ['up', [0, -500]],
    ['right', [500, 0]],
    ['left', [-500, 0]],
  ]);

  // Helper to transform 0-1000 coordinates to actual viewport size.
  // Tolerates repeated whitespace or a comma between the two numbers and
  // rejects anything that does not contain two numbers.
  const transformPoint = (pointStr) => {
    if (!pointStr) return null;
    const numbers = String(pointStr).match(/-?\d+(?:\.\d+)?/g) || [];
    if (numbers.length < 2) {
      throw new Error(`Invalid point "${pointStr}": expected "x y"`);
    }
    const x = Number(numbers[0]);
    const y = Number(numbers[1]);

    if (!viewport) return { x, y };

    return {
      x: (x / 1000) * viewport.width,
      y: (y / 1000) * viewport.height,
    };
  };

  try {
    switch (name) {
      case 'click':
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          console.log(`Executing click action: (${x}, ${y}) [Original: ${params.point}]`);
          await page.mouse.click(x, y);
        }
        break;

      case 'left_double':
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          await page.mouse.dblclick(x, y);
        }
        break;

      case 'left_triple':
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          await page.mouse.click(x, y, { clickCount: 3 });
        }
        break;

      case 'right_single':
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          await page.mouse.click(x, y, { button: 'right' });
        }
        break;

      case 'drag':
        if (params.start_point && params.end_point) {
          const start = transformPoint(params.start_point);
          const end = transformPoint(params.end_point);
          await page.mouse.move(start.x, start.y);
          await page.mouse.down();
          await page.mouse.move(end.x, end.y);
          await page.mouse.up();
        }
        break;

      case 'move_to':
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          await page.mouse.move(x, y);
        }
        break;

      case 'mouse_down':
        // Without a point the button goes down at the current mouse position.
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          await page.mouse.move(x, y);
        }
        await page.mouse.down({ button: params.button || 'left' });
        break;

      case 'mouse_up':
        // Without a point the button is released at the current mouse position.
        if (params.point) {
          const { x, y } = transformPoint(params.point);
          await page.mouse.move(x, y);
        }
        await page.mouse.up({ button: params.button || 'left' });
        break;

      case 'type':
        if (params.content) {
          // The prompt tells the model to end content with "\n" to submit.
          // That suffix may arrive as a literal backslash-n; treat either
          // form as "press Enter" instead of typing the characters.
          const submitSuffix = /(?:\\n|\n)$/;
          const shouldSubmit = submitSuffix.test(params.content);
          const text = shouldSubmit
            ? params.content.replace(submitSuffix, '')
            : params.content;
          if (text) {
            await page.keyboard.type(text);
          }
          if (shouldSubmit) {
            await page.keyboard.press('Enter');
          }
        }
        break;

      case 'hotkey':
        if (params.key) {
          // "ctrl shift t" (or "ctrl+shift+t") -> "Control+Shift+t"
          const combo = params.key
            .trim()
            .split(/[\s+]+/)
            .filter(Boolean)
            .map(toPlaywrightKey)
            .join('+');
          await page.keyboard.press(combo);
        }
        break;

      case 'press':
        if (params.key) {
          await page.keyboard.press(toPlaywrightKey(params.key.trim()));
        }
        break;

      case 'scroll': {
        const delta = SCROLL_DELTAS.get(params.direction);
        if (delta) {
          await page.evaluate(([dx, dy]) => window.scrollBy(dx, dy), delta);
        } else {
          console.warn(`Unknown scroll direction: ${params.direction}`);
        }
        break;
      }

      case 'wait':
        if (params.time) {
          const ms = parseFloat(params.time) * 1000;
          if (Number.isFinite(ms) && ms >= 0) {
            await page.waitForTimeout(ms);
          } else {
            console.warn(`Ignoring invalid wait time: ${params.time}`);
          }
        }
        break;

      case 'finished':
        console.log("AI Task Finished:", params.content);
        return 'FINISHED';

      case 'call_user':
        // No interactive user is available, so a request for input ends the run.
        console.log("AI requests user input:", params.content);
        return 'FINISHED';

      default:
        console.warn(`Unknown action: ${name}`);
    }
  } catch (e) {
    console.error(`Failed to execute action ${name}:`, e);
  }

  return 'CONTINUE';
}
|
|
288
|
+
|
|
289
|
+
/**
 * Drive a headless Chromium page through a screenshot -> model -> action loop.
 *
 * Each cycle takes a screenshot (also saved under ./screenshots), sends it
 * with the task text to the model, parses the reply into actions and runs
 * them. The loop ends when an action reports 'FINISHED' or after 20 cycles.
 *
 * @param {object} options
 * @param {string} options.targetUrl - Page to open; "http://" is prefixed when no protocol is given.
 * @param {string} options.taskInput - Task instruction shown to the model.
 * @param {string} options.sessionId - Appended to the URL as `sessionId` and used in screenshot names.
 * @param {*} [options.simulatedUserKnownInfo] - Optional extra info, JSON-encoded into the prompt.
 * @returns {Promise<{status: string, message: string, messages: Array<object>}>}
 *   `status` is 'success' when the loop ran to its end and 'error' when an
 *   exception interrupted it; `messages` is the conversation collected so far.
 */
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
  console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);

  const screenshotsDir = path.join(process.cwd(), 'screenshots');
  if (!fs.existsSync(screenshotsDir)) {
    fs.mkdirSync(screenshotsDir, { recursive: true });
    console.log(`📁 创建截图目录: ${screenshotsDir}`);
  }

  // Launch the browser headless; the page size comes from the fixed
  // viewport configured on the context below.
  const browser = await chromium.launch({
    headless: true,
    args: ['--start-maximized'],
  });

  // Create the page; if that fails, close the browser before rethrowing so
  // the browser process is not left running.
  let page;
  try {
    const context = await browser.newContext({
      viewport: { width: 430, height: 800 },
    });
    page = await context.newPage();
  } catch (setupError) {
    await browser.close();
    throw setupError;
  }

  const conversationHistory = [];

  try {
    // Ensure protocol exists
    let finalUrl = targetUrl;
    if (!finalUrl.startsWith('http://') && !finalUrl.startsWith('https://')) {
      finalUrl = 'http://' + finalUrl;
    }

    // Append sessionId
    const urlObj = new URL(finalUrl);
    urlObj.searchParams.append('sessionId', sessionId);
    finalUrl = urlObj.toString();

    console.log(`Navigating to ${finalUrl}...`);
    await page.goto(finalUrl, { waitUntil: 'load' });
    await page.waitForTimeout(2000); // Wait for initial render

    let isRunning = true;
    let loopCount = 0;
    const MAX_LOOPS = 20; // Safety break

    while (isRunning && loopCount < MAX_LOOPS) {
      loopCount++;
      console.log(`\n--- Cycle ${loopCount} ---`);

      // 1. Screenshot (Playwright's option name is `type`, not `format`)
      const screenshotBuffer = await page.screenshot({ type: 'png' });
      const screenshotBase64 = screenshotBuffer.toString('base64');

      // Save the screenshot locally
      const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
      const screenshotPath = path.join(screenshotsDir, `screenshot_${sessionId}_${loopCount}_${timestamp}.png`);
      fs.writeFileSync(screenshotPath, screenshotBuffer);
      console.log(`📸 截图已保存: ${screenshotPath}`);

      // Read and log the viewport size
      const viewport = page.viewportSize();
      console.log(`📐 Viewport尺寸: ${viewport.width}x${viewport.height}`);
      console.log(`📦 截图大小: ${screenshotBuffer.length} bytes`);

      // 2. Prepare Message for AI
      let promptText = `Task: ${taskInput}\n`;
      if (simulatedUserKnownInfo) {
        promptText += `\nKnown User Info: ${JSON.stringify(simulatedUserKnownInfo, null, 2)}\n`;
      }
      promptText += `\nPlease perform the next action based on the screenshot.`;

      conversationHistory.push({
        role: 'user',
        content: [
          { type: 'text', text: promptText },
          { type: 'image', image: screenshotBase64 },
        ],
      });

      // 3. Query AI
      console.log("Querying AI...");
      const aiContent = await queryAI(conversationHistory);

      if (!aiContent) {
        console.error("Invalid AI response. Retrying...");
        // Drop the unanswered user turn: the next cycle pushes a fresh one,
        // and keeping this one would stack consecutive user messages.
        conversationHistory.pop();
        await page.waitForTimeout(2000);
        continue;
      }

      // Add assistant response to history
      conversationHistory.push({
        role: 'assistant',
        content: aiContent,
      });

      console.log("Raw AI Content (Excerpt):", aiContent);

      // 4. Parse Actions
      const actions = parseActions(aiContent);
      if (actions.length === 0) {
        console.log("No actions parsed from AI response.");
      }

      console.log("Actions:", actions);

      // 5. Execute Actions; stop at the first one that ends the task
      for (const action of actions) {
        const result = await executeAction(page, action);
        console.log("Action result:", result);
        if (result === 'FINISHED') {
          isRunning = false;
          break;
        }
      }

      // 6. Short wait between cycles
      if (isRunning) {
        await page.waitForTimeout(3000);
      }
    }

    if (isRunning) {
      console.warn(`Reached the ${MAX_LOOPS}-cycle limit without a finishing action.`);
    }
    return { status: 'success', message: 'Task completed', messages: conversationHistory };

  } catch (error) {
    console.error("Runtime Custom Error:", error);
    // Include whatever conversation was collected before the failure.
    return { status: 'error', message: error.message, messages: conversationHistory };
  } finally {
    console.log("Closing browser...");
    await browser.close();
  }
}
|
|
417
|
+
|
|
418
|
+
// Load env vars.
// NOTE(review): dotenv.config() is already called near the top of this file;
// this second call looks like a bundling leftover — confirm before removing.
dotenv.config();
|
|
420
|
+
|
|
421
|
+
/**
 * Build a timestamp for the current local time, shaped like YYYYMMDD_HHMMSS.
 *
 * @returns {string} Year, then zero-padded month, day, hour, minute and second.
 */
function getFormattedDate() {
  const current = new Date();
  const twoDigits = (value) => String(value).padStart(2, '0');

  const datePart = [
    current.getFullYear(),
    twoDigits(current.getMonth() + 1), // getMonth() is zero-based
    twoDigits(current.getDate()),
  ].join('');
  const timePart = [current.getHours(), current.getMinutes(), current.getSeconds()]
    .map((value) => twoDigits(value))
    .join('');

  return `${datePart}_${timePart}`;
}
|
|
429
|
+
|
|
430
|
+
/**
 * Execute the task flow several times and save the results on disk.
 *
 * Output layout, under the current working directory:
 *   <taskId>_<timestamp>/<sessionId>/messages.json  - agent conversation
 *   <taskId>_<timestamp>/<sessionId>/result.json    - per-run result
 *   <taskId>_<timestamp>/final-results.json         - all runs, incl. failures
 *   <taskId>_<timestamp>.zip                        - archive of the folder
 *
 * A failing iteration is recorded as `{ iteration, error }` and does not
 * stop the remaining iterations.
 *
 * @param {string} targetWeb - The URL of the web app to test
 * @param {string} targetServer - The URL of the backend task service
 * @param {string} taskId - The ID of the task to run
 * @param {number} [iterations=2] - How many times to run the task flow
 * @returns {Promise<string>} Path of the ZIP archive. The path is returned
 *   even when writing the archive failed (the failure is only logged).
 */
async function runTaskLoop(targetWeb, targetServer, taskId, iterations = 2) {
  const results = [];
  const datetime = getFormattedDate();
  const folderName = `${taskId}_${datetime}`;
  const outputDir = path.join(process.cwd(), folderName);

  // Create root output directory
  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir, { recursive: true });
    console.log(`Created output directory: ${outputDir}`);
  }

  // Ensure URLs have protocols. Checking for the full "http://" / "https://"
  // prefix keeps hosts that merely start with "http" (e.g. "httpbin.org")
  // from being mistaken for complete URLs.
  const withProtocol = (url) => (/^https?:\/\//i.test(url) ? url : `http://${url}`);
  const webUrl = withProtocol(targetWeb);
  const serverUrl = withProtocol(targetServer);

  console.log(`Starting Task Loop for ${taskId}`);

  for (let i = 1; i <= iterations; i++) {
    console.log(`\n=== Starting Iteration ${i}/${iterations} ===`);
    try {
      const data = await executeSingleCycle(webUrl, serverUrl, taskId, i);
      const { sessionId, agentMessages, ...rest } = data;

      // Create session subdirectory
      const sessionDir = path.join(outputDir, sessionId);
      if (!fs.existsSync(sessionDir)) {
        fs.mkdirSync(sessionDir, { recursive: true });
      }

      // Save messages.json
      fs.writeFileSync(
        path.join(sessionDir, 'messages.json'),
        JSON.stringify(agentMessages, null, 2)
      );

      // Save result.json
      fs.writeFileSync(
        path.join(sessionDir, 'result.json'),
        JSON.stringify(rest, null, 2)
      );

      results.push({ sessionId, ...rest });
      console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);

    } catch (e) {
      console.error(`Iteration ${i} failed:`, e.stack || e.message);
      results.push({ iteration: i, error: e.message });
    }
  }

  // Save final-results.json (Consolidated)
  const finalResultPath = path.join(outputDir, 'final-results.json');
  fs.writeFileSync(finalResultPath, JSON.stringify(results, null, 2));
  console.log(`Saved consolidated results to ${finalResultPath}`);

  // Zip the results
  console.log("Packaging results into ZIP...");
  const zipName = `${folderName}.zip`;
  const zipPath = path.join(process.cwd(), zipName);

  try {
    const zip = new AdmZip();
    zip.addLocalFolder(outputDir, folderName);
    zip.writeZip(zipPath);
    console.log(`\n✅ All done! Zip saved to: ${zipPath}`);
  } catch (zipError) {
    // Best effort: the unzipped output folder is still on disk.
    console.error("Failed to create zip:", zipError);
  }

  return zipPath;
}
|
|
508
|
+
|
|
509
|
+
/**
 * Run one start -> setup -> agent -> verify round against the task service.
 *
 * @param {string} targetWeb - URL the agent opens in the browser.
 * @param {string} targetServer - Base URL of the task service API.
 * @param {string} taskId - Task identifier used in the API calls.
 * @param {number} iteration - Iteration number, echoed back in the result.
 * @returns {Promise<{iteration: number, sessionId: string, agentStatus: string,
 *   agentMessages: Array<object>, verifyResult: *}>}
 * @throws {Error} When an API call answers with a non-ok HTTP status or the
 *   start call reports a `code` other than 200.
 */
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration) {
  // Fetch a URL and decode its JSON body; a response whose `ok` flag is false
  // becomes an Error carrying the status and the response text.
  async function requestJson(url, options) {
    const response = await fetch(url, options);
    if (response.ok) {
      return await response.json();
    }
    const bodyText = await response.text();
    throw new Error(`API ${url} failed: ${response.status} ${bodyText}`);
  }

  // JSON POST options identifying the task/session pair (setup and verify).
  const sessionRequest = (session) => ({
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ task_id: taskId, session_id: session }),
  });

  // 1. Start Task
  console.log("Calling Start API...");
  const startData = await requestJson(`${targetServer}/api/tasks/${taskId}/start`, { method: 'POST' });
  if (startData.code !== 200) {
    throw new Error(`Start API failed: ${JSON.stringify(startData)}`);
  }
  const sessionId = startData.data.session_id;
  const taskDefinition = startData.data.TaskJson;

  // 2. Setup Task
  console.log(`Setup for session ${sessionId}...`);
  await requestJson(`${targetServer}/api/tasks/setup`, sessionRequest(sessionId));

  // 3. Run Agent
  console.log("Running Agent...");
  const agentResult = await runAgent({
    taskInput: taskDefinition.task.instruction,
    targetUrl: targetWeb,
    sessionId,
    simulatedUserKnownInfo: taskDefinition.task.simulated_user_known_info,
  });

  // 4. Verify Task
  console.log("Verifying result...");
  const verifyData = await requestJson(`${targetServer}/api/tasks/verify`, sessionRequest(sessionId));

  const { status: agentStatus, messages } = agentResult;
  return {
    iteration,
    sessionId,
    agentStatus,
    agentMessages: messages || [],
    verifyResult: verifyData.data,
  };
}
|
|
564
|
+
|
|
565
|
+
export { runAgent, runTaskLoop };
|