rl-simulator-core 1.0.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +102 -41
- package/dist/index.d.ts +102 -41
- package/dist/index.js +82 -37
- package/dist/index.mjs +82 -37
- package/package.json +1 -1
- package/src/index.js +57 -35
- package/src/runner.js +48 -9
package/dist/index.d.mts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
import { chromium } from 'playwright';
|
|
1
2
|
import fs from 'fs';
|
|
2
3
|
import path from 'path';
|
|
3
|
-
import { chromium } from 'playwright';
|
|
4
4
|
import { createOpenAI } from '@ai-sdk/openai';
|
|
5
5
|
import { generateText } from 'ai';
|
|
6
6
|
import dotenv from 'dotenv';
|
|
@@ -286,7 +286,7 @@ async function executeAction(page, action) {
|
|
|
286
286
|
return 'CONTINUE';
|
|
287
287
|
}
|
|
288
288
|
|
|
289
|
-
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
|
|
289
|
+
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
|
|
290
290
|
console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
|
|
291
291
|
|
|
292
292
|
const screenshotsDir = path.join(process.cwd(), 'screenshots');
|
|
@@ -295,11 +295,19 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
295
295
|
console.log(`📁 创建截图目录: ${screenshotsDir}`);
|
|
296
296
|
}
|
|
297
297
|
|
|
298
|
-
// 2. Launch Browser
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
298
|
+
// 2. Launch Browser (or use existing)
|
|
299
|
+
let browser;
|
|
300
|
+
let shouldCloseBrowser = false;
|
|
301
|
+
|
|
302
|
+
if (externalBrowser) {
|
|
303
|
+
browser = externalBrowser;
|
|
304
|
+
} else {
|
|
305
|
+
browser = await chromium.launch({
|
|
306
|
+
headless: true, // Visible for demo/debug
|
|
307
|
+
args: ['--start-maximized'] // Attempt to maximize
|
|
308
|
+
});
|
|
309
|
+
shouldCloseBrowser = true;
|
|
310
|
+
}
|
|
303
311
|
|
|
304
312
|
const context = await browser.newContext({
|
|
305
313
|
viewport: { width: 430, height: 800 } // Set a reasonable fixed viewport
|
|
@@ -366,7 +374,33 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
366
374
|
|
|
367
375
|
// 3. Query AI
|
|
368
376
|
console.log("Querying AI...");
|
|
369
|
-
|
|
377
|
+
|
|
378
|
+
// Optimization: Window mode & Filter images
|
|
379
|
+
// 1. Keep last 20 rounds (40 messages) - Better memory than 3 rounds, but prevents infinite growth
|
|
380
|
+
let optimizedHistory = conversationHistory;
|
|
381
|
+
if (conversationHistory.length > 40) {
|
|
382
|
+
optimizedHistory = conversationHistory.slice(-40);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// 2. Filter images from older messages (Keep images only in the last 3 rounds / 6 messages)
|
|
386
|
+
// Strategy: "Fading Memory" - Recent = Full (Text+Img), Older = Text only
|
|
387
|
+
optimizedHistory = optimizedHistory.map((msg, index) => {
|
|
388
|
+
// Determine if this message is within the "recent 3 rounds" window
|
|
389
|
+
// length - 1 is the last item. length - 6 is the start of the last 3 rounds (User-AI, User-AI, User-AI)
|
|
390
|
+
const isRecent = index >= optimizedHistory.length - 6;
|
|
391
|
+
|
|
392
|
+
if (isRecent) return msg; // Keep recent messages intact (including images)
|
|
393
|
+
|
|
394
|
+
if (Array.isArray(msg.content)) {
|
|
395
|
+
return {
|
|
396
|
+
...msg,
|
|
397
|
+
content: msg.content.filter(c => c.type !== 'image')
|
|
398
|
+
};
|
|
399
|
+
}
|
|
400
|
+
return msg;
|
|
401
|
+
});
|
|
402
|
+
|
|
403
|
+
const aiContent = await queryAI(optimizedHistory);
|
|
370
404
|
|
|
371
405
|
if (!aiContent) {
|
|
372
406
|
console.error("Invalid AI response. Retrying...");
|
|
@@ -410,8 +444,13 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
410
444
|
console.error("Runtime Custom Error:", error);
|
|
411
445
|
return { status: 'error', message: error.message };
|
|
412
446
|
} finally {
|
|
413
|
-
|
|
414
|
-
|
|
447
|
+
if (shouldCloseBrowser) {
|
|
448
|
+
console.log("Closing browser...");
|
|
449
|
+
await browser.close();
|
|
450
|
+
} else {
|
|
451
|
+
console.log("Closing context...");
|
|
452
|
+
await context.close();
|
|
453
|
+
}
|
|
415
454
|
}
|
|
416
455
|
}
|
|
417
456
|
|
|
@@ -434,7 +473,6 @@ function getFormattedDate() {
|
|
|
434
473
|
* @param {string} taskId - The ID of the task to run
|
|
435
474
|
*/
|
|
436
475
|
async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
|
|
437
|
-
const results = [];
|
|
438
476
|
const datetime = getFormattedDate();
|
|
439
477
|
const folderName = `${taskId}_${datetime}`;
|
|
440
478
|
const outputDir = path.join(process.cwd(), folderName);
|
|
@@ -451,37 +489,59 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
451
489
|
|
|
452
490
|
console.log(`Starting Task Loop for ${taskId}`);
|
|
453
491
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
// Create session subdirectory
|
|
461
|
-
const sessionDir = path.join(outputDir, sessionId);
|
|
462
|
-
if (!fs.existsSync(sessionDir)) {
|
|
463
|
-
fs.mkdirSync(sessionDir, { recursive: true });
|
|
464
|
-
}
|
|
492
|
+
// Launch a shared browser instance
|
|
493
|
+
const browser = await chromium.launch({
|
|
494
|
+
headless: true,
|
|
495
|
+
args: ['--start-maximized']
|
|
496
|
+
});
|
|
465
497
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
path.join(sessionDir, 'messages.json'),
|
|
469
|
-
JSON.stringify(agentMessages, null, 2)
|
|
470
|
-
);
|
|
498
|
+
const results = [];
|
|
499
|
+
const MAX_CONCURRENCY = 5;
|
|
471
500
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
501
|
+
try {
|
|
502
|
+
const promises = [];
|
|
503
|
+
for (let i = 1; i <= MAX_CONCURRENCY; i++) {
|
|
504
|
+
promises.push((async () => {
|
|
505
|
+
console.log(`\n=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
|
|
506
|
+
try {
|
|
507
|
+
// Pass shared browser to executeSingleCycle
|
|
508
|
+
const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
|
|
509
|
+
const { sessionId, agentMessages, ...rest } = data;
|
|
510
|
+
|
|
511
|
+
// Create session subdirectory
|
|
512
|
+
const sessionDir = path.join(outputDir, sessionId);
|
|
513
|
+
if (!fs.existsSync(sessionDir)) {
|
|
514
|
+
fs.mkdirSync(sessionDir, { recursive: true });
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
// Save messages.json
|
|
518
|
+
fs.writeFileSync(
|
|
519
|
+
path.join(sessionDir, 'messages.json'),
|
|
520
|
+
JSON.stringify(agentMessages, null, 2)
|
|
521
|
+
);
|
|
522
|
+
|
|
523
|
+
// Save result.json
|
|
524
|
+
fs.writeFileSync(
|
|
525
|
+
path.join(sessionDir, 'result.json'),
|
|
526
|
+
JSON.stringify(rest, null, 2)
|
|
527
|
+
);
|
|
528
|
+
|
|
529
|
+
console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
|
|
530
|
+
return { sessionId, ...rest };
|
|
531
|
+
} catch (e) {
|
|
532
|
+
console.error(`Iteration ${i} failed:`, e.stack || e.message);
|
|
533
|
+
return { iteration: i, error: e.message };
|
|
534
|
+
}
|
|
535
|
+
})());
|
|
536
|
+
}
|
|
477
537
|
|
|
478
|
-
|
|
479
|
-
|
|
538
|
+
// Wait for all to finish
|
|
539
|
+
const resultsArray = await Promise.all(promises);
|
|
540
|
+
results.push(...resultsArray);
|
|
480
541
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
}
|
|
542
|
+
} finally {
|
|
543
|
+
console.log("Closing shared browser...");
|
|
544
|
+
await browser.close();
|
|
485
545
|
}
|
|
486
546
|
|
|
487
547
|
// Save final-results.json (Consolidated)
|
|
@@ -489,7 +549,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
489
549
|
results.filter(r => r.verifyResult);
|
|
490
550
|
const scores = results.map(r => r.verifyResult?.score || 0);
|
|
491
551
|
const successCount = scores.filter(s => s === 1).length;
|
|
492
|
-
const total =
|
|
552
|
+
const total = MAX_CONCURRENCY;
|
|
493
553
|
|
|
494
554
|
const rate = `${successCount}/${total}`;
|
|
495
555
|
const summary = {
|
|
@@ -527,7 +587,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
527
587
|
};
|
|
528
588
|
}
|
|
529
589
|
|
|
530
|
-
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
|
|
590
|
+
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
|
|
531
591
|
const fetchJson = async (url, opts) => {
|
|
532
592
|
const res = await fetch(url, opts);
|
|
533
593
|
if (!res.ok) {
|
|
@@ -562,7 +622,8 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
|
|
|
562
622
|
taskInput: TaskJson.task.instruction,
|
|
563
623
|
targetUrl: targetWeb,
|
|
564
624
|
sessionId: sessionId,
|
|
565
|
-
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
|
|
625
|
+
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
|
|
626
|
+
browser: browser // Pass the shared browser instance
|
|
566
627
|
});
|
|
567
628
|
|
|
568
629
|
// 4. Verify Task
|
package/dist/index.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
import { chromium } from 'playwright';
|
|
1
2
|
import fs from 'fs';
|
|
2
3
|
import path from 'path';
|
|
3
|
-
import { chromium } from 'playwright';
|
|
4
4
|
import { createOpenAI } from '@ai-sdk/openai';
|
|
5
5
|
import { generateText } from 'ai';
|
|
6
6
|
import dotenv from 'dotenv';
|
|
@@ -286,7 +286,7 @@ async function executeAction(page, action) {
|
|
|
286
286
|
return 'CONTINUE';
|
|
287
287
|
}
|
|
288
288
|
|
|
289
|
-
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
|
|
289
|
+
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
|
|
290
290
|
console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
|
|
291
291
|
|
|
292
292
|
const screenshotsDir = path.join(process.cwd(), 'screenshots');
|
|
@@ -295,11 +295,19 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
295
295
|
console.log(`📁 创建截图目录: ${screenshotsDir}`);
|
|
296
296
|
}
|
|
297
297
|
|
|
298
|
-
// 2. Launch Browser
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
298
|
+
// 2. Launch Browser (or use existing)
|
|
299
|
+
let browser;
|
|
300
|
+
let shouldCloseBrowser = false;
|
|
301
|
+
|
|
302
|
+
if (externalBrowser) {
|
|
303
|
+
browser = externalBrowser;
|
|
304
|
+
} else {
|
|
305
|
+
browser = await chromium.launch({
|
|
306
|
+
headless: true, // Visible for demo/debug
|
|
307
|
+
args: ['--start-maximized'] // Attempt to maximize
|
|
308
|
+
});
|
|
309
|
+
shouldCloseBrowser = true;
|
|
310
|
+
}
|
|
303
311
|
|
|
304
312
|
const context = await browser.newContext({
|
|
305
313
|
viewport: { width: 430, height: 800 } // Set a reasonable fixed viewport
|
|
@@ -366,7 +374,33 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
366
374
|
|
|
367
375
|
// 3. Query AI
|
|
368
376
|
console.log("Querying AI...");
|
|
369
|
-
|
|
377
|
+
|
|
378
|
+
// Optimization: Window mode & Filter images
|
|
379
|
+
// 1. Keep last 20 rounds (40 messages) - Better memory than 3 rounds, but prevents infinite growth
|
|
380
|
+
let optimizedHistory = conversationHistory;
|
|
381
|
+
if (conversationHistory.length > 40) {
|
|
382
|
+
optimizedHistory = conversationHistory.slice(-40);
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// 2. Filter images from older messages (Keep images only in the last 3 rounds / 6 messages)
|
|
386
|
+
// Strategy: "Fading Memory" - Recent = Full (Text+Img), Older = Text only
|
|
387
|
+
optimizedHistory = optimizedHistory.map((msg, index) => {
|
|
388
|
+
// Determine if this message is within the "recent 3 rounds" window
|
|
389
|
+
// length - 1 is the last item. length - 6 is the start of the last 3 rounds (User-AI, User-AI, User-AI)
|
|
390
|
+
const isRecent = index >= optimizedHistory.length - 6;
|
|
391
|
+
|
|
392
|
+
if (isRecent) return msg; // Keep recent messages intact (including images)
|
|
393
|
+
|
|
394
|
+
if (Array.isArray(msg.content)) {
|
|
395
|
+
return {
|
|
396
|
+
...msg,
|
|
397
|
+
content: msg.content.filter(c => c.type !== 'image')
|
|
398
|
+
};
|
|
399
|
+
}
|
|
400
|
+
return msg;
|
|
401
|
+
});
|
|
402
|
+
|
|
403
|
+
const aiContent = await queryAI(optimizedHistory);
|
|
370
404
|
|
|
371
405
|
if (!aiContent) {
|
|
372
406
|
console.error("Invalid AI response. Retrying...");
|
|
@@ -410,8 +444,13 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
410
444
|
console.error("Runtime Custom Error:", error);
|
|
411
445
|
return { status: 'error', message: error.message };
|
|
412
446
|
} finally {
|
|
413
|
-
|
|
414
|
-
|
|
447
|
+
if (shouldCloseBrowser) {
|
|
448
|
+
console.log("Closing browser...");
|
|
449
|
+
await browser.close();
|
|
450
|
+
} else {
|
|
451
|
+
console.log("Closing context...");
|
|
452
|
+
await context.close();
|
|
453
|
+
}
|
|
415
454
|
}
|
|
416
455
|
}
|
|
417
456
|
|
|
@@ -434,7 +473,6 @@ function getFormattedDate() {
|
|
|
434
473
|
* @param {string} taskId - The ID of the task to run
|
|
435
474
|
*/
|
|
436
475
|
async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
|
|
437
|
-
const results = [];
|
|
438
476
|
const datetime = getFormattedDate();
|
|
439
477
|
const folderName = `${taskId}_${datetime}`;
|
|
440
478
|
const outputDir = path.join(process.cwd(), folderName);
|
|
@@ -451,37 +489,59 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
451
489
|
|
|
452
490
|
console.log(`Starting Task Loop for ${taskId}`);
|
|
453
491
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
// Create session subdirectory
|
|
461
|
-
const sessionDir = path.join(outputDir, sessionId);
|
|
462
|
-
if (!fs.existsSync(sessionDir)) {
|
|
463
|
-
fs.mkdirSync(sessionDir, { recursive: true });
|
|
464
|
-
}
|
|
492
|
+
// Launch a shared browser instance
|
|
493
|
+
const browser = await chromium.launch({
|
|
494
|
+
headless: true,
|
|
495
|
+
args: ['--start-maximized']
|
|
496
|
+
});
|
|
465
497
|
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
path.join(sessionDir, 'messages.json'),
|
|
469
|
-
JSON.stringify(agentMessages, null, 2)
|
|
470
|
-
);
|
|
498
|
+
const results = [];
|
|
499
|
+
const MAX_CONCURRENCY = 5;
|
|
471
500
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
501
|
+
try {
|
|
502
|
+
const promises = [];
|
|
503
|
+
for (let i = 1; i <= MAX_CONCURRENCY; i++) {
|
|
504
|
+
promises.push((async () => {
|
|
505
|
+
console.log(`\n=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
|
|
506
|
+
try {
|
|
507
|
+
// Pass shared browser to executeSingleCycle
|
|
508
|
+
const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
|
|
509
|
+
const { sessionId, agentMessages, ...rest } = data;
|
|
510
|
+
|
|
511
|
+
// Create session subdirectory
|
|
512
|
+
const sessionDir = path.join(outputDir, sessionId);
|
|
513
|
+
if (!fs.existsSync(sessionDir)) {
|
|
514
|
+
fs.mkdirSync(sessionDir, { recursive: true });
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
// Save messages.json
|
|
518
|
+
fs.writeFileSync(
|
|
519
|
+
path.join(sessionDir, 'messages.json'),
|
|
520
|
+
JSON.stringify(agentMessages, null, 2)
|
|
521
|
+
);
|
|
522
|
+
|
|
523
|
+
// Save result.json
|
|
524
|
+
fs.writeFileSync(
|
|
525
|
+
path.join(sessionDir, 'result.json'),
|
|
526
|
+
JSON.stringify(rest, null, 2)
|
|
527
|
+
);
|
|
528
|
+
|
|
529
|
+
console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
|
|
530
|
+
return { sessionId, ...rest };
|
|
531
|
+
} catch (e) {
|
|
532
|
+
console.error(`Iteration ${i} failed:`, e.stack || e.message);
|
|
533
|
+
return { iteration: i, error: e.message };
|
|
534
|
+
}
|
|
535
|
+
})());
|
|
536
|
+
}
|
|
477
537
|
|
|
478
|
-
|
|
479
|
-
|
|
538
|
+
// Wait for all to finish
|
|
539
|
+
const resultsArray = await Promise.all(promises);
|
|
540
|
+
results.push(...resultsArray);
|
|
480
541
|
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
}
|
|
542
|
+
} finally {
|
|
543
|
+
console.log("Closing shared browser...");
|
|
544
|
+
await browser.close();
|
|
485
545
|
}
|
|
486
546
|
|
|
487
547
|
// Save final-results.json (Consolidated)
|
|
@@ -489,7 +549,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
489
549
|
results.filter(r => r.verifyResult);
|
|
490
550
|
const scores = results.map(r => r.verifyResult?.score || 0);
|
|
491
551
|
const successCount = scores.filter(s => s === 1).length;
|
|
492
|
-
const total =
|
|
552
|
+
const total = MAX_CONCURRENCY;
|
|
493
553
|
|
|
494
554
|
const rate = `${successCount}/${total}`;
|
|
495
555
|
const summary = {
|
|
@@ -527,7 +587,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
527
587
|
};
|
|
528
588
|
}
|
|
529
589
|
|
|
530
|
-
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
|
|
590
|
+
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
|
|
531
591
|
const fetchJson = async (url, opts) => {
|
|
532
592
|
const res = await fetch(url, opts);
|
|
533
593
|
if (!res.ok) {
|
|
@@ -562,7 +622,8 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
|
|
|
562
622
|
taskInput: TaskJson.task.instruction,
|
|
563
623
|
targetUrl: targetWeb,
|
|
564
624
|
sessionId: sessionId,
|
|
565
|
-
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
|
|
625
|
+
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
|
|
626
|
+
browser: browser // Pass the shared browser instance
|
|
566
627
|
});
|
|
567
628
|
|
|
568
629
|
// 4. Verify Task
|
package/dist/index.js
CHANGED
|
@@ -33,6 +33,7 @@ __export(index_exports, {
|
|
|
33
33
|
runTaskLoop: () => runTaskLoop
|
|
34
34
|
});
|
|
35
35
|
module.exports = __toCommonJS(index_exports);
|
|
36
|
+
var import_playwright2 = require("playwright");
|
|
36
37
|
var import_fs2 = __toESM(require("fs"));
|
|
37
38
|
var import_path3 = __toESM(require("path"));
|
|
38
39
|
|
|
@@ -294,19 +295,26 @@ async function executeAction(page, action) {
|
|
|
294
295
|
}
|
|
295
296
|
|
|
296
297
|
// src/runner.js
|
|
297
|
-
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
|
|
298
|
+
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
|
|
298
299
|
console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
|
|
299
300
|
const screenshotsDir = import_path2.default.join(process.cwd(), "screenshots");
|
|
300
301
|
if (!import_fs.default.existsSync(screenshotsDir)) {
|
|
301
302
|
import_fs.default.mkdirSync(screenshotsDir, { recursive: true });
|
|
302
303
|
console.log(`\u{1F4C1} \u521B\u5EFA\u622A\u56FE\u76EE\u5F55: ${screenshotsDir}`);
|
|
303
304
|
}
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
305
|
+
let browser;
|
|
306
|
+
let shouldCloseBrowser = false;
|
|
307
|
+
if (externalBrowser) {
|
|
308
|
+
browser = externalBrowser;
|
|
309
|
+
} else {
|
|
310
|
+
browser = await import_playwright.chromium.launch({
|
|
311
|
+
headless: true,
|
|
312
|
+
// Visible for demo/debug
|
|
313
|
+
args: ["--start-maximized"]
|
|
314
|
+
// Attempt to maximize
|
|
315
|
+
});
|
|
316
|
+
shouldCloseBrowser = true;
|
|
317
|
+
}
|
|
310
318
|
const context = await browser.newContext({
|
|
311
319
|
viewport: { width: 430, height: 800 }
|
|
312
320
|
// Set a reasonable fixed viewport
|
|
@@ -358,7 +366,22 @@ Please perform the next action based on the screenshot.`;
|
|
|
358
366
|
content: userContent
|
|
359
367
|
});
|
|
360
368
|
console.log("Querying AI...");
|
|
361
|
-
|
|
369
|
+
let optimizedHistory = conversationHistory;
|
|
370
|
+
if (conversationHistory.length > 40) {
|
|
371
|
+
optimizedHistory = conversationHistory.slice(-40);
|
|
372
|
+
}
|
|
373
|
+
optimizedHistory = optimizedHistory.map((msg, index) => {
|
|
374
|
+
const isRecent = index >= optimizedHistory.length - 6;
|
|
375
|
+
if (isRecent) return msg;
|
|
376
|
+
if (Array.isArray(msg.content)) {
|
|
377
|
+
return {
|
|
378
|
+
...msg,
|
|
379
|
+
content: msg.content.filter((c) => c.type !== "image")
|
|
380
|
+
};
|
|
381
|
+
}
|
|
382
|
+
return msg;
|
|
383
|
+
});
|
|
384
|
+
const aiContent = await queryAI(optimizedHistory);
|
|
362
385
|
if (!aiContent) {
|
|
363
386
|
console.error("Invalid AI response. Retrying...");
|
|
364
387
|
await page.waitForTimeout(2e3);
|
|
@@ -391,8 +414,13 @@ Please perform the next action based on the screenshot.`;
|
|
|
391
414
|
console.error("Runtime Custom Error:", error);
|
|
392
415
|
return { status: "error", message: error.message };
|
|
393
416
|
} finally {
|
|
394
|
-
|
|
395
|
-
|
|
417
|
+
if (shouldCloseBrowser) {
|
|
418
|
+
console.log("Closing browser...");
|
|
419
|
+
await browser.close();
|
|
420
|
+
} else {
|
|
421
|
+
console.log("Closing context...");
|
|
422
|
+
await context.close();
|
|
423
|
+
}
|
|
396
424
|
}
|
|
397
425
|
}
|
|
398
426
|
|
|
@@ -406,7 +434,6 @@ function getFormattedDate() {
|
|
|
406
434
|
return `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
|
|
407
435
|
}
|
|
408
436
|
async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
|
|
409
|
-
const results = [];
|
|
410
437
|
const datetime = getFormattedDate();
|
|
411
438
|
const folderName = `${taskId}_${datetime}`;
|
|
412
439
|
const outputDir = import_path3.default.join(process.cwd(), folderName);
|
|
@@ -417,30 +444,46 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
417
444
|
if (!targetWeb.startsWith("http")) targetWeb = "http://" + targetWeb;
|
|
418
445
|
if (!targetServer.startsWith("http")) targetServer = "http://" + targetServer;
|
|
419
446
|
console.log(`Starting Task Loop for ${taskId}`);
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
447
|
+
const browser = await import_playwright2.chromium.launch({
|
|
448
|
+
headless: true,
|
|
449
|
+
args: ["--start-maximized"]
|
|
450
|
+
});
|
|
451
|
+
const results = [];
|
|
452
|
+
const MAX_CONCURRENCY = 5;
|
|
453
|
+
try {
|
|
454
|
+
const promises = [];
|
|
455
|
+
for (let i = 1; i <= MAX_CONCURRENCY; i++) {
|
|
456
|
+
promises.push((async () => {
|
|
457
|
+
console.log(`
|
|
458
|
+
=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
|
|
459
|
+
try {
|
|
460
|
+
const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
|
|
461
|
+
const { sessionId, agentMessages, ...rest } = data;
|
|
462
|
+
const sessionDir = import_path3.default.join(outputDir, sessionId);
|
|
463
|
+
if (!import_fs2.default.existsSync(sessionDir)) {
|
|
464
|
+
import_fs2.default.mkdirSync(sessionDir, { recursive: true });
|
|
465
|
+
}
|
|
466
|
+
import_fs2.default.writeFileSync(
|
|
467
|
+
import_path3.default.join(sessionDir, "messages.json"),
|
|
468
|
+
JSON.stringify(agentMessages, null, 2)
|
|
469
|
+
);
|
|
470
|
+
import_fs2.default.writeFileSync(
|
|
471
|
+
import_path3.default.join(sessionDir, "result.json"),
|
|
472
|
+
JSON.stringify(rest, null, 2)
|
|
473
|
+
);
|
|
474
|
+
console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
|
|
475
|
+
return { sessionId, ...rest };
|
|
476
|
+
} catch (e) {
|
|
477
|
+
console.error(`Iteration ${i} failed:`, e.stack || e.message);
|
|
478
|
+
return { iteration: i, error: e.message };
|
|
479
|
+
}
|
|
480
|
+
})());
|
|
443
481
|
}
|
|
482
|
+
const resultsArray = await Promise.all(promises);
|
|
483
|
+
results.push(...resultsArray);
|
|
484
|
+
} finally {
|
|
485
|
+
console.log("Closing shared browser...");
|
|
486
|
+
await browser.close();
|
|
444
487
|
}
|
|
445
488
|
const validResults = results.filter((r) => r.verifyResult);
|
|
446
489
|
const scores = results.map((r) => {
|
|
@@ -448,7 +491,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
448
491
|
return ((_a = r.verifyResult) == null ? void 0 : _a.score) || 0;
|
|
449
492
|
});
|
|
450
493
|
const successCount = scores.filter((s) => s === 1).length;
|
|
451
|
-
const total =
|
|
494
|
+
const total = MAX_CONCURRENCY;
|
|
452
495
|
const rate = `${successCount}/${total}`;
|
|
453
496
|
const summary = {
|
|
454
497
|
rate,
|
|
@@ -479,7 +522,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
479
522
|
taskId
|
|
480
523
|
};
|
|
481
524
|
}
|
|
482
|
-
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
|
|
525
|
+
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
|
|
483
526
|
const fetchJson = async (url, opts) => {
|
|
484
527
|
const res = await fetch(url, opts);
|
|
485
528
|
if (!res.ok) {
|
|
@@ -507,7 +550,9 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
|
|
|
507
550
|
taskInput: TaskJson.task.instruction,
|
|
508
551
|
targetUrl: targetWeb,
|
|
509
552
|
sessionId,
|
|
510
|
-
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
|
|
553
|
+
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
|
|
554
|
+
browser
|
|
555
|
+
// Pass the shared browser instance
|
|
511
556
|
});
|
|
512
557
|
console.log("Verifying result...");
|
|
513
558
|
const verifyUrl = `${targetServer}/api/verify/run`;
|
package/dist/index.mjs
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
// src/index.js
|
|
2
|
+
import { chromium as chromium2 } from "playwright";
|
|
2
3
|
import fs2 from "fs";
|
|
3
4
|
import path3 from "path";
|
|
4
5
|
|
|
@@ -260,19 +261,26 @@ async function executeAction(page, action) {
|
|
|
260
261
|
}
|
|
261
262
|
|
|
262
263
|
// src/runner.js
|
|
263
|
-
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
|
|
264
|
+
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
|
|
264
265
|
console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
|
|
265
266
|
const screenshotsDir = path2.join(process.cwd(), "screenshots");
|
|
266
267
|
if (!fs.existsSync(screenshotsDir)) {
|
|
267
268
|
fs.mkdirSync(screenshotsDir, { recursive: true });
|
|
268
269
|
console.log(`\u{1F4C1} \u521B\u5EFA\u622A\u56FE\u76EE\u5F55: ${screenshotsDir}`);
|
|
269
270
|
}
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
271
|
+
let browser;
|
|
272
|
+
let shouldCloseBrowser = false;
|
|
273
|
+
if (externalBrowser) {
|
|
274
|
+
browser = externalBrowser;
|
|
275
|
+
} else {
|
|
276
|
+
browser = await chromium.launch({
|
|
277
|
+
headless: true,
|
|
278
|
+
// Visible for demo/debug
|
|
279
|
+
args: ["--start-maximized"]
|
|
280
|
+
// Attempt to maximize
|
|
281
|
+
});
|
|
282
|
+
shouldCloseBrowser = true;
|
|
283
|
+
}
|
|
276
284
|
const context = await browser.newContext({
|
|
277
285
|
viewport: { width: 430, height: 800 }
|
|
278
286
|
// Set a reasonable fixed viewport
|
|
@@ -324,7 +332,22 @@ Please perform the next action based on the screenshot.`;
|
|
|
324
332
|
content: userContent
|
|
325
333
|
});
|
|
326
334
|
console.log("Querying AI...");
|
|
327
|
-
|
|
335
|
+
let optimizedHistory = conversationHistory;
|
|
336
|
+
if (conversationHistory.length > 40) {
|
|
337
|
+
optimizedHistory = conversationHistory.slice(-40);
|
|
338
|
+
}
|
|
339
|
+
optimizedHistory = optimizedHistory.map((msg, index) => {
|
|
340
|
+
const isRecent = index >= optimizedHistory.length - 6;
|
|
341
|
+
if (isRecent) return msg;
|
|
342
|
+
if (Array.isArray(msg.content)) {
|
|
343
|
+
return {
|
|
344
|
+
...msg,
|
|
345
|
+
content: msg.content.filter((c) => c.type !== "image")
|
|
346
|
+
};
|
|
347
|
+
}
|
|
348
|
+
return msg;
|
|
349
|
+
});
|
|
350
|
+
const aiContent = await queryAI(optimizedHistory);
|
|
328
351
|
if (!aiContent) {
|
|
329
352
|
console.error("Invalid AI response. Retrying...");
|
|
330
353
|
await page.waitForTimeout(2e3);
|
|
@@ -357,8 +380,13 @@ Please perform the next action based on the screenshot.`;
|
|
|
357
380
|
console.error("Runtime Custom Error:", error);
|
|
358
381
|
return { status: "error", message: error.message };
|
|
359
382
|
} finally {
|
|
360
|
-
|
|
361
|
-
|
|
383
|
+
if (shouldCloseBrowser) {
|
|
384
|
+
console.log("Closing browser...");
|
|
385
|
+
await browser.close();
|
|
386
|
+
} else {
|
|
387
|
+
console.log("Closing context...");
|
|
388
|
+
await context.close();
|
|
389
|
+
}
|
|
362
390
|
}
|
|
363
391
|
}
|
|
364
392
|
|
|
@@ -372,7 +400,6 @@ function getFormattedDate() {
|
|
|
372
400
|
return `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
|
|
373
401
|
}
|
|
374
402
|
async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
|
|
375
|
-
const results = [];
|
|
376
403
|
const datetime = getFormattedDate();
|
|
377
404
|
const folderName = `${taskId}_${datetime}`;
|
|
378
405
|
const outputDir = path3.join(process.cwd(), folderName);
|
|
@@ -383,30 +410,46 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
383
410
|
if (!targetWeb.startsWith("http")) targetWeb = "http://" + targetWeb;
|
|
384
411
|
if (!targetServer.startsWith("http")) targetServer = "http://" + targetServer;
|
|
385
412
|
console.log(`Starting Task Loop for ${taskId}`);
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
413
|
+
const browser = await chromium2.launch({
|
|
414
|
+
headless: true,
|
|
415
|
+
args: ["--start-maximized"]
|
|
416
|
+
});
|
|
417
|
+
const results = [];
|
|
418
|
+
const MAX_CONCURRENCY = 5;
|
|
419
|
+
try {
|
|
420
|
+
const promises = [];
|
|
421
|
+
for (let i = 1; i <= MAX_CONCURRENCY; i++) {
|
|
422
|
+
promises.push((async () => {
|
|
423
|
+
console.log(`
|
|
424
|
+
=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
|
|
425
|
+
try {
|
|
426
|
+
const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
|
|
427
|
+
const { sessionId, agentMessages, ...rest } = data;
|
|
428
|
+
const sessionDir = path3.join(outputDir, sessionId);
|
|
429
|
+
if (!fs2.existsSync(sessionDir)) {
|
|
430
|
+
fs2.mkdirSync(sessionDir, { recursive: true });
|
|
431
|
+
}
|
|
432
|
+
fs2.writeFileSync(
|
|
433
|
+
path3.join(sessionDir, "messages.json"),
|
|
434
|
+
JSON.stringify(agentMessages, null, 2)
|
|
435
|
+
);
|
|
436
|
+
fs2.writeFileSync(
|
|
437
|
+
path3.join(sessionDir, "result.json"),
|
|
438
|
+
JSON.stringify(rest, null, 2)
|
|
439
|
+
);
|
|
440
|
+
console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
|
|
441
|
+
return { sessionId, ...rest };
|
|
442
|
+
} catch (e) {
|
|
443
|
+
console.error(`Iteration ${i} failed:`, e.stack || e.message);
|
|
444
|
+
return { iteration: i, error: e.message };
|
|
445
|
+
}
|
|
446
|
+
})());
|
|
409
447
|
}
|
|
448
|
+
const resultsArray = await Promise.all(promises);
|
|
449
|
+
results.push(...resultsArray);
|
|
450
|
+
} finally {
|
|
451
|
+
console.log("Closing shared browser...");
|
|
452
|
+
await browser.close();
|
|
410
453
|
}
|
|
411
454
|
const validResults = results.filter((r) => r.verifyResult);
|
|
412
455
|
const scores = results.map((r) => {
|
|
@@ -414,7 +457,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
414
457
|
return ((_a = r.verifyResult) == null ? void 0 : _a.score) || 0;
|
|
415
458
|
});
|
|
416
459
|
const successCount = scores.filter((s) => s === 1).length;
|
|
417
|
-
const total =
|
|
460
|
+
const total = MAX_CONCURRENCY;
|
|
418
461
|
const rate = `${successCount}/${total}`;
|
|
419
462
|
const summary = {
|
|
420
463
|
rate,
|
|
@@ -445,7 +488,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
|
|
|
445
488
|
taskId
|
|
446
489
|
};
|
|
447
490
|
}
|
|
448
|
-
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
|
|
491
|
+
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
|
|
449
492
|
const fetchJson = async (url, opts) => {
|
|
450
493
|
const res = await fetch(url, opts);
|
|
451
494
|
if (!res.ok) {
|
|
@@ -473,7 +516,9 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
|
|
|
473
516
|
taskInput: TaskJson.task.instruction,
|
|
474
517
|
targetUrl: targetWeb,
|
|
475
518
|
sessionId,
|
|
476
|
-
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
|
|
519
|
+
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
|
|
520
|
+
browser
|
|
521
|
+
// Pass the shared browser instance
|
|
477
522
|
});
|
|
478
523
|
console.log("Verifying result...");
|
|
479
524
|
const verifyUrl = `${targetServer}/api/verify/run`;
|
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
import { chromium } from 'playwright';
|
|
2
2
|
import fs from 'fs';
|
|
3
3
|
import path from 'path';
|
|
4
4
|
import { runAgent } from './runner.js';
|
|
@@ -26,7 +26,6 @@ function getFormattedDate() {
|
|
|
26
26
|
* @param {string} taskId - The ID of the task to run
|
|
27
27
|
*/
|
|
28
28
|
export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
|
|
29
|
-
const results = [];
|
|
30
29
|
const datetime = getFormattedDate();
|
|
31
30
|
const folderName = `${taskId}_${datetime}`;
|
|
32
31
|
const outputDir = path.join(process.cwd(), folderName);
|
|
@@ -43,37 +42,59 @@ export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner =
|
|
|
43
42
|
|
|
44
43
|
console.log(`Starting Task Loop for ${taskId}`);
|
|
45
44
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
45
|
+
// Launch a shared browser instance
|
|
46
|
+
const browser = await chromium.launch({
|
|
47
|
+
headless: true,
|
|
48
|
+
args: ['--start-maximized']
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const results = [];
|
|
52
|
+
const MAX_CONCURRENCY = 5;
|
|
53
|
+
|
|
54
|
+
try {
|
|
55
|
+
const promises = [];
|
|
56
|
+
for (let i = 1; i <= MAX_CONCURRENCY; i++) {
|
|
57
|
+
promises.push((async () => {
|
|
58
|
+
console.log(`\n=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
|
|
59
|
+
try {
|
|
60
|
+
// Pass shared browser to executeSingleCycle
|
|
61
|
+
const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
|
|
62
|
+
const { sessionId, agentMessages, ...rest } = data;
|
|
63
|
+
|
|
64
|
+
// Create session subdirectory
|
|
65
|
+
const sessionDir = path.join(outputDir, sessionId);
|
|
66
|
+
if (!fs.existsSync(sessionDir)) {
|
|
67
|
+
fs.mkdirSync(sessionDir, { recursive: true });
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
// Save messages.json
|
|
71
|
+
fs.writeFileSync(
|
|
72
|
+
path.join(sessionDir, 'messages.json'),
|
|
73
|
+
JSON.stringify(agentMessages, null, 2)
|
|
74
|
+
);
|
|
75
|
+
|
|
76
|
+
// Save result.json
|
|
77
|
+
fs.writeFileSync(
|
|
78
|
+
path.join(sessionDir, 'result.json'),
|
|
79
|
+
JSON.stringify(rest, null, 2)
|
|
80
|
+
);
|
|
81
|
+
|
|
82
|
+
console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
|
|
83
|
+
return { sessionId, ...rest };
|
|
84
|
+
} catch (e) {
|
|
85
|
+
console.error(`Iteration ${i} failed:`, e.stack || e.message);
|
|
86
|
+
return { iteration: i, error: e.message };
|
|
87
|
+
}
|
|
88
|
+
})());
|
|
76
89
|
}
|
|
90
|
+
|
|
91
|
+
// Wait for all to finish
|
|
92
|
+
const resultsArray = await Promise.all(promises);
|
|
93
|
+
results.push(...resultsArray);
|
|
94
|
+
|
|
95
|
+
} finally {
|
|
96
|
+
console.log("Closing shared browser...");
|
|
97
|
+
await browser.close();
|
|
77
98
|
}
|
|
78
99
|
|
|
79
100
|
// Save final-results.json (Consolidated)
|
|
@@ -81,7 +102,7 @@ export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner =
|
|
|
81
102
|
const validResults = results.filter(r => r.verifyResult);
|
|
82
103
|
const scores = results.map(r => r.verifyResult?.score || 0);
|
|
83
104
|
const successCount = scores.filter(s => s === 1).length;
|
|
84
|
-
const total =
|
|
105
|
+
const total = MAX_CONCURRENCY;
|
|
85
106
|
|
|
86
107
|
const rate = `${successCount}/${total}`;
|
|
87
108
|
const summary = {
|
|
@@ -119,7 +140,7 @@ export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner =
|
|
|
119
140
|
};
|
|
120
141
|
}
|
|
121
142
|
|
|
122
|
-
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
|
|
143
|
+
async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
|
|
123
144
|
const fetchJson = async (url, opts) => {
|
|
124
145
|
const res = await fetch(url, opts);
|
|
125
146
|
if (!res.ok) {
|
|
@@ -154,7 +175,8 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
|
|
|
154
175
|
taskInput: TaskJson.task.instruction,
|
|
155
176
|
targetUrl: targetWeb,
|
|
156
177
|
sessionId: sessionId,
|
|
157
|
-
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
|
|
178
|
+
simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
|
|
179
|
+
browser: browser // Pass the shared browser instance
|
|
158
180
|
});
|
|
159
181
|
|
|
160
182
|
// 4. Verify Task
|
package/src/runner.js
CHANGED
|
@@ -4,7 +4,7 @@ import path from 'path';
|
|
|
4
4
|
import { queryAI, parseActions } from './ai.js';
|
|
5
5
|
import { executeAction } from './actions.js';
|
|
6
6
|
|
|
7
|
-
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
|
|
7
|
+
async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
|
|
8
8
|
console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
|
|
9
9
|
|
|
10
10
|
const screenshotsDir = path.join(process.cwd(), 'screenshots');
|
|
@@ -13,11 +13,19 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
13
13
|
console.log(`📁 创建截图目录: ${screenshotsDir}`);
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
-
// 2. Launch Browser
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
16
|
+
// 2. Launch Browser (or use existing)
|
|
17
|
+
let browser;
|
|
18
|
+
let shouldCloseBrowser = false;
|
|
19
|
+
|
|
20
|
+
if (externalBrowser) {
|
|
21
|
+
browser = externalBrowser;
|
|
22
|
+
} else {
|
|
23
|
+
browser = await chromium.launch({
|
|
24
|
+
headless: true, // Visible for demo/debug
|
|
25
|
+
args: ['--start-maximized'] // Attempt to maximize
|
|
26
|
+
});
|
|
27
|
+
shouldCloseBrowser = true;
|
|
28
|
+
}
|
|
21
29
|
|
|
22
30
|
const context = await browser.newContext({
|
|
23
31
|
viewport: { width: 430, height: 800 } // Set a reasonable fixed viewport
|
|
@@ -84,7 +92,33 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
84
92
|
|
|
85
93
|
// 3. Query AI
|
|
86
94
|
console.log("Querying AI...");
|
|
87
|
-
|
|
95
|
+
|
|
96
|
+
// Optimization: Window mode & Filter images
|
|
97
|
+
// 1. Keep last 20 rounds (40 messages) - Better memory than 3 rounds, but prevents infinite growth
|
|
98
|
+
let optimizedHistory = conversationHistory;
|
|
99
|
+
if (conversationHistory.length > 40) {
|
|
100
|
+
optimizedHistory = conversationHistory.slice(-40);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// 2. Filter images from older messages (Keep images only in the last 3 rounds / 6 messages)
|
|
104
|
+
// Strategy: "Fading Memory" - Recent = Full (Text+Img), Older = Text only
|
|
105
|
+
optimizedHistory = optimizedHistory.map((msg, index) => {
|
|
106
|
+
// Determine if this message is within the "recent 3 rounds" window
|
|
107
|
+
// length - 1 is the last item. length - 6 is the start of the last 3 rounds (User-AI, User-AI, User-AI)
|
|
108
|
+
const isRecent = index >= optimizedHistory.length - 6;
|
|
109
|
+
|
|
110
|
+
if (isRecent) return msg; // Keep recent messages intact (including images)
|
|
111
|
+
|
|
112
|
+
if (Array.isArray(msg.content)) {
|
|
113
|
+
return {
|
|
114
|
+
...msg,
|
|
115
|
+
content: msg.content.filter(c => c.type !== 'image')
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
return msg;
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
const aiContent = await queryAI(optimizedHistory);
|
|
88
122
|
|
|
89
123
|
if (!aiContent) {
|
|
90
124
|
console.error("Invalid AI response. Retrying...");
|
|
@@ -128,8 +162,13 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
|
|
|
128
162
|
console.error("Runtime Custom Error:", error);
|
|
129
163
|
return { status: 'error', message: error.message };
|
|
130
164
|
} finally {
|
|
131
|
-
|
|
132
|
-
|
|
165
|
+
if (shouldCloseBrowser) {
|
|
166
|
+
console.log("Closing browser...");
|
|
167
|
+
await browser.close();
|
|
168
|
+
} else {
|
|
169
|
+
console.log("Closing context...");
|
|
170
|
+
await context.close();
|
|
171
|
+
}
|
|
133
172
|
}
|
|
134
173
|
}
|
|
135
174
|
|