rl-simulator-core 1.0.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,6 +1,6 @@
1
+ import { chromium } from 'playwright';
1
2
  import fs from 'fs';
2
3
  import path from 'path';
3
- import { chromium } from 'playwright';
4
4
  import { createOpenAI } from '@ai-sdk/openai';
5
5
  import { generateText } from 'ai';
6
6
  import dotenv from 'dotenv';
@@ -286,7 +286,7 @@ async function executeAction(page, action) {
286
286
  return 'CONTINUE';
287
287
  }
288
288
 
289
- async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
289
+ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
290
290
  console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
291
291
 
292
292
  const screenshotsDir = path.join(process.cwd(), 'screenshots');
@@ -295,11 +295,19 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
295
295
  console.log(`📁 创建截图目录: ${screenshotsDir}`);
296
296
  }
297
297
 
298
- // 2. Launch Browser
299
- const browser = await chromium.launch({
300
- headless: true, // Visible for demo/debug
301
- args: ['--start-maximized'] // Attempt to maximize
302
- });
298
+ // 2. Launch Browser (or use existing)
299
+ let browser;
300
+ let shouldCloseBrowser = false;
301
+
302
+ if (externalBrowser) {
303
+ browser = externalBrowser;
304
+ } else {
305
+ browser = await chromium.launch({
306
+ headless: true, // Visible for demo/debug
307
+ args: ['--start-maximized'] // Attempt to maximize
308
+ });
309
+ shouldCloseBrowser = true;
310
+ }
303
311
 
304
312
  const context = await browser.newContext({
305
313
  viewport: { width: 430, height: 800 } // Set a reasonable fixed viewport
@@ -366,7 +374,33 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
366
374
 
367
375
  // 3. Query AI
368
376
  console.log("Querying AI...");
369
- const aiContent = await queryAI(conversationHistory);
377
+
378
+ // Optimization: Window mode & Filter images
379
+ // 1. Keep last 20 rounds (40 messages) - Better memory than 3 rounds, but prevents infinite growth
380
+ let optimizedHistory = conversationHistory;
381
+ if (conversationHistory.length > 40) {
382
+ optimizedHistory = conversationHistory.slice(-40);
383
+ }
384
+
385
+ // 2. Filter images from older messages (Keep images only in the last 3 rounds / 6 messages)
386
+ // Strategy: "Fading Memory" - Recent = Full (Text+Img), Older = Text only
387
+ optimizedHistory = optimizedHistory.map((msg, index) => {
388
+ // Determine if this message is within the "recent 3 rounds" window
389
+ // length - 1 is the last item. length - 6 is the start of the last 3 rounds (User-AI, User-AI, User-AI)
390
+ const isRecent = index >= optimizedHistory.length - 6;
391
+
392
+ if (isRecent) return msg; // Keep recent messages intact (including images)
393
+
394
+ if (Array.isArray(msg.content)) {
395
+ return {
396
+ ...msg,
397
+ content: msg.content.filter(c => c.type !== 'image')
398
+ };
399
+ }
400
+ return msg;
401
+ });
402
+
403
+ const aiContent = await queryAI(optimizedHistory);
370
404
 
371
405
  if (!aiContent) {
372
406
  console.error("Invalid AI response. Retrying...");
@@ -410,8 +444,13 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
410
444
  console.error("Runtime Custom Error:", error);
411
445
  return { status: 'error', message: error.message };
412
446
  } finally {
413
- console.log("Closing browser...");
414
- await browser.close();
447
+ if (shouldCloseBrowser) {
448
+ console.log("Closing browser...");
449
+ await browser.close();
450
+ } else {
451
+ console.log("Closing context...");
452
+ await context.close();
453
+ }
415
454
  }
416
455
  }
417
456
 
@@ -434,7 +473,6 @@ function getFormattedDate() {
434
473
  * @param {string} taskId - The ID of the task to run
435
474
  */
436
475
  async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
437
- const results = [];
438
476
  const datetime = getFormattedDate();
439
477
  const folderName = `${taskId}_${datetime}`;
440
478
  const outputDir = path.join(process.cwd(), folderName);
@@ -451,37 +489,59 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
451
489
 
452
490
  console.log(`Starting Task Loop for ${taskId}`);
453
491
 
454
- for (let i = 1; i <= 5; i++) {
455
- console.log(`\n=== Starting Iteration ${i}/5 ===`);
456
- try {
457
- const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner);
458
- const { sessionId, agentMessages, ...rest } = data;
459
-
460
- // Create session subdirectory
461
- const sessionDir = path.join(outputDir, sessionId);
462
- if (!fs.existsSync(sessionDir)) {
463
- fs.mkdirSync(sessionDir, { recursive: true });
464
- }
492
+ // Launch a shared browser instance
493
+ const browser = await chromium.launch({
494
+ headless: true,
495
+ args: ['--start-maximized']
496
+ });
465
497
 
466
- // Save messages.json
467
- fs.writeFileSync(
468
- path.join(sessionDir, 'messages.json'),
469
- JSON.stringify(agentMessages, null, 2)
470
- );
498
+ const results = [];
499
+ const MAX_CONCURRENCY = 5;
471
500
 
472
- // Save result.json
473
- fs.writeFileSync(
474
- path.join(sessionDir, 'result.json'),
475
- JSON.stringify(rest, null, 2)
476
- );
501
+ try {
502
+ const promises = [];
503
+ for (let i = 1; i <= MAX_CONCURRENCY; i++) {
504
+ promises.push((async () => {
505
+ console.log(`\n=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
506
+ try {
507
+ // Pass shared browser to executeSingleCycle
508
+ const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
509
+ const { sessionId, agentMessages, ...rest } = data;
510
+
511
+ // Create session subdirectory
512
+ const sessionDir = path.join(outputDir, sessionId);
513
+ if (!fs.existsSync(sessionDir)) {
514
+ fs.mkdirSync(sessionDir, { recursive: true });
515
+ }
516
+
517
+ // Save messages.json
518
+ fs.writeFileSync(
519
+ path.join(sessionDir, 'messages.json'),
520
+ JSON.stringify(agentMessages, null, 2)
521
+ );
522
+
523
+ // Save result.json
524
+ fs.writeFileSync(
525
+ path.join(sessionDir, 'result.json'),
526
+ JSON.stringify(rest, null, 2)
527
+ );
528
+
529
+ console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
530
+ return { sessionId, ...rest };
531
+ } catch (e) {
532
+ console.error(`Iteration ${i} failed:`, e.stack || e.message);
533
+ return { iteration: i, error: e.message };
534
+ }
535
+ })());
536
+ }
477
537
 
478
- results.push({ sessionId, ...rest });
479
- console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
538
+ // Wait for all to finish
539
+ const resultsArray = await Promise.all(promises);
540
+ results.push(...resultsArray);
480
541
 
481
- } catch (e) {
482
- console.error(`Iteration ${i} failed:`, e.stack || e.message);
483
- results.push({ iteration: i, error: e.message });
484
- }
542
+ } finally {
543
+ console.log("Closing shared browser...");
544
+ await browser.close();
485
545
  }
486
546
 
487
547
  // Save final-results.json (Consolidated)
@@ -489,7 +549,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
489
549
  results.filter(r => r.verifyResult);
490
550
  const scores = results.map(r => r.verifyResult?.score || 0);
491
551
  const successCount = scores.filter(s => s === 1).length;
492
- const total = 5; // Fixed 5 iterations
552
+ const total = MAX_CONCURRENCY;
493
553
 
494
554
  const rate = `${successCount}/${total}`;
495
555
  const summary = {
@@ -527,7 +587,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
527
587
  };
528
588
  }
529
589
 
530
- async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
590
+ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
531
591
  const fetchJson = async (url, opts) => {
532
592
  const res = await fetch(url, opts);
533
593
  if (!res.ok) {
@@ -562,7 +622,8 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
562
622
  taskInput: TaskJson.task.instruction,
563
623
  targetUrl: targetWeb,
564
624
  sessionId: sessionId,
565
- simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
625
+ simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
626
+ browser: browser // Pass the shared browser instance
566
627
  });
567
628
 
568
629
  // 4. Verify Task
package/dist/index.d.ts CHANGED
@@ -1,6 +1,6 @@
1
+ import { chromium } from 'playwright';
1
2
  import fs from 'fs';
2
3
  import path from 'path';
3
- import { chromium } from 'playwright';
4
4
  import { createOpenAI } from '@ai-sdk/openai';
5
5
  import { generateText } from 'ai';
6
6
  import dotenv from 'dotenv';
@@ -286,7 +286,7 @@ async function executeAction(page, action) {
286
286
  return 'CONTINUE';
287
287
  }
288
288
 
289
- async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
289
+ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
290
290
  console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
291
291
 
292
292
  const screenshotsDir = path.join(process.cwd(), 'screenshots');
@@ -295,11 +295,19 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
295
295
  console.log(`📁 创建截图目录: ${screenshotsDir}`);
296
296
  }
297
297
 
298
- // 2. Launch Browser
299
- const browser = await chromium.launch({
300
- headless: true, // Visible for demo/debug
301
- args: ['--start-maximized'] // Attempt to maximize
302
- });
298
+ // 2. Launch Browser (or use existing)
299
+ let browser;
300
+ let shouldCloseBrowser = false;
301
+
302
+ if (externalBrowser) {
303
+ browser = externalBrowser;
304
+ } else {
305
+ browser = await chromium.launch({
306
+ headless: true, // Visible for demo/debug
307
+ args: ['--start-maximized'] // Attempt to maximize
308
+ });
309
+ shouldCloseBrowser = true;
310
+ }
303
311
 
304
312
  const context = await browser.newContext({
305
313
  viewport: { width: 430, height: 800 } // Set a reasonable fixed viewport
@@ -366,7 +374,33 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
366
374
 
367
375
  // 3. Query AI
368
376
  console.log("Querying AI...");
369
- const aiContent = await queryAI(conversationHistory);
377
+
378
+ // Optimization: Window mode & Filter images
379
+ // 1. Keep last 20 rounds (40 messages) - Better memory than 3 rounds, but prevents infinite growth
380
+ let optimizedHistory = conversationHistory;
381
+ if (conversationHistory.length > 40) {
382
+ optimizedHistory = conversationHistory.slice(-40);
383
+ }
384
+
385
+ // 2. Filter images from older messages (Keep images only in the last 3 rounds / 6 messages)
386
+ // Strategy: "Fading Memory" - Recent = Full (Text+Img), Older = Text only
387
+ optimizedHistory = optimizedHistory.map((msg, index) => {
388
+ // Determine if this message is within the "recent 3 rounds" window
389
+ // length - 1 is the last item. length - 6 is the start of the last 3 rounds (User-AI, User-AI, User-AI)
390
+ const isRecent = index >= optimizedHistory.length - 6;
391
+
392
+ if (isRecent) return msg; // Keep recent messages intact (including images)
393
+
394
+ if (Array.isArray(msg.content)) {
395
+ return {
396
+ ...msg,
397
+ content: msg.content.filter(c => c.type !== 'image')
398
+ };
399
+ }
400
+ return msg;
401
+ });
402
+
403
+ const aiContent = await queryAI(optimizedHistory);
370
404
 
371
405
  if (!aiContent) {
372
406
  console.error("Invalid AI response. Retrying...");
@@ -410,8 +444,13 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
410
444
  console.error("Runtime Custom Error:", error);
411
445
  return { status: 'error', message: error.message };
412
446
  } finally {
413
- console.log("Closing browser...");
414
- await browser.close();
447
+ if (shouldCloseBrowser) {
448
+ console.log("Closing browser...");
449
+ await browser.close();
450
+ } else {
451
+ console.log("Closing context...");
452
+ await context.close();
453
+ }
415
454
  }
416
455
  }
417
456
 
@@ -434,7 +473,6 @@ function getFormattedDate() {
434
473
  * @param {string} taskId - The ID of the task to run
435
474
  */
436
475
  async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
437
- const results = [];
438
476
  const datetime = getFormattedDate();
439
477
  const folderName = `${taskId}_${datetime}`;
440
478
  const outputDir = path.join(process.cwd(), folderName);
@@ -451,37 +489,59 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
451
489
 
452
490
  console.log(`Starting Task Loop for ${taskId}`);
453
491
 
454
- for (let i = 1; i <= 5; i++) {
455
- console.log(`\n=== Starting Iteration ${i}/5 ===`);
456
- try {
457
- const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner);
458
- const { sessionId, agentMessages, ...rest } = data;
459
-
460
- // Create session subdirectory
461
- const sessionDir = path.join(outputDir, sessionId);
462
- if (!fs.existsSync(sessionDir)) {
463
- fs.mkdirSync(sessionDir, { recursive: true });
464
- }
492
+ // Launch a shared browser instance
493
+ const browser = await chromium.launch({
494
+ headless: true,
495
+ args: ['--start-maximized']
496
+ });
465
497
 
466
- // Save messages.json
467
- fs.writeFileSync(
468
- path.join(sessionDir, 'messages.json'),
469
- JSON.stringify(agentMessages, null, 2)
470
- );
498
+ const results = [];
499
+ const MAX_CONCURRENCY = 5;
471
500
 
472
- // Save result.json
473
- fs.writeFileSync(
474
- path.join(sessionDir, 'result.json'),
475
- JSON.stringify(rest, null, 2)
476
- );
501
+ try {
502
+ const promises = [];
503
+ for (let i = 1; i <= MAX_CONCURRENCY; i++) {
504
+ promises.push((async () => {
505
+ console.log(`\n=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
506
+ try {
507
+ // Pass shared browser to executeSingleCycle
508
+ const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
509
+ const { sessionId, agentMessages, ...rest } = data;
510
+
511
+ // Create session subdirectory
512
+ const sessionDir = path.join(outputDir, sessionId);
513
+ if (!fs.existsSync(sessionDir)) {
514
+ fs.mkdirSync(sessionDir, { recursive: true });
515
+ }
516
+
517
+ // Save messages.json
518
+ fs.writeFileSync(
519
+ path.join(sessionDir, 'messages.json'),
520
+ JSON.stringify(agentMessages, null, 2)
521
+ );
522
+
523
+ // Save result.json
524
+ fs.writeFileSync(
525
+ path.join(sessionDir, 'result.json'),
526
+ JSON.stringify(rest, null, 2)
527
+ );
528
+
529
+ console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
530
+ return { sessionId, ...rest };
531
+ } catch (e) {
532
+ console.error(`Iteration ${i} failed:`, e.stack || e.message);
533
+ return { iteration: i, error: e.message };
534
+ }
535
+ })());
536
+ }
477
537
 
478
- results.push({ sessionId, ...rest });
479
- console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
538
+ // Wait for all to finish
539
+ const resultsArray = await Promise.all(promises);
540
+ results.push(...resultsArray);
480
541
 
481
- } catch (e) {
482
- console.error(`Iteration ${i} failed:`, e.stack || e.message);
483
- results.push({ iteration: i, error: e.message });
484
- }
542
+ } finally {
543
+ console.log("Closing shared browser...");
544
+ await browser.close();
485
545
  }
486
546
 
487
547
  // Save final-results.json (Consolidated)
@@ -489,7 +549,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
489
549
  results.filter(r => r.verifyResult);
490
550
  const scores = results.map(r => r.verifyResult?.score || 0);
491
551
  const successCount = scores.filter(s => s === 1).length;
492
- const total = 5; // Fixed 5 iterations
552
+ const total = MAX_CONCURRENCY;
493
553
 
494
554
  const rate = `${successCount}/${total}`;
495
555
  const summary = {
@@ -527,7 +587,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
527
587
  };
528
588
  }
529
589
 
530
- async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
590
+ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
531
591
  const fetchJson = async (url, opts) => {
532
592
  const res = await fetch(url, opts);
533
593
  if (!res.ok) {
@@ -562,7 +622,8 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
562
622
  taskInput: TaskJson.task.instruction,
563
623
  targetUrl: targetWeb,
564
624
  sessionId: sessionId,
565
- simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
625
+ simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
626
+ browser: browser // Pass the shared browser instance
566
627
  });
567
628
 
568
629
  // 4. Verify Task
package/dist/index.js CHANGED
@@ -33,6 +33,7 @@ __export(index_exports, {
33
33
  runTaskLoop: () => runTaskLoop
34
34
  });
35
35
  module.exports = __toCommonJS(index_exports);
36
+ var import_playwright2 = require("playwright");
36
37
  var import_fs2 = __toESM(require("fs"));
37
38
  var import_path3 = __toESM(require("path"));
38
39
 
@@ -294,19 +295,26 @@ async function executeAction(page, action) {
294
295
  }
295
296
 
296
297
  // src/runner.js
297
- async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
298
+ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
298
299
  console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
299
300
  const screenshotsDir = import_path2.default.join(process.cwd(), "screenshots");
300
301
  if (!import_fs.default.existsSync(screenshotsDir)) {
301
302
  import_fs.default.mkdirSync(screenshotsDir, { recursive: true });
302
303
  console.log(`\u{1F4C1} \u521B\u5EFA\u622A\u56FE\u76EE\u5F55: ${screenshotsDir}`);
303
304
  }
304
- const browser = await import_playwright.chromium.launch({
305
- headless: true,
306
- // Visible for demo/debug
307
- args: ["--start-maximized"]
308
- // Attempt to maximize
309
- });
305
+ let browser;
306
+ let shouldCloseBrowser = false;
307
+ if (externalBrowser) {
308
+ browser = externalBrowser;
309
+ } else {
310
+ browser = await import_playwright.chromium.launch({
311
+ headless: true,
312
+ // Visible for demo/debug
313
+ args: ["--start-maximized"]
314
+ // Attempt to maximize
315
+ });
316
+ shouldCloseBrowser = true;
317
+ }
310
318
  const context = await browser.newContext({
311
319
  viewport: { width: 430, height: 800 }
312
320
  // Set a reasonable fixed viewport
@@ -358,7 +366,22 @@ Please perform the next action based on the screenshot.`;
358
366
  content: userContent
359
367
  });
360
368
  console.log("Querying AI...");
361
- const aiContent = await queryAI(conversationHistory);
369
+ let optimizedHistory = conversationHistory;
370
+ if (conversationHistory.length > 40) {
371
+ optimizedHistory = conversationHistory.slice(-40);
372
+ }
373
+ optimizedHistory = optimizedHistory.map((msg, index) => {
374
+ const isRecent = index >= optimizedHistory.length - 6;
375
+ if (isRecent) return msg;
376
+ if (Array.isArray(msg.content)) {
377
+ return {
378
+ ...msg,
379
+ content: msg.content.filter((c) => c.type !== "image")
380
+ };
381
+ }
382
+ return msg;
383
+ });
384
+ const aiContent = await queryAI(optimizedHistory);
362
385
  if (!aiContent) {
363
386
  console.error("Invalid AI response. Retrying...");
364
387
  await page.waitForTimeout(2e3);
@@ -391,8 +414,13 @@ Please perform the next action based on the screenshot.`;
391
414
  console.error("Runtime Custom Error:", error);
392
415
  return { status: "error", message: error.message };
393
416
  } finally {
394
- console.log("Closing browser...");
395
- await browser.close();
417
+ if (shouldCloseBrowser) {
418
+ console.log("Closing browser...");
419
+ await browser.close();
420
+ } else {
421
+ console.log("Closing context...");
422
+ await context.close();
423
+ }
396
424
  }
397
425
  }
398
426
 
@@ -406,7 +434,6 @@ function getFormattedDate() {
406
434
  return `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
407
435
  }
408
436
  async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
409
- const results = [];
410
437
  const datetime = getFormattedDate();
411
438
  const folderName = `${taskId}_${datetime}`;
412
439
  const outputDir = import_path3.default.join(process.cwd(), folderName);
@@ -417,30 +444,46 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
417
444
  if (!targetWeb.startsWith("http")) targetWeb = "http://" + targetWeb;
418
445
  if (!targetServer.startsWith("http")) targetServer = "http://" + targetServer;
419
446
  console.log(`Starting Task Loop for ${taskId}`);
420
- for (let i = 1; i <= 5; i++) {
421
- console.log(`
422
- === Starting Iteration ${i}/5 ===`);
423
- try {
424
- const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner);
425
- const { sessionId, agentMessages, ...rest } = data;
426
- const sessionDir = import_path3.default.join(outputDir, sessionId);
427
- if (!import_fs2.default.existsSync(sessionDir)) {
428
- import_fs2.default.mkdirSync(sessionDir, { recursive: true });
429
- }
430
- import_fs2.default.writeFileSync(
431
- import_path3.default.join(sessionDir, "messages.json"),
432
- JSON.stringify(agentMessages, null, 2)
433
- );
434
- import_fs2.default.writeFileSync(
435
- import_path3.default.join(sessionDir, "result.json"),
436
- JSON.stringify(rest, null, 2)
437
- );
438
- results.push({ sessionId, ...rest });
439
- console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
440
- } catch (e) {
441
- console.error(`Iteration ${i} failed:`, e.stack || e.message);
442
- results.push({ iteration: i, error: e.message });
447
+ const browser = await import_playwright2.chromium.launch({
448
+ headless: true,
449
+ args: ["--start-maximized"]
450
+ });
451
+ const results = [];
452
+ const MAX_CONCURRENCY = 5;
453
+ try {
454
+ const promises = [];
455
+ for (let i = 1; i <= MAX_CONCURRENCY; i++) {
456
+ promises.push((async () => {
457
+ console.log(`
458
+ === Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
459
+ try {
460
+ const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
461
+ const { sessionId, agentMessages, ...rest } = data;
462
+ const sessionDir = import_path3.default.join(outputDir, sessionId);
463
+ if (!import_fs2.default.existsSync(sessionDir)) {
464
+ import_fs2.default.mkdirSync(sessionDir, { recursive: true });
465
+ }
466
+ import_fs2.default.writeFileSync(
467
+ import_path3.default.join(sessionDir, "messages.json"),
468
+ JSON.stringify(agentMessages, null, 2)
469
+ );
470
+ import_fs2.default.writeFileSync(
471
+ import_path3.default.join(sessionDir, "result.json"),
472
+ JSON.stringify(rest, null, 2)
473
+ );
474
+ console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
475
+ return { sessionId, ...rest };
476
+ } catch (e) {
477
+ console.error(`Iteration ${i} failed:`, e.stack || e.message);
478
+ return { iteration: i, error: e.message };
479
+ }
480
+ })());
443
481
  }
482
+ const resultsArray = await Promise.all(promises);
483
+ results.push(...resultsArray);
484
+ } finally {
485
+ console.log("Closing shared browser...");
486
+ await browser.close();
444
487
  }
445
488
  const validResults = results.filter((r) => r.verifyResult);
446
489
  const scores = results.map((r) => {
@@ -448,7 +491,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
448
491
  return ((_a = r.verifyResult) == null ? void 0 : _a.score) || 0;
449
492
  });
450
493
  const successCount = scores.filter((s) => s === 1).length;
451
- const total = 5;
494
+ const total = MAX_CONCURRENCY;
452
495
  const rate = `${successCount}/${total}`;
453
496
  const summary = {
454
497
  rate,
@@ -479,7 +522,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
479
522
  taskId
480
523
  };
481
524
  }
482
- async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
525
+ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
483
526
  const fetchJson = async (url, opts) => {
484
527
  const res = await fetch(url, opts);
485
528
  if (!res.ok) {
@@ -507,7 +550,9 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
507
550
  taskInput: TaskJson.task.instruction,
508
551
  targetUrl: targetWeb,
509
552
  sessionId,
510
- simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
553
+ simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
554
+ browser
555
+ // Pass the shared browser instance
511
556
  });
512
557
  console.log("Verifying result...");
513
558
  const verifyUrl = `${targetServer}/api/verify/run`;
package/dist/index.mjs CHANGED
@@ -1,4 +1,5 @@
1
1
  // src/index.js
2
+ import { chromium as chromium2 } from "playwright";
2
3
  import fs2 from "fs";
3
4
  import path3 from "path";
4
5
 
@@ -260,19 +261,26 @@ async function executeAction(page, action) {
260
261
  }
261
262
 
262
263
  // src/runner.js
263
- async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
264
+ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
264
265
  console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
265
266
  const screenshotsDir = path2.join(process.cwd(), "screenshots");
266
267
  if (!fs.existsSync(screenshotsDir)) {
267
268
  fs.mkdirSync(screenshotsDir, { recursive: true });
268
269
  console.log(`\u{1F4C1} \u521B\u5EFA\u622A\u56FE\u76EE\u5F55: ${screenshotsDir}`);
269
270
  }
270
- const browser = await chromium.launch({
271
- headless: true,
272
- // Visible for demo/debug
273
- args: ["--start-maximized"]
274
- // Attempt to maximize
275
- });
271
+ let browser;
272
+ let shouldCloseBrowser = false;
273
+ if (externalBrowser) {
274
+ browser = externalBrowser;
275
+ } else {
276
+ browser = await chromium.launch({
277
+ headless: true,
278
+ // Visible for demo/debug
279
+ args: ["--start-maximized"]
280
+ // Attempt to maximize
281
+ });
282
+ shouldCloseBrowser = true;
283
+ }
276
284
  const context = await browser.newContext({
277
285
  viewport: { width: 430, height: 800 }
278
286
  // Set a reasonable fixed viewport
@@ -324,7 +332,22 @@ Please perform the next action based on the screenshot.`;
324
332
  content: userContent
325
333
  });
326
334
  console.log("Querying AI...");
327
- const aiContent = await queryAI(conversationHistory);
335
+ let optimizedHistory = conversationHistory;
336
+ if (conversationHistory.length > 40) {
337
+ optimizedHistory = conversationHistory.slice(-40);
338
+ }
339
+ optimizedHistory = optimizedHistory.map((msg, index) => {
340
+ const isRecent = index >= optimizedHistory.length - 6;
341
+ if (isRecent) return msg;
342
+ if (Array.isArray(msg.content)) {
343
+ return {
344
+ ...msg,
345
+ content: msg.content.filter((c) => c.type !== "image")
346
+ };
347
+ }
348
+ return msg;
349
+ });
350
+ const aiContent = await queryAI(optimizedHistory);
328
351
  if (!aiContent) {
329
352
  console.error("Invalid AI response. Retrying...");
330
353
  await page.waitForTimeout(2e3);
@@ -357,8 +380,13 @@ Please perform the next action based on the screenshot.`;
357
380
  console.error("Runtime Custom Error:", error);
358
381
  return { status: "error", message: error.message };
359
382
  } finally {
360
- console.log("Closing browser...");
361
- await browser.close();
383
+ if (shouldCloseBrowser) {
384
+ console.log("Closing browser...");
385
+ await browser.close();
386
+ } else {
387
+ console.log("Closing context...");
388
+ await context.close();
389
+ }
362
390
  }
363
391
  }
364
392
 
@@ -372,7 +400,6 @@ function getFormattedDate() {
372
400
  return `${now.getFullYear()}${pad(now.getMonth() + 1)}${pad(now.getDate())}_${pad(now.getHours())}${pad(now.getMinutes())}${pad(now.getSeconds())}`;
373
401
  }
374
402
  async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
375
- const results = [];
376
403
  const datetime = getFormattedDate();
377
404
  const folderName = `${taskId}_${datetime}`;
378
405
  const outputDir = path3.join(process.cwd(), folderName);
@@ -383,30 +410,46 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
383
410
  if (!targetWeb.startsWith("http")) targetWeb = "http://" + targetWeb;
384
411
  if (!targetServer.startsWith("http")) targetServer = "http://" + targetServer;
385
412
  console.log(`Starting Task Loop for ${taskId}`);
386
- for (let i = 1; i <= 5; i++) {
387
- console.log(`
388
- === Starting Iteration ${i}/5 ===`);
389
- try {
390
- const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner);
391
- const { sessionId, agentMessages, ...rest } = data;
392
- const sessionDir = path3.join(outputDir, sessionId);
393
- if (!fs2.existsSync(sessionDir)) {
394
- fs2.mkdirSync(sessionDir, { recursive: true });
395
- }
396
- fs2.writeFileSync(
397
- path3.join(sessionDir, "messages.json"),
398
- JSON.stringify(agentMessages, null, 2)
399
- );
400
- fs2.writeFileSync(
401
- path3.join(sessionDir, "result.json"),
402
- JSON.stringify(rest, null, 2)
403
- );
404
- results.push({ sessionId, ...rest });
405
- console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
406
- } catch (e) {
407
- console.error(`Iteration ${i} failed:`, e.stack || e.message);
408
- results.push({ iteration: i, error: e.message });
413
+ const browser = await chromium2.launch({
414
+ headless: true,
415
+ args: ["--start-maximized"]
416
+ });
417
+ const results = [];
418
+ const MAX_CONCURRENCY = 5;
419
+ try {
420
+ const promises = [];
421
+ for (let i = 1; i <= MAX_CONCURRENCY; i++) {
422
+ promises.push((async () => {
423
+ console.log(`
424
+ === Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
425
+ try {
426
+ const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
427
+ const { sessionId, agentMessages, ...rest } = data;
428
+ const sessionDir = path3.join(outputDir, sessionId);
429
+ if (!fs2.existsSync(sessionDir)) {
430
+ fs2.mkdirSync(sessionDir, { recursive: true });
431
+ }
432
+ fs2.writeFileSync(
433
+ path3.join(sessionDir, "messages.json"),
434
+ JSON.stringify(agentMessages, null, 2)
435
+ );
436
+ fs2.writeFileSync(
437
+ path3.join(sessionDir, "result.json"),
438
+ JSON.stringify(rest, null, 2)
439
+ );
440
+ console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
441
+ return { sessionId, ...rest };
442
+ } catch (e) {
443
+ console.error(`Iteration ${i} failed:`, e.stack || e.message);
444
+ return { iteration: i, error: e.message };
445
+ }
446
+ })());
409
447
  }
448
+ const resultsArray = await Promise.all(promises);
449
+ results.push(...resultsArray);
450
+ } finally {
451
+ console.log("Closing shared browser...");
452
+ await browser.close();
410
453
  }
411
454
  const validResults = results.filter((r) => r.verifyResult);
412
455
  const scores = results.map((r) => {
@@ -414,7 +457,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
414
457
  return ((_a = r.verifyResult) == null ? void 0 : _a.score) || 0;
415
458
  });
416
459
  const successCount = scores.filter((s) => s === 1).length;
417
- const total = 5;
460
+ const total = MAX_CONCURRENCY;
418
461
  const rate = `${successCount}/${total}`;
419
462
  const summary = {
420
463
  rate,
@@ -445,7 +488,7 @@ async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAge
445
488
  taskId
446
489
  };
447
490
  }
448
- async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
491
+ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
449
492
  const fetchJson = async (url, opts) => {
450
493
  const res = await fetch(url, opts);
451
494
  if (!res.ok) {
@@ -473,7 +516,9 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
473
516
  taskInput: TaskJson.task.instruction,
474
517
  targetUrl: targetWeb,
475
518
  sessionId,
476
- simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
519
+ simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
520
+ browser
521
+ // Pass the shared browser instance
477
522
  });
478
523
  console.log("Verifying result...");
479
524
  const verifyUrl = `${targetServer}/api/verify/run`;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rl-simulator-core",
3
- "version": "1.0.8",
3
+ "version": "1.2.0",
4
4
  "main": "./dist/index.js",
5
5
  "module": "./dist/index.mjs",
6
6
  "types": "./dist/index.d.ts",
package/src/index.js CHANGED
@@ -1,4 +1,4 @@
1
-
1
+ import { chromium } from 'playwright';
2
2
  import fs from 'fs';
3
3
  import path from 'path';
4
4
  import { runAgent } from './runner.js';
@@ -26,7 +26,6 @@ function getFormattedDate() {
26
26
  * @param {string} taskId - The ID of the task to run
27
27
  */
28
28
  export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner = runAgent) {
29
- const results = [];
30
29
  const datetime = getFormattedDate();
31
30
  const folderName = `${taskId}_${datetime}`;
32
31
  const outputDir = path.join(process.cwd(), folderName);
@@ -43,37 +42,59 @@ export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner =
43
42
 
44
43
  console.log(`Starting Task Loop for ${taskId}`);
45
44
 
46
- for (let i = 1; i <= 5; i++) {
47
- console.log(`\n=== Starting Iteration ${i}/5 ===`);
48
- try {
49
- const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner);
50
- const { sessionId, agentMessages, ...rest } = data;
51
-
52
- // Create session subdirectory
53
- const sessionDir = path.join(outputDir, sessionId);
54
- if (!fs.existsSync(sessionDir)) {
55
- fs.mkdirSync(sessionDir, { recursive: true });
56
- }
57
-
58
- // Save messages.json
59
- fs.writeFileSync(
60
- path.join(sessionDir, 'messages.json'),
61
- JSON.stringify(agentMessages, null, 2)
62
- );
63
-
64
- // Save result.json
65
- fs.writeFileSync(
66
- path.join(sessionDir, 'result.json'),
67
- JSON.stringify(rest, null, 2)
68
- );
69
-
70
- results.push({ sessionId, ...rest });
71
- console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
72
-
73
- } catch (e) {
74
- console.error(`Iteration ${i} failed:`, e.stack || e.message);
75
- results.push({ iteration: i, error: e.message });
45
+ // Launch a shared browser instance
46
+ const browser = await chromium.launch({
47
+ headless: true,
48
+ args: ['--start-maximized']
49
+ });
50
+
51
+ const results = [];
52
+ const MAX_CONCURRENCY = 5;
53
+
54
+ try {
55
+ const promises = [];
56
+ for (let i = 1; i <= MAX_CONCURRENCY; i++) {
57
+ promises.push((async () => {
58
+ console.log(`\n=== Starting Iteration ${i}/${MAX_CONCURRENCY} ===`);
59
+ try {
60
+ // Pass shared browser to executeSingleCycle
61
+ const data = await executeSingleCycle(targetWeb, targetServer, taskId, i, agentRunner, browser);
62
+ const { sessionId, agentMessages, ...rest } = data;
63
+
64
+ // Create session subdirectory
65
+ const sessionDir = path.join(outputDir, sessionId);
66
+ if (!fs.existsSync(sessionDir)) {
67
+ fs.mkdirSync(sessionDir, { recursive: true });
68
+ }
69
+
70
+ // Save messages.json
71
+ fs.writeFileSync(
72
+ path.join(sessionDir, 'messages.json'),
73
+ JSON.stringify(agentMessages, null, 2)
74
+ );
75
+
76
+ // Save result.json
77
+ fs.writeFileSync(
78
+ path.join(sessionDir, 'result.json'),
79
+ JSON.stringify(rest, null, 2)
80
+ );
81
+
82
+ console.log(`Iteration ${i} completed. Saved to ${sessionDir}`);
83
+ return { sessionId, ...rest };
84
+ } catch (e) {
85
+ console.error(`Iteration ${i} failed:`, e.stack || e.message);
86
+ return { iteration: i, error: e.message };
87
+ }
88
+ })());
76
89
  }
90
+
91
+ // Wait for all to finish
92
+ const resultsArray = await Promise.all(promises);
93
+ results.push(...resultsArray);
94
+
95
+ } finally {
96
+ console.log("Closing shared browser...");
97
+ await browser.close();
77
98
  }
78
99
 
79
100
  // Save final-results.json (Consolidated)
@@ -81,7 +102,7 @@ export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner =
81
102
  const validResults = results.filter(r => r.verifyResult);
82
103
  const scores = results.map(r => r.verifyResult?.score || 0);
83
104
  const successCount = scores.filter(s => s === 1).length;
84
- const total = 5; // Fixed 5 iterations
105
+ const total = MAX_CONCURRENCY;
85
106
 
86
107
  const rate = `${successCount}/${total}`;
87
108
  const summary = {
@@ -119,7 +140,7 @@ export async function runTaskLoop(targetWeb, targetServer, taskId, agentRunner =
119
140
  };
120
141
  }
121
142
 
122
- async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner) {
143
+ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, agentRunner, browser) {
123
144
  const fetchJson = async (url, opts) => {
124
145
  const res = await fetch(url, opts);
125
146
  if (!res.ok) {
@@ -154,7 +175,8 @@ async function executeSingleCycle(targetWeb, targetServer, taskId, iteration, ag
154
175
  taskInput: TaskJson.task.instruction,
155
176
  targetUrl: targetWeb,
156
177
  sessionId: sessionId,
157
- simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info
178
+ simulatedUserKnownInfo: TaskJson.task.simulated_user_known_info,
179
+ browser: browser // Pass the shared browser instance
158
180
  });
159
181
 
160
182
  // 4. Verify Task
package/src/runner.js CHANGED
@@ -4,7 +4,7 @@ import path from 'path';
4
4
  import { queryAI, parseActions } from './ai.js';
5
5
  import { executeAction } from './actions.js';
6
6
 
7
- async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo }) {
7
+ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInfo, browser: externalBrowser }) {
8
8
  console.log(`[Agent] Starting task: ${taskInput} on ${targetUrl} (Session: ${sessionId})`);
9
9
 
10
10
  const screenshotsDir = path.join(process.cwd(), 'screenshots');
@@ -13,11 +13,19 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
13
13
  console.log(`📁 创建截图目录: ${screenshotsDir}`);
14
14
  }
15
15
 
16
- // 2. Launch Browser
17
- const browser = await chromium.launch({
18
- headless: true, // Visible for demo/debug
19
- args: ['--start-maximized'] // Attempt to maximize
20
- });
16
+ // 2. Launch Browser (or use existing)
17
+ let browser;
18
+ let shouldCloseBrowser = false;
19
+
20
+ if (externalBrowser) {
21
+ browser = externalBrowser;
22
+ } else {
23
+ browser = await chromium.launch({
24
+ headless: true, // Visible for demo/debug
25
+ args: ['--start-maximized'] // Attempt to maximize
26
+ });
27
+ shouldCloseBrowser = true;
28
+ }
21
29
 
22
30
  const context = await browser.newContext({
23
31
  viewport: { width: 430, height: 800 } // Set a reasonable fixed viewport
@@ -84,7 +92,33 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
84
92
 
85
93
  // 3. Query AI
86
94
  console.log("Querying AI...");
87
- const aiContent = await queryAI(conversationHistory);
95
+
96
+ // Optimization: Window mode & Filter images
97
+ // 1. Keep last 20 rounds (40 messages) - Better memory than 3 rounds, but prevents infinite growth
98
+ let optimizedHistory = conversationHistory;
99
+ if (conversationHistory.length > 40) {
100
+ optimizedHistory = conversationHistory.slice(-40);
101
+ }
102
+
103
+ // 2. Filter images from older messages (Keep images only in the last 3 rounds / 6 messages)
104
+ // Strategy: "Fading Memory" - Recent = Full (Text+Img), Older = Text only
105
+ optimizedHistory = optimizedHistory.map((msg, index) => {
106
+ // Determine if this message is within the "recent 3 rounds" window
107
+ // length - 1 is the last item. length - 6 is the start of the last 3 rounds (User-AI, User-AI, User-AI)
108
+ const isRecent = index >= optimizedHistory.length - 6;
109
+
110
+ if (isRecent) return msg; // Keep recent messages intact (including images)
111
+
112
+ if (Array.isArray(msg.content)) {
113
+ return {
114
+ ...msg,
115
+ content: msg.content.filter(c => c.type !== 'image')
116
+ };
117
+ }
118
+ return msg;
119
+ });
120
+
121
+ const aiContent = await queryAI(optimizedHistory);
88
122
 
89
123
  if (!aiContent) {
90
124
  console.error("Invalid AI response. Retrying...");
@@ -128,8 +162,13 @@ async function runAgent({ targetUrl, taskInput, sessionId, simulatedUserKnownInf
128
162
  console.error("Runtime Custom Error:", error);
129
163
  return { status: 'error', message: error.message };
130
164
  } finally {
131
- console.log("Closing browser...");
132
- await browser.close();
165
+ if (shouldCloseBrowser) {
166
+ console.log("Closing browser...");
167
+ await browser.close();
168
+ } else {
169
+ console.log("Closing context...");
170
+ await context.close();
171
+ }
133
172
  }
134
173
  }
135
174