@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
package/dist/lib/index.js CHANGED
@@ -313,14 +313,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
313
313
  // src/common/agent.ts
314
314
  var import_utils11 = require("@midscene/core/utils");
315
315
  var import_constants = require("@midscene/shared/constants");
316
- var import_env3 = require("@midscene/shared/env");
317
- var import_logger3 = require("@midscene/shared/logger");
316
+ var import_env6 = require("@midscene/shared/env");
317
+ var import_logger4 = require("@midscene/shared/logger");
318
318
  var import_utils12 = require("@midscene/shared/utils");
319
319
 
320
320
  // src/common/tasks.ts
321
321
  var import_core = require("@midscene/core");
322
322
  var import_ai_model = require("@midscene/core/ai-model");
323
323
  var import_utils8 = require("@midscene/core/utils");
324
+ var import_env3 = require("@midscene/shared/env");
325
+ var import_env4 = require("@midscene/shared/env");
326
+ var import_env5 = require("@midscene/shared/env");
327
+ var import_img2 = require("@midscene/shared/img");
328
+ var import_logger2 = require("@midscene/shared/logger");
324
329
  var import_utils9 = require("@midscene/shared/utils");
325
330
 
326
331
  // src/common/task-cache.ts
@@ -423,7 +428,7 @@ function printReportMsg(filepath) {
423
428
  function replaceIllegalPathCharsAndSpace(str) {
424
429
  return str.replace(/[/\\:*?"<>| ]/g, "-");
425
430
  }
426
- function forceClosePopup(page, debug6) {
431
+ function forceClosePopup(page, debug7) {
427
432
  page.on("popup", async (popup) => {
428
433
  if (!popup) {
429
434
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -435,19 +440,19 @@ function forceClosePopup(page, debug6) {
435
440
  try {
436
441
  await popup.close();
437
442
  } catch (error) {
438
- debug6(`failed to close popup ${url}, error: ${error}`);
443
+ debug7(`failed to close popup ${url}, error: ${error}`);
439
444
  }
440
445
  } else {
441
- debug6(`popup is already closed, skip close ${url}`);
446
+ debug7(`popup is already closed, skip close ${url}`);
442
447
  }
443
448
  if (!page.isClosed()) {
444
449
  try {
445
450
  await page.goto(url);
446
451
  } catch (error) {
447
- debug6(`failed to goto ${url}, error: ${error}`);
452
+ debug7(`failed to goto ${url}, error: ${error}`);
448
453
  }
449
454
  } else {
450
- debug6(`page is already closed, skip goto ${url}`);
455
+ debug7(`page is already closed, skip goto ${url}`);
451
456
  }
452
457
  });
453
458
  }
@@ -729,6 +734,7 @@ function paramStr(task) {
729
734
  }
730
735
 
731
736
  // src/common/tasks.ts
737
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
732
738
  var replanningCountLimit = 10;
733
739
  var isAndroidPage = (page) => {
734
740
  return page.pageType === "android";
@@ -1317,13 +1323,37 @@ var PageTaskExecutor = class {
1317
1323
  };
1318
1324
  executorContext.task.recorder = [recordItem];
1319
1325
  executorContext.task.pageContext = pageContext;
1326
+ let imagePayload = pageContext.screenshotBase64;
1327
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1328
+ const size = pageContext.size;
1329
+ debug2("ui-tars-v1.5, will check image size", size);
1330
+ const currentPixels = size.width * size.height;
1331
+ const maxPixels = 16384 * 28 * 28;
1332
+ if (currentPixels > maxPixels) {
1333
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1334
+ const newWidth = Math.floor(size.width * resizeFactor);
1335
+ const newHeight = Math.floor(size.height * resizeFactor);
1336
+ debug2(
1337
+ "resize image",
1338
+ import_img2.imageInfo,
1339
+ "new width",
1340
+ newWidth,
1341
+ "new height",
1342
+ newHeight
1343
+ );
1344
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1345
+ width: newWidth,
1346
+ height: newHeight
1347
+ });
1348
+ }
1349
+ }
1320
1350
  this.appendConversationHistory({
1321
1351
  role: "user",
1322
1352
  content: [
1323
1353
  {
1324
1354
  type: "image_url",
1325
1355
  image_url: {
1326
- url: pageContext.screenshotBase64
1356
+ url: imagePayload
1327
1357
  }
1328
1358
  }
1329
1359
  ]
@@ -1666,9 +1696,9 @@ var PageTaskExecutor = class {
1666
1696
  };
1667
1697
 
1668
1698
  // src/common/plan-builder.ts
1669
- var import_logger2 = require("@midscene/shared/logger");
1699
+ var import_logger3 = require("@midscene/shared/logger");
1670
1700
  var import_utils10 = require("@midscene/shared/utils");
1671
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1701
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1672
1702
  function buildPlans(type, locateParam, param) {
1673
1703
  let returnPlans = [];
1674
1704
  const locatePlan = locateParam ? {
@@ -1730,14 +1760,14 @@ function buildPlans(type, locateParam, param) {
1730
1760
  returnPlans = [sleepPlan];
1731
1761
  }
1732
1762
  if (returnPlans) {
1733
- debug2("buildPlans", returnPlans);
1763
+ debug3("buildPlans", returnPlans);
1734
1764
  return returnPlans;
1735
1765
  }
1736
1766
  throw new Error(`Not supported type: ${type}`);
1737
1767
  }
1738
1768
 
1739
1769
  // src/common/agent.ts
1740
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1770
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1741
1771
  var PageAgent = class {
1742
1772
  constructor(page, opts) {
1743
1773
  /**
@@ -1780,7 +1810,7 @@ var PageAgent = class {
1780
1810
  });
1781
1811
  }
1782
1812
  return await parseContextFromWebPage(this.page, {
1783
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1813
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1784
1814
  });
1785
1815
  }
1786
1816
  async setAIActionContext(prompt) {
@@ -1815,7 +1845,7 @@ var PageAgent = class {
1815
1845
  type: "dump",
1816
1846
  generateReport
1817
1847
  });
1818
- debug3("writeOutActionDumps", this.reportFile);
1848
+ debug4("writeOutActionDumps", this.reportFile);
1819
1849
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1820
1850
  printReportMsg(this.reportFile);
1821
1851
  }
@@ -1919,7 +1949,7 @@ ${errorTask?.errorStack}`);
1919
1949
  return output;
1920
1950
  }
1921
1951
  async aiAction(taskPrompt) {
1922
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1952
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1923
1953
  this.afterTaskRunning(executor);
1924
1954
  return output;
1925
1955
  }
@@ -2006,9 +2036,9 @@ var import_utils15 = require("@midscene/core/utils");
2006
2036
  var import_constants2 = require("@midscene/shared/constants");
2007
2037
  var import_extractor2 = require("@midscene/shared/extractor");
2008
2038
  var import_fs2 = require("@midscene/shared/fs");
2009
- var import_logger4 = require("@midscene/shared/logger");
2039
+ var import_logger5 = require("@midscene/shared/logger");
2010
2040
  var import_utils16 = require("@midscene/shared/utils");
2011
- var debugPage = (0, import_logger4.getDebug)("web:page");
2041
+ var debugPage = (0, import_logger5.getDebug)("web:page");
2012
2042
  var Page = class {
2013
2043
  constructor(underlyingPage, pageType, opts) {
2014
2044
  this.everMoved = false;
@@ -2278,9 +2308,9 @@ var WebPage = class extends Page {
2278
2308
 
2279
2309
  // src/playwright/ai-fixture.ts
2280
2310
  var import_node_crypto = require("crypto");
2281
- var import_logger5 = require("@midscene/shared/logger");
2311
+ var import_logger6 = require("@midscene/shared/logger");
2282
2312
  var import_test = require("@playwright/test");
2283
- var debugPage2 = (0, import_logger5.getDebug)("web:playwright:ai-fixture");
2313
+ var debugPage2 = (0, import_logger6.getDebug)("web:playwright:ai-fixture");
2284
2314
  var groupAndCaseForTest = (testInfo) => {
2285
2315
  let taskFile;
2286
2316
  let taskTitle;
@@ -2467,16 +2497,16 @@ var PlaywrightAiFixture = (options) => {
2467
2497
  };
2468
2498
 
2469
2499
  // src/playwright/index.ts
2470
- var import_env4 = require("@midscene/shared/env");
2471
- var import_logger6 = require("@midscene/shared/logger");
2472
- var debug4 = (0, import_logger6.getDebug)("playwright:agent");
2500
+ var import_env7 = require("@midscene/shared/env");
2501
+ var import_logger7 = require("@midscene/shared/logger");
2502
+ var debug5 = (0, import_logger7.getDebug)("playwright:agent");
2473
2503
  var PlaywrightAgent = class extends PageAgent {
2474
2504
  constructor(page, opts) {
2475
2505
  const webPage = new WebPage(page);
2476
2506
  super(webPage, opts);
2477
2507
  const { forceSameTabNavigation = true } = opts ?? {};
2478
2508
  if (forceSameTabNavigation) {
2479
- forceClosePopup(page, debug4);
2509
+ forceClosePopup(page, debug5);
2480
2510
  }
2481
2511
  }
2482
2512
  async waitForNetworkIdle(timeout = 1e3) {
@@ -2485,7 +2515,7 @@ var PlaywrightAgent = class extends PageAgent {
2485
2515
  };
2486
2516
 
2487
2517
  // src/puppeteer/index.ts
2488
- var import_logger7 = require("@midscene/shared/logger");
2518
+ var import_logger8 = require("@midscene/shared/logger");
2489
2519
 
2490
2520
  // src/puppeteer/page.ts
2491
2521
  var import_constants3 = require("@midscene/shared/constants");
@@ -2509,15 +2539,15 @@ var WebPage2 = class extends Page {
2509
2539
  };
2510
2540
 
2511
2541
  // src/puppeteer/index.ts
2512
- var import_env5 = require("@midscene/shared/env");
2513
- var debug5 = (0, import_logger7.getDebug)("puppeteer:agent");
2542
+ var import_env8 = require("@midscene/shared/env");
2543
+ var debug6 = (0, import_logger8.getDebug)("puppeteer:agent");
2514
2544
  var PuppeteerAgent = class extends PageAgent {
2515
2545
  constructor(page, opts) {
2516
2546
  const webPage = new WebPage2(page);
2517
2547
  super(webPage, opts);
2518
2548
  const { forceSameTabNavigation = true } = opts ?? {};
2519
2549
  if (forceSameTabNavigation) {
2520
- forceClosePopup(page, debug5);
2550
+ forceClosePopup(page, debug6);
2521
2551
  }
2522
2552
  }
2523
2553
  };