@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -33,7 +33,7 @@ __export(playwright_exports, {
33
33
  PlaywrightAgent: () => PlaywrightAgent,
34
34
  PlaywrightAiFixture: () => PlaywrightAiFixture,
35
35
  PlaywrightWebPage: () => WebPage,
36
- overrideAIConfig: () => import_env4.overrideAIConfig
36
+ overrideAIConfig: () => import_env7.overrideAIConfig
37
37
  });
38
38
  module.exports = __toCommonJS(playwright_exports);
39
39
 
@@ -309,14 +309,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
309
309
  // src/common/agent.ts
310
310
  var import_utils11 = require("@midscene/core/utils");
311
311
  var import_constants = require("@midscene/shared/constants");
312
- var import_env3 = require("@midscene/shared/env");
313
- var import_logger3 = require("@midscene/shared/logger");
312
+ var import_env6 = require("@midscene/shared/env");
313
+ var import_logger4 = require("@midscene/shared/logger");
314
314
  var import_utils12 = require("@midscene/shared/utils");
315
315
 
316
316
  // src/common/tasks.ts
317
317
  var import_core = require("@midscene/core");
318
318
  var import_ai_model = require("@midscene/core/ai-model");
319
319
  var import_utils8 = require("@midscene/core/utils");
320
+ var import_env3 = require("@midscene/shared/env");
321
+ var import_env4 = require("@midscene/shared/env");
322
+ var import_env5 = require("@midscene/shared/env");
323
+ var import_img2 = require("@midscene/shared/img");
324
+ var import_logger2 = require("@midscene/shared/logger");
320
325
  var import_utils9 = require("@midscene/shared/utils");
321
326
 
322
327
  // src/common/task-cache.ts
@@ -419,7 +424,7 @@ function printReportMsg(filepath) {
419
424
  function replaceIllegalPathCharsAndSpace(str) {
420
425
  return str.replace(/[/\\:*?"<>| ]/g, "-");
421
426
  }
422
- function forceClosePopup(page, debug5) {
427
+ function forceClosePopup(page, debug6) {
423
428
  page.on("popup", async (popup) => {
424
429
  if (!popup) {
425
430
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -431,19 +436,19 @@ function forceClosePopup(page, debug5) {
431
436
  try {
432
437
  await popup.close();
433
438
  } catch (error) {
434
- debug5(`failed to close popup ${url}, error: ${error}`);
439
+ debug6(`failed to close popup ${url}, error: ${error}`);
435
440
  }
436
441
  } else {
437
- debug5(`popup is already closed, skip close ${url}`);
442
+ debug6(`popup is already closed, skip close ${url}`);
438
443
  }
439
444
  if (!page.isClosed()) {
440
445
  try {
441
446
  await page.goto(url);
442
447
  } catch (error) {
443
- debug5(`failed to goto ${url}, error: ${error}`);
448
+ debug6(`failed to goto ${url}, error: ${error}`);
444
449
  }
445
450
  } else {
446
- debug5(`page is already closed, skip goto ${url}`);
451
+ debug6(`page is already closed, skip goto ${url}`);
447
452
  }
448
453
  });
449
454
  }
@@ -725,6 +730,7 @@ function paramStr(task) {
725
730
  }
726
731
 
727
732
  // src/common/tasks.ts
733
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
728
734
  var replanningCountLimit = 10;
729
735
  var isAndroidPage = (page) => {
730
736
  return page.pageType === "android";
@@ -1313,13 +1319,37 @@ var PageTaskExecutor = class {
1313
1319
  };
1314
1320
  executorContext.task.recorder = [recordItem];
1315
1321
  executorContext.task.pageContext = pageContext;
1322
+ let imagePayload = pageContext.screenshotBase64;
1323
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1324
+ const size = pageContext.size;
1325
+ debug2("ui-tars-v1.5, will check image size", size);
1326
+ const currentPixels = size.width * size.height;
1327
+ const maxPixels = 16384 * 28 * 28;
1328
+ if (currentPixels > maxPixels) {
1329
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1330
+ const newWidth = Math.floor(size.width * resizeFactor);
1331
+ const newHeight = Math.floor(size.height * resizeFactor);
1332
+ debug2(
1333
+ "resize image",
1334
+ import_img2.imageInfo,
1335
+ "new width",
1336
+ newWidth,
1337
+ "new height",
1338
+ newHeight
1339
+ );
1340
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1341
+ width: newWidth,
1342
+ height: newHeight
1343
+ });
1344
+ }
1345
+ }
1316
1346
  this.appendConversationHistory({
1317
1347
  role: "user",
1318
1348
  content: [
1319
1349
  {
1320
1350
  type: "image_url",
1321
1351
  image_url: {
1322
- url: pageContext.screenshotBase64
1352
+ url: imagePayload
1323
1353
  }
1324
1354
  }
1325
1355
  ]
@@ -1662,9 +1692,9 @@ var PageTaskExecutor = class {
1662
1692
  };
1663
1693
 
1664
1694
  // src/common/plan-builder.ts
1665
- var import_logger2 = require("@midscene/shared/logger");
1695
+ var import_logger3 = require("@midscene/shared/logger");
1666
1696
  var import_utils10 = require("@midscene/shared/utils");
1667
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1697
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1668
1698
  function buildPlans(type, locateParam, param) {
1669
1699
  let returnPlans = [];
1670
1700
  const locatePlan = locateParam ? {
@@ -1726,14 +1756,14 @@ function buildPlans(type, locateParam, param) {
1726
1756
  returnPlans = [sleepPlan];
1727
1757
  }
1728
1758
  if (returnPlans) {
1729
- debug2("buildPlans", returnPlans);
1759
+ debug3("buildPlans", returnPlans);
1730
1760
  return returnPlans;
1731
1761
  }
1732
1762
  throw new Error(`Not supported type: ${type}`);
1733
1763
  }
1734
1764
 
1735
1765
  // src/common/agent.ts
1736
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1766
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1737
1767
  var PageAgent = class {
1738
1768
  constructor(page, opts) {
1739
1769
  /**
@@ -1776,7 +1806,7 @@ var PageAgent = class {
1776
1806
  });
1777
1807
  }
1778
1808
  return await parseContextFromWebPage(this.page, {
1779
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1809
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1780
1810
  });
1781
1811
  }
1782
1812
  async setAIActionContext(prompt) {
@@ -1811,7 +1841,7 @@ var PageAgent = class {
1811
1841
  type: "dump",
1812
1842
  generateReport
1813
1843
  });
1814
- debug3("writeOutActionDumps", this.reportFile);
1844
+ debug4("writeOutActionDumps", this.reportFile);
1815
1845
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1816
1846
  printReportMsg(this.reportFile);
1817
1847
  }
@@ -1915,7 +1945,7 @@ ${errorTask?.errorStack}`);
1915
1945
  return output;
1916
1946
  }
1917
1947
  async aiAction(taskPrompt) {
1918
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1948
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1919
1949
  this.afterTaskRunning(executor);
1920
1950
  return output;
1921
1951
  }
@@ -2002,9 +2032,9 @@ var import_utils15 = require("@midscene/core/utils");
2002
2032
  var import_constants2 = require("@midscene/shared/constants");
2003
2033
  var import_extractor2 = require("@midscene/shared/extractor");
2004
2034
  var import_fs2 = require("@midscene/shared/fs");
2005
- var import_logger4 = require("@midscene/shared/logger");
2035
+ var import_logger5 = require("@midscene/shared/logger");
2006
2036
  var import_utils16 = require("@midscene/shared/utils");
2007
- var debugPage = (0, import_logger4.getDebug)("web:page");
2037
+ var debugPage = (0, import_logger5.getDebug)("web:page");
2008
2038
  var Page = class {
2009
2039
  constructor(underlyingPage, pageType, opts) {
2010
2040
  this.everMoved = false;
@@ -2274,9 +2304,9 @@ var WebPage = class extends Page {
2274
2304
 
2275
2305
  // src/playwright/ai-fixture.ts
2276
2306
  var import_node_crypto = require("crypto");
2277
- var import_logger5 = require("@midscene/shared/logger");
2307
+ var import_logger6 = require("@midscene/shared/logger");
2278
2308
  var import_test = require("@playwright/test");
2279
- var debugPage2 = (0, import_logger5.getDebug)("web:playwright:ai-fixture");
2309
+ var debugPage2 = (0, import_logger6.getDebug)("web:playwright:ai-fixture");
2280
2310
  var groupAndCaseForTest = (testInfo) => {
2281
2311
  let taskFile;
2282
2312
  let taskTitle;
@@ -2463,16 +2493,16 @@ var PlaywrightAiFixture = (options) => {
2463
2493
  };
2464
2494
 
2465
2495
  // src/playwright/index.ts
2466
- var import_env4 = require("@midscene/shared/env");
2467
- var import_logger6 = require("@midscene/shared/logger");
2468
- var debug4 = (0, import_logger6.getDebug)("playwright:agent");
2496
+ var import_env7 = require("@midscene/shared/env");
2497
+ var import_logger7 = require("@midscene/shared/logger");
2498
+ var debug5 = (0, import_logger7.getDebug)("playwright:agent");
2469
2499
  var PlaywrightAgent = class extends PageAgent {
2470
2500
  constructor(page, opts) {
2471
2501
  const webPage = new WebPage(page);
2472
2502
  super(webPage, opts);
2473
2503
  const { forceSameTabNavigation = true } = opts ?? {};
2474
2504
  if (forceSameTabNavigation) {
2475
- forceClosePopup(page, debug4);
2505
+ forceClosePopup(page, debug5);
2476
2506
  }
2477
2507
  }
2478
2508
  async waitForNetworkIdle(timeout = 1e3) {