@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -32,7 +32,7 @@ var puppeteer_exports = {};
32
32
  __export(puppeteer_exports, {
33
33
  PuppeteerAgent: () => PuppeteerAgent,
34
34
  PuppeteerWebPage: () => WebPage,
35
- overrideAIConfig: () => import_env4.overrideAIConfig
35
+ overrideAIConfig: () => import_env7.overrideAIConfig
36
36
  });
37
37
  module.exports = __toCommonJS(puppeteer_exports);
38
38
 
@@ -308,14 +308,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
308
308
  // src/common/agent.ts
309
309
  var import_utils11 = require("@midscene/core/utils");
310
310
  var import_constants = require("@midscene/shared/constants");
311
- var import_env3 = require("@midscene/shared/env");
312
- var import_logger3 = require("@midscene/shared/logger");
311
+ var import_env6 = require("@midscene/shared/env");
312
+ var import_logger4 = require("@midscene/shared/logger");
313
313
  var import_utils12 = require("@midscene/shared/utils");
314
314
 
315
315
  // src/common/tasks.ts
316
316
  var import_core = require("@midscene/core");
317
317
  var import_ai_model = require("@midscene/core/ai-model");
318
318
  var import_utils8 = require("@midscene/core/utils");
319
+ var import_env3 = require("@midscene/shared/env");
320
+ var import_env4 = require("@midscene/shared/env");
321
+ var import_env5 = require("@midscene/shared/env");
322
+ var import_img2 = require("@midscene/shared/img");
323
+ var import_logger2 = require("@midscene/shared/logger");
319
324
  var import_utils9 = require("@midscene/shared/utils");
320
325
 
321
326
  // src/common/task-cache.ts
@@ -418,7 +423,7 @@ function printReportMsg(filepath) {
418
423
  function replaceIllegalPathCharsAndSpace(str) {
419
424
  return str.replace(/[/\\:*?"<>| ]/g, "-");
420
425
  }
421
- function forceClosePopup(page, debug5) {
426
+ function forceClosePopup(page, debug6) {
422
427
  page.on("popup", async (popup) => {
423
428
  if (!popup) {
424
429
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -430,19 +435,19 @@ function forceClosePopup(page, debug5) {
430
435
  try {
431
436
  await popup.close();
432
437
  } catch (error) {
433
- debug5(`failed to close popup ${url}, error: ${error}`);
438
+ debug6(`failed to close popup ${url}, error: ${error}`);
434
439
  }
435
440
  } else {
436
- debug5(`popup is already closed, skip close ${url}`);
441
+ debug6(`popup is already closed, skip close ${url}`);
437
442
  }
438
443
  if (!page.isClosed()) {
439
444
  try {
440
445
  await page.goto(url);
441
446
  } catch (error) {
442
- debug5(`failed to goto ${url}, error: ${error}`);
447
+ debug6(`failed to goto ${url}, error: ${error}`);
443
448
  }
444
449
  } else {
445
- debug5(`page is already closed, skip goto ${url}`);
450
+ debug6(`page is already closed, skip goto ${url}`);
446
451
  }
447
452
  });
448
453
  }
@@ -724,6 +729,7 @@ function paramStr(task) {
724
729
  }
725
730
 
726
731
  // src/common/tasks.ts
732
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
727
733
  var replanningCountLimit = 10;
728
734
  var isAndroidPage = (page) => {
729
735
  return page.pageType === "android";
@@ -1312,13 +1318,37 @@ var PageTaskExecutor = class {
1312
1318
  };
1313
1319
  executorContext.task.recorder = [recordItem];
1314
1320
  executorContext.task.pageContext = pageContext;
1321
+ let imagePayload = pageContext.screenshotBase64;
1322
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1323
+ const size = pageContext.size;
1324
+ debug2("ui-tars-v1.5, will check image size", size);
1325
+ const currentPixels = size.width * size.height;
1326
+ const maxPixels = 16384 * 28 * 28;
1327
+ if (currentPixels > maxPixels) {
1328
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1329
+ const newWidth = Math.floor(size.width * resizeFactor);
1330
+ const newHeight = Math.floor(size.height * resizeFactor);
1331
+ debug2(
1332
+ "resize image",
1333
+ import_img2.imageInfo,
1334
+ "new width",
1335
+ newWidth,
1336
+ "new height",
1337
+ newHeight
1338
+ );
1339
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1340
+ width: newWidth,
1341
+ height: newHeight
1342
+ });
1343
+ }
1344
+ }
1315
1345
  this.appendConversationHistory({
1316
1346
  role: "user",
1317
1347
  content: [
1318
1348
  {
1319
1349
  type: "image_url",
1320
1350
  image_url: {
1321
- url: pageContext.screenshotBase64
1351
+ url: imagePayload
1322
1352
  }
1323
1353
  }
1324
1354
  ]
@@ -1661,9 +1691,9 @@ var PageTaskExecutor = class {
1661
1691
  };
1662
1692
 
1663
1693
  // src/common/plan-builder.ts
1664
- var import_logger2 = require("@midscene/shared/logger");
1694
+ var import_logger3 = require("@midscene/shared/logger");
1665
1695
  var import_utils10 = require("@midscene/shared/utils");
1666
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1696
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1667
1697
  function buildPlans(type, locateParam, param) {
1668
1698
  let returnPlans = [];
1669
1699
  const locatePlan = locateParam ? {
@@ -1725,14 +1755,14 @@ function buildPlans(type, locateParam, param) {
1725
1755
  returnPlans = [sleepPlan];
1726
1756
  }
1727
1757
  if (returnPlans) {
1728
- debug2("buildPlans", returnPlans);
1758
+ debug3("buildPlans", returnPlans);
1729
1759
  return returnPlans;
1730
1760
  }
1731
1761
  throw new Error(`Not supported type: ${type}`);
1732
1762
  }
1733
1763
 
1734
1764
  // src/common/agent.ts
1735
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1765
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1736
1766
  var PageAgent = class {
1737
1767
  constructor(page, opts) {
1738
1768
  /**
@@ -1775,7 +1805,7 @@ var PageAgent = class {
1775
1805
  });
1776
1806
  }
1777
1807
  return await parseContextFromWebPage(this.page, {
1778
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1808
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1779
1809
  });
1780
1810
  }
1781
1811
  async setAIActionContext(prompt) {
@@ -1810,7 +1840,7 @@ var PageAgent = class {
1810
1840
  type: "dump",
1811
1841
  generateReport
1812
1842
  });
1813
- debug3("writeOutActionDumps", this.reportFile);
1843
+ debug4("writeOutActionDumps", this.reportFile);
1814
1844
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1815
1845
  printReportMsg(this.reportFile);
1816
1846
  }
@@ -1914,7 +1944,7 @@ ${errorTask?.errorStack}`);
1914
1944
  return output;
1915
1945
  }
1916
1946
  async aiAction(taskPrompt) {
1917
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1947
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1918
1948
  this.afterTaskRunning(executor);
1919
1949
  return output;
1920
1950
  }
@@ -1997,7 +2027,7 @@ ${errors}`);
1997
2027
  };
1998
2028
 
1999
2029
  // src/puppeteer/index.ts
2000
- var import_logger5 = require("@midscene/shared/logger");
2030
+ var import_logger6 = require("@midscene/shared/logger");
2001
2031
 
2002
2032
  // src/puppeteer/page.ts
2003
2033
  var import_constants3 = require("@midscene/shared/constants");
@@ -2007,9 +2037,9 @@ var import_utils15 = require("@midscene/core/utils");
2007
2037
  var import_constants2 = require("@midscene/shared/constants");
2008
2038
  var import_extractor2 = require("@midscene/shared/extractor");
2009
2039
  var import_fs2 = require("@midscene/shared/fs");
2010
- var import_logger4 = require("@midscene/shared/logger");
2040
+ var import_logger5 = require("@midscene/shared/logger");
2011
2041
  var import_utils16 = require("@midscene/shared/utils");
2012
- var debugPage = (0, import_logger4.getDebug)("web:page");
2042
+ var debugPage = (0, import_logger5.getDebug)("web:page");
2013
2043
  var Page = class {
2014
2044
  constructor(underlyingPage, pageType, opts) {
2015
2045
  this.everMoved = false;
@@ -2291,15 +2321,15 @@ var WebPage = class extends Page {
2291
2321
  };
2292
2322
 
2293
2323
  // src/puppeteer/index.ts
2294
- var import_env4 = require("@midscene/shared/env");
2295
- var debug4 = (0, import_logger5.getDebug)("puppeteer:agent");
2324
+ var import_env7 = require("@midscene/shared/env");
2325
+ var debug5 = (0, import_logger6.getDebug)("puppeteer:agent");
2296
2326
  var PuppeteerAgent = class extends PageAgent {
2297
2327
  constructor(page, opts) {
2298
2328
  const webPage = new WebPage(page);
2299
2329
  super(webPage, opts);
2300
2330
  const { forceSameTabNavigation = true } = opts ?? {};
2301
2331
  if (forceSameTabNavigation) {
2302
- forceClosePopup(page, debug4);
2332
+ forceClosePopup(page, debug5);
2303
2333
  }
2304
2334
  }
2305
2335
  };