@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -400,14 +400,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
400
400
  // src/common/agent.ts
401
401
  var import_utils11 = require("@midscene/core/utils");
402
402
  var import_constants = require("@midscene/shared/constants");
403
- var import_env3 = require("@midscene/shared/env");
404
- var import_logger3 = require("@midscene/shared/logger");
403
+ var import_env6 = require("@midscene/shared/env");
404
+ var import_logger4 = require("@midscene/shared/logger");
405
405
  var import_utils12 = require("@midscene/shared/utils");
406
406
 
407
407
  // src/common/tasks.ts
408
408
  var import_core = require("@midscene/core");
409
409
  var import_ai_model = require("@midscene/core/ai-model");
410
410
  var import_utils8 = require("@midscene/core/utils");
411
+ var import_env3 = require("@midscene/shared/env");
412
+ var import_env4 = require("@midscene/shared/env");
413
+ var import_env5 = require("@midscene/shared/env");
414
+ var import_img2 = require("@midscene/shared/img");
415
+ var import_logger2 = require("@midscene/shared/logger");
411
416
  var import_utils9 = require("@midscene/shared/utils");
412
417
 
413
418
  // src/common/task-cache.ts
@@ -695,6 +700,7 @@ function paramStr(task) {
695
700
  }
696
701
 
697
702
  // src/common/tasks.ts
703
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
698
704
  var replanningCountLimit = 10;
699
705
  var isAndroidPage = (page) => {
700
706
  return page.pageType === "android";
@@ -1283,13 +1289,37 @@ var PageTaskExecutor = class {
1283
1289
  };
1284
1290
  executorContext.task.recorder = [recordItem];
1285
1291
  executorContext.task.pageContext = pageContext;
1292
+ let imagePayload = pageContext.screenshotBase64;
1293
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1294
+ const size = pageContext.size;
1295
+ debug2("ui-tars-v1.5, will check image size", size);
1296
+ const currentPixels = size.width * size.height;
1297
+ const maxPixels = 16384 * 28 * 28;
1298
+ if (currentPixels > maxPixels) {
1299
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1300
+ const newWidth = Math.floor(size.width * resizeFactor);
1301
+ const newHeight = Math.floor(size.height * resizeFactor);
1302
+ debug2(
1303
+ "resize image",
1304
+ import_img2.imageInfo,
1305
+ "new width",
1306
+ newWidth,
1307
+ "new height",
1308
+ newHeight
1309
+ );
1310
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1311
+ width: newWidth,
1312
+ height: newHeight
1313
+ });
1314
+ }
1315
+ }
1286
1316
  this.appendConversationHistory({
1287
1317
  role: "user",
1288
1318
  content: [
1289
1319
  {
1290
1320
  type: "image_url",
1291
1321
  image_url: {
1292
- url: pageContext.screenshotBase64
1322
+ url: imagePayload
1293
1323
  }
1294
1324
  }
1295
1325
  ]
@@ -1632,9 +1662,9 @@ var PageTaskExecutor = class {
1632
1662
  };
1633
1663
 
1634
1664
  // src/common/plan-builder.ts
1635
- var import_logger2 = require("@midscene/shared/logger");
1665
+ var import_logger3 = require("@midscene/shared/logger");
1636
1666
  var import_utils10 = require("@midscene/shared/utils");
1637
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1667
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1638
1668
  function buildPlans(type, locateParam, param) {
1639
1669
  let returnPlans = [];
1640
1670
  const locatePlan = locateParam ? {
@@ -1696,14 +1726,14 @@ function buildPlans(type, locateParam, param) {
1696
1726
  returnPlans = [sleepPlan];
1697
1727
  }
1698
1728
  if (returnPlans) {
1699
- debug2("buildPlans", returnPlans);
1729
+ debug3("buildPlans", returnPlans);
1700
1730
  return returnPlans;
1701
1731
  }
1702
1732
  throw new Error(`Not supported type: ${type}`);
1703
1733
  }
1704
1734
 
1705
1735
  // src/common/agent.ts
1706
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1736
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1707
1737
  var PageAgent = class {
1708
1738
  constructor(page, opts) {
1709
1739
  /**
@@ -1746,7 +1776,7 @@ var PageAgent = class {
1746
1776
  });
1747
1777
  }
1748
1778
  return await parseContextFromWebPage(this.page, {
1749
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1779
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1750
1780
  });
1751
1781
  }
1752
1782
  async setAIActionContext(prompt) {
@@ -1781,7 +1811,7 @@ var PageAgent = class {
1781
1811
  type: "dump",
1782
1812
  generateReport
1783
1813
  });
1784
- debug3("writeOutActionDumps", this.reportFile);
1814
+ debug4("writeOutActionDumps", this.reportFile);
1785
1815
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1786
1816
  printReportMsg(this.reportFile);
1787
1817
  }
@@ -1885,7 +1915,7 @@ ${errorTask?.errorStack}`);
1885
1915
  return output;
1886
1916
  }
1887
1917
  async aiAction(taskPrompt) {
1888
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1918
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1889
1919
  this.afterTaskRunning(executor);
1890
1920
  return output;
1891
1921
  }