@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
package/dist/lib/agent.js CHANGED
@@ -304,14 +304,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
304
304
  // src/common/agent.ts
305
305
  var import_utils11 = require("@midscene/core/utils");
306
306
  var import_constants = require("@midscene/shared/constants");
307
- var import_env3 = require("@midscene/shared/env");
308
- var import_logger3 = require("@midscene/shared/logger");
307
+ var import_env6 = require("@midscene/shared/env");
308
+ var import_logger4 = require("@midscene/shared/logger");
309
309
  var import_utils12 = require("@midscene/shared/utils");
310
310
 
311
311
  // src/common/tasks.ts
312
312
  var import_core = require("@midscene/core");
313
313
  var import_ai_model = require("@midscene/core/ai-model");
314
314
  var import_utils8 = require("@midscene/core/utils");
315
+ var import_env3 = require("@midscene/shared/env");
316
+ var import_env4 = require("@midscene/shared/env");
317
+ var import_env5 = require("@midscene/shared/env");
318
+ var import_img2 = require("@midscene/shared/img");
319
+ var import_logger2 = require("@midscene/shared/logger");
315
320
  var import_utils9 = require("@midscene/shared/utils");
316
321
 
317
322
  // src/common/task-cache.ts
@@ -692,6 +697,7 @@ function paramStr(task) {
692
697
  }
693
698
 
694
699
  // src/common/tasks.ts
700
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
695
701
  var replanningCountLimit = 10;
696
702
  var isAndroidPage = (page) => {
697
703
  return page.pageType === "android";
@@ -1280,13 +1286,37 @@ var PageTaskExecutor = class {
1280
1286
  };
1281
1287
  executorContext.task.recorder = [recordItem];
1282
1288
  executorContext.task.pageContext = pageContext;
1289
+ let imagePayload = pageContext.screenshotBase64;
1290
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1291
+ const size = pageContext.size;
1292
+ debug2("ui-tars-v1.5, will check image size", size);
1293
+ const currentPixels = size.width * size.height;
1294
+ const maxPixels = 16384 * 28 * 28;
1295
+ if (currentPixels > maxPixels) {
1296
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1297
+ const newWidth = Math.floor(size.width * resizeFactor);
1298
+ const newHeight = Math.floor(size.height * resizeFactor);
1299
+ debug2(
1300
+ "resize image",
1301
+ import_img2.imageInfo,
1302
+ "new width",
1303
+ newWidth,
1304
+ "new height",
1305
+ newHeight
1306
+ );
1307
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1308
+ width: newWidth,
1309
+ height: newHeight
1310
+ });
1311
+ }
1312
+ }
1283
1313
  this.appendConversationHistory({
1284
1314
  role: "user",
1285
1315
  content: [
1286
1316
  {
1287
1317
  type: "image_url",
1288
1318
  image_url: {
1289
- url: pageContext.screenshotBase64
1319
+ url: imagePayload
1290
1320
  }
1291
1321
  }
1292
1322
  ]
@@ -1629,9 +1659,9 @@ var PageTaskExecutor = class {
1629
1659
  };
1630
1660
 
1631
1661
  // src/common/plan-builder.ts
1632
- var import_logger2 = require("@midscene/shared/logger");
1662
+ var import_logger3 = require("@midscene/shared/logger");
1633
1663
  var import_utils10 = require("@midscene/shared/utils");
1634
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1664
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1635
1665
  function buildPlans(type, locateParam, param) {
1636
1666
  let returnPlans = [];
1637
1667
  const locatePlan = locateParam ? {
@@ -1693,14 +1723,14 @@ function buildPlans(type, locateParam, param) {
1693
1723
  returnPlans = [sleepPlan];
1694
1724
  }
1695
1725
  if (returnPlans) {
1696
- debug2("buildPlans", returnPlans);
1726
+ debug3("buildPlans", returnPlans);
1697
1727
  return returnPlans;
1698
1728
  }
1699
1729
  throw new Error(`Not supported type: ${type}`);
1700
1730
  }
1701
1731
 
1702
1732
  // src/common/agent.ts
1703
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1733
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1704
1734
  var PageAgent = class {
1705
1735
  constructor(page, opts) {
1706
1736
  /**
@@ -1743,7 +1773,7 @@ var PageAgent = class {
1743
1773
  });
1744
1774
  }
1745
1775
  return await parseContextFromWebPage(this.page, {
1746
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1776
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1747
1777
  });
1748
1778
  }
1749
1779
  async setAIActionContext(prompt) {
@@ -1778,7 +1808,7 @@ var PageAgent = class {
1778
1808
  type: "dump",
1779
1809
  generateReport
1780
1810
  });
1781
- debug3("writeOutActionDumps", this.reportFile);
1811
+ debug4("writeOutActionDumps", this.reportFile);
1782
1812
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1783
1813
  printReportMsg(this.reportFile);
1784
1814
  }
@@ -1882,7 +1912,7 @@ ${errorTask?.errorStack}`);
1882
1912
  return output;
1883
1913
  }
1884
1914
  async aiAction(taskPrompt) {
1885
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1915
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1886
1916
  this.afterTaskRunning(executor);
1887
1917
  return output;
1888
1918
  }