@midscene/web 0.16.0 → 0.16.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -386,14 +386,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
386
386
  // src/common/agent.ts
387
387
  var import_utils11 = require("@midscene/core/utils");
388
388
  var import_constants = require("@midscene/shared/constants");
389
- var import_env3 = require("@midscene/shared/env");
390
- var import_logger3 = require("@midscene/shared/logger");
389
+ var import_env6 = require("@midscene/shared/env");
390
+ var import_logger4 = require("@midscene/shared/logger");
391
391
  var import_utils12 = require("@midscene/shared/utils");
392
392
 
393
393
  // src/common/tasks.ts
394
394
  var import_core = require("@midscene/core");
395
395
  var import_ai_model = require("@midscene/core/ai-model");
396
396
  var import_utils8 = require("@midscene/core/utils");
397
+ var import_env3 = require("@midscene/shared/env");
398
+ var import_env4 = require("@midscene/shared/env");
399
+ var import_env5 = require("@midscene/shared/env");
400
+ var import_img2 = require("@midscene/shared/img");
401
+ var import_logger2 = require("@midscene/shared/logger");
397
402
  var import_utils9 = require("@midscene/shared/utils");
398
403
 
399
404
  // src/common/task-cache.ts
@@ -681,6 +686,7 @@ function paramStr(task) {
681
686
  }
682
687
 
683
688
  // src/common/tasks.ts
689
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
684
690
  var replanningCountLimit = 10;
685
691
  var isAndroidPage = (page) => {
686
692
  return page.pageType === "android";
@@ -1269,13 +1275,37 @@ var PageTaskExecutor = class {
1269
1275
  };
1270
1276
  executorContext.task.recorder = [recordItem];
1271
1277
  executorContext.task.pageContext = pageContext;
1278
+ let imagePayload = pageContext.screenshotBase64;
1279
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1280
+ const size = pageContext.size;
1281
+ debug2("ui-tars-v1.5, will check image size", size);
1282
+ const currentPixels = size.width * size.height;
1283
+ const maxPixels = 16384 * 28 * 28;
1284
+ if (currentPixels > maxPixels) {
1285
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1286
+ const newWidth = Math.floor(size.width * resizeFactor);
1287
+ const newHeight = Math.floor(size.height * resizeFactor);
1288
+ debug2(
1289
+ "resize image",
1290
+ import_img2.imageInfo,
1291
+ "new width",
1292
+ newWidth,
1293
+ "new height",
1294
+ newHeight
1295
+ );
1296
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1297
+ width: newWidth,
1298
+ height: newHeight
1299
+ });
1300
+ }
1301
+ }
1272
1302
  this.appendConversationHistory({
1273
1303
  role: "user",
1274
1304
  content: [
1275
1305
  {
1276
1306
  type: "image_url",
1277
1307
  image_url: {
1278
- url: pageContext.screenshotBase64
1308
+ url: imagePayload
1279
1309
  }
1280
1310
  }
1281
1311
  ]
@@ -1618,9 +1648,9 @@ var PageTaskExecutor = class {
1618
1648
  };
1619
1649
 
1620
1650
  // src/common/plan-builder.ts
1621
- var import_logger2 = require("@midscene/shared/logger");
1651
+ var import_logger3 = require("@midscene/shared/logger");
1622
1652
  var import_utils10 = require("@midscene/shared/utils");
1623
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1653
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1624
1654
  function buildPlans(type, locateParam, param) {
1625
1655
  let returnPlans = [];
1626
1656
  const locatePlan = locateParam ? {
@@ -1682,14 +1712,14 @@ function buildPlans(type, locateParam, param) {
1682
1712
  returnPlans = [sleepPlan];
1683
1713
  }
1684
1714
  if (returnPlans) {
1685
- debug2("buildPlans", returnPlans);
1715
+ debug3("buildPlans", returnPlans);
1686
1716
  return returnPlans;
1687
1717
  }
1688
1718
  throw new Error(`Not supported type: ${type}`);
1689
1719
  }
1690
1720
 
1691
1721
  // src/common/agent.ts
1692
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1722
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1693
1723
  var PageAgent = class {
1694
1724
  constructor(page, opts) {
1695
1725
  /**
@@ -1732,7 +1762,7 @@ var PageAgent = class {
1732
1762
  });
1733
1763
  }
1734
1764
  return await parseContextFromWebPage(this.page, {
1735
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1765
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1736
1766
  });
1737
1767
  }
1738
1768
  async setAIActionContext(prompt) {
@@ -1767,7 +1797,7 @@ var PageAgent = class {
1767
1797
  type: "dump",
1768
1798
  generateReport
1769
1799
  });
1770
- debug3("writeOutActionDumps", this.reportFile);
1800
+ debug4("writeOutActionDumps", this.reportFile);
1771
1801
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1772
1802
  printReportMsg(this.reportFile);
1773
1803
  }
@@ -1871,7 +1901,7 @@ ${errorTask?.errorStack}`);
1871
1901
  return output;
1872
1902
  }
1873
1903
  async aiAction(taskPrompt) {
1874
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1904
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1875
1905
  this.afterTaskRunning(executor);
1876
1906
  return output;
1877
1907
  }
@@ -2057,7 +2087,7 @@ var import_node_fs3 = require("fs");
2057
2087
  var import_node_path3 = require("path");
2058
2088
  var import_utils18 = require("@midscene/core/utils");
2059
2089
  var import_constants2 = require("@midscene/shared/constants");
2060
- var import_env4 = require("@midscene/shared/env");
2090
+ var import_env7 = require("@midscene/shared/env");
2061
2091
  var import_utils19 = require("@midscene/shared/utils");
2062
2092
  var import_cors = __toESM(require("cors"));
2063
2093
  var import_dotenv = __toESM(require("dotenv"));
@@ -2073,7 +2103,7 @@ var setup = async () => {
2073
2103
  if (!import_utils19.ifInBrowser) {
2074
2104
  const { parsed } = import_dotenv.default.config();
2075
2105
  if (parsed) {
2076
- (0, import_env4.overrideAIConfig)(parsed);
2106
+ (0, import_env7.overrideAIConfig)(parsed);
2077
2107
  }
2078
2108
  }
2079
2109
  };
@@ -2237,7 +2267,7 @@ var PlaygroundServer = class {
2237
2267
  });
2238
2268
  }
2239
2269
  try {
2240
- (0, import_env4.overrideAIConfig)(aiConfig);
2270
+ (0, import_env7.overrideAIConfig)(aiConfig);
2241
2271
  return res.json({
2242
2272
  status: "ok",
2243
2273
  message: "AI config updated successfully"