@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -55,7 +55,7 @@ __export(chrome_extension_exports, {
55
55
  ChromeExtensionProxyPage: () => ChromeExtensionProxyPage,
56
56
  ChromeExtensionProxyPageAgent: () => ChromeExtensionProxyPageAgent,
57
57
  ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED: () => ERROR_CODE_NOT_IMPLEMENTED_AS_DESIGNED,
58
- overrideAIConfig: () => import_env4.overrideAIConfig
58
+ overrideAIConfig: () => import_env7.overrideAIConfig
59
59
  });
60
60
  module.exports = __toCommonJS(chrome_extension_exports);
61
61
 
@@ -423,14 +423,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
423
423
  // src/common/agent.ts
424
424
  var import_utils11 = require("@midscene/core/utils");
425
425
  var import_constants = require("@midscene/shared/constants");
426
- var import_env3 = require("@midscene/shared/env");
427
- var import_logger3 = require("@midscene/shared/logger");
426
+ var import_env6 = require("@midscene/shared/env");
427
+ var import_logger4 = require("@midscene/shared/logger");
428
428
  var import_utils12 = require("@midscene/shared/utils");
429
429
 
430
430
  // src/common/tasks.ts
431
431
  var import_core = require("@midscene/core");
432
432
  var import_ai_model = require("@midscene/core/ai-model");
433
433
  var import_utils8 = require("@midscene/core/utils");
434
+ var import_env3 = require("@midscene/shared/env");
435
+ var import_env4 = require("@midscene/shared/env");
436
+ var import_env5 = require("@midscene/shared/env");
437
+ var import_img2 = require("@midscene/shared/img");
438
+ var import_logger2 = require("@midscene/shared/logger");
434
439
  var import_utils9 = require("@midscene/shared/utils");
435
440
 
436
441
  // src/common/task-cache.ts
@@ -741,6 +746,7 @@ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
741
746
  `;
742
747
 
743
748
  // src/common/tasks.ts
749
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
744
750
  var replanningCountLimit = 10;
745
751
  var isAndroidPage = (page) => {
746
752
  return page.pageType === "android";
@@ -1329,13 +1335,37 @@ var PageTaskExecutor = class {
1329
1335
  };
1330
1336
  executorContext.task.recorder = [recordItem];
1331
1337
  executorContext.task.pageContext = pageContext;
1338
+ let imagePayload = pageContext.screenshotBase64;
1339
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1340
+ const size = pageContext.size;
1341
+ debug2("ui-tars-v1.5, will check image size", size);
1342
+ const currentPixels = size.width * size.height;
1343
+ const maxPixels = 16384 * 28 * 28;
1344
+ if (currentPixels > maxPixels) {
1345
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1346
+ const newWidth = Math.floor(size.width * resizeFactor);
1347
+ const newHeight = Math.floor(size.height * resizeFactor);
1348
+ debug2(
1349
+ "resize image",
1350
+ import_img2.imageInfo,
1351
+ "new width",
1352
+ newWidth,
1353
+ "new height",
1354
+ newHeight
1355
+ );
1356
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1357
+ width: newWidth,
1358
+ height: newHeight
1359
+ });
1360
+ }
1361
+ }
1332
1362
  this.appendConversationHistory({
1333
1363
  role: "user",
1334
1364
  content: [
1335
1365
  {
1336
1366
  type: "image_url",
1337
1367
  image_url: {
1338
- url: pageContext.screenshotBase64
1368
+ url: imagePayload
1339
1369
  }
1340
1370
  }
1341
1371
  ]
@@ -1678,9 +1708,9 @@ var PageTaskExecutor = class {
1678
1708
  };
1679
1709
 
1680
1710
  // src/common/plan-builder.ts
1681
- var import_logger2 = require("@midscene/shared/logger");
1711
+ var import_logger3 = require("@midscene/shared/logger");
1682
1712
  var import_utils10 = require("@midscene/shared/utils");
1683
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1713
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1684
1714
  function buildPlans(type, locateParam, param) {
1685
1715
  let returnPlans = [];
1686
1716
  const locatePlan = locateParam ? {
@@ -1742,14 +1772,14 @@ function buildPlans(type, locateParam, param) {
1742
1772
  returnPlans = [sleepPlan];
1743
1773
  }
1744
1774
  if (returnPlans) {
1745
- debug2("buildPlans", returnPlans);
1775
+ debug3("buildPlans", returnPlans);
1746
1776
  return returnPlans;
1747
1777
  }
1748
1778
  throw new Error(`Not supported type: ${type}`);
1749
1779
  }
1750
1780
 
1751
1781
  // src/common/agent.ts
1752
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1782
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1753
1783
  var PageAgent = class {
1754
1784
  constructor(page, opts) {
1755
1785
  /**
@@ -1792,7 +1822,7 @@ var PageAgent = class {
1792
1822
  });
1793
1823
  }
1794
1824
  return await parseContextFromWebPage(this.page, {
1795
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1825
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1796
1826
  });
1797
1827
  }
1798
1828
  async setAIActionContext(prompt) {
@@ -1827,7 +1857,7 @@ var PageAgent = class {
1827
1857
  type: "dump",
1828
1858
  generateReport
1829
1859
  });
1830
- debug3("writeOutActionDumps", this.reportFile);
1860
+ debug4("writeOutActionDumps", this.reportFile);
1831
1861
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1832
1862
  printReportMsg(this.reportFile);
1833
1863
  }
@@ -1931,7 +1961,7 @@ ${errorTask?.errorStack}`);
1931
1961
  return output;
1932
1962
  }
1933
1963
  async aiAction(taskPrompt) {
1934
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1964
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1935
1965
  this.afterTaskRunning(executor);
1936
1966
  return output;
1937
1967
  }
@@ -2228,7 +2258,7 @@ function sleep2(ms) {
2228
2258
  var ChromeExtensionProxyPage = class {
2229
2259
  constructor(forceSameTabNavigation) {
2230
2260
  this.pageType = "chrome-extension-proxy";
2231
- this.version = "0.16.0";
2261
+ this.version = "0.16.1";
2232
2262
  this.activeTabId = null;
2233
2263
  this.tabIdOfDebuggerAttached = null;
2234
2264
  this.attachingDebugger = null;
@@ -2655,7 +2685,7 @@ var ChromeExtensionProxyPage = class {
2655
2685
  };
2656
2686
 
2657
2687
  // src/chrome-extension/index.ts
2658
- var import_env4 = require("@midscene/shared/env");
2688
+ var import_env7 = require("@midscene/shared/env");
2659
2689
  // Annotate the CommonJS export names for ESM import in node:
2660
2690
  0 && (module.exports = {
2661
2691
  ChromeExtensionProxyPage,