@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -291,7 +291,7 @@ function sleep(ms) {
291
291
  var ChromeExtensionProxyPage = class {
292
292
  constructor(forceSameTabNavigation) {
293
293
  this.pageType = "chrome-extension-proxy";
294
- this.version = "0.16.0";
294
+ this.version = "0.16.1";
295
295
  this.activeTabId = null;
296
296
  this.tabIdOfDebuggerAttached = null;
297
297
  this.attachingDebugger = null;
@@ -737,7 +737,7 @@ var BridgeClient = class {
737
737
  this.socket = (0, import_socket.io)(this.endpoint, {
738
738
  reconnection: false,
739
739
  query: {
740
- version: "0.16.0"
740
+ version: "0.16.1"
741
741
  }
742
742
  });
743
743
  const timeout = setTimeout(() => {
@@ -878,7 +878,7 @@ var ExtensionBridgePageBrowserSide = class extends ChromeExtensionProxyPage {
878
878
  );
879
879
  await this.bridgeClient.connect();
880
880
  this.onLogMessage(
881
- `Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.16.0"}`,
881
+ `Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.16.1"}`,
882
882
  "log"
883
883
  );
884
884
  }
@@ -31,8 +31,8 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
31
31
  var bridge_mode_exports = {};
32
32
  __export(bridge_mode_exports, {
33
33
  AgentOverChromeBridge: () => AgentOverChromeBridge,
34
- allConfigFromEnv: () => import_env4.allConfigFromEnv,
35
- overrideAIConfig: () => import_env4.overrideAIConfig
34
+ allConfigFromEnv: () => import_env7.allConfigFromEnv,
35
+ overrideAIConfig: () => import_env7.overrideAIConfig
36
36
  });
37
37
  module.exports = __toCommonJS(bridge_mode_exports);
38
38
 
@@ -308,14 +308,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
308
308
  // src/common/agent.ts
309
309
  var import_utils11 = require("@midscene/core/utils");
310
310
  var import_constants = require("@midscene/shared/constants");
311
- var import_env3 = require("@midscene/shared/env");
312
- var import_logger3 = require("@midscene/shared/logger");
311
+ var import_env6 = require("@midscene/shared/env");
312
+ var import_logger4 = require("@midscene/shared/logger");
313
313
  var import_utils12 = require("@midscene/shared/utils");
314
314
 
315
315
  // src/common/tasks.ts
316
316
  var import_core = require("@midscene/core");
317
317
  var import_ai_model = require("@midscene/core/ai-model");
318
318
  var import_utils8 = require("@midscene/core/utils");
319
+ var import_env3 = require("@midscene/shared/env");
320
+ var import_env4 = require("@midscene/shared/env");
321
+ var import_env5 = require("@midscene/shared/env");
322
+ var import_img2 = require("@midscene/shared/img");
323
+ var import_logger2 = require("@midscene/shared/logger");
319
324
  var import_utils9 = require("@midscene/shared/utils");
320
325
 
321
326
  // src/common/task-cache.ts
@@ -696,6 +701,7 @@ function paramStr(task) {
696
701
  }
697
702
 
698
703
  // src/common/tasks.ts
704
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
699
705
  var replanningCountLimit = 10;
700
706
  var isAndroidPage = (page) => {
701
707
  return page.pageType === "android";
@@ -1284,13 +1290,37 @@ var PageTaskExecutor = class {
1284
1290
  };
1285
1291
  executorContext.task.recorder = [recordItem];
1286
1292
  executorContext.task.pageContext = pageContext;
1293
+ let imagePayload = pageContext.screenshotBase64;
1294
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1295
+ const size = pageContext.size;
1296
+ debug2("ui-tars-v1.5, will check image size", size);
1297
+ const currentPixels = size.width * size.height;
1298
+ const maxPixels = 16384 * 28 * 28;
1299
+ if (currentPixels > maxPixels) {
1300
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1301
+ const newWidth = Math.floor(size.width * resizeFactor);
1302
+ const newHeight = Math.floor(size.height * resizeFactor);
1303
+ debug2(
1304
+ "resize image",
1305
+ import_img2.imageInfo,
1306
+ "new width",
1307
+ newWidth,
1308
+ "new height",
1309
+ newHeight
1310
+ );
1311
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1312
+ width: newWidth,
1313
+ height: newHeight
1314
+ });
1315
+ }
1316
+ }
1287
1317
  this.appendConversationHistory({
1288
1318
  role: "user",
1289
1319
  content: [
1290
1320
  {
1291
1321
  type: "image_url",
1292
1322
  image_url: {
1293
- url: pageContext.screenshotBase64
1323
+ url: imagePayload
1294
1324
  }
1295
1325
  }
1296
1326
  ]
@@ -1633,9 +1663,9 @@ var PageTaskExecutor = class {
1633
1663
  };
1634
1664
 
1635
1665
  // src/common/plan-builder.ts
1636
- var import_logger2 = require("@midscene/shared/logger");
1666
+ var import_logger3 = require("@midscene/shared/logger");
1637
1667
  var import_utils10 = require("@midscene/shared/utils");
1638
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1668
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1639
1669
  function buildPlans(type, locateParam, param) {
1640
1670
  let returnPlans = [];
1641
1671
  const locatePlan = locateParam ? {
@@ -1697,14 +1727,14 @@ function buildPlans(type, locateParam, param) {
1697
1727
  returnPlans = [sleepPlan];
1698
1728
  }
1699
1729
  if (returnPlans) {
1700
- debug2("buildPlans", returnPlans);
1730
+ debug3("buildPlans", returnPlans);
1701
1731
  return returnPlans;
1702
1732
  }
1703
1733
  throw new Error(`Not supported type: ${type}`);
1704
1734
  }
1705
1735
 
1706
1736
  // src/common/agent.ts
1707
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1737
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1708
1738
  var PageAgent = class {
1709
1739
  constructor(page, opts) {
1710
1740
  /**
@@ -1747,7 +1777,7 @@ var PageAgent = class {
1747
1777
  });
1748
1778
  }
1749
1779
  return await parseContextFromWebPage(this.page, {
1750
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1780
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1751
1781
  });
1752
1782
  }
1753
1783
  async setAIActionContext(prompt) {
@@ -1782,7 +1812,7 @@ var PageAgent = class {
1782
1812
  type: "dump",
1783
1813
  generateReport
1784
1814
  });
1785
- debug3("writeOutActionDumps", this.reportFile);
1815
+ debug4("writeOutActionDumps", this.reportFile);
1786
1816
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1787
1817
  printReportMsg(this.reportFile);
1788
1818
  }
@@ -1886,7 +1916,7 @@ ${errorTask?.errorStack}`);
1886
1916
  return output;
1887
1917
  }
1888
1918
  async aiAction(taskPrompt) {
1889
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1919
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1890
1920
  this.afterTaskRunning(executor);
1891
1921
  return output;
1892
1922
  }
@@ -2042,7 +2072,7 @@ var BridgeServer = class {
2042
2072
  this.socket = socket;
2043
2073
  const clientVersion = socket.handshake.query.version;
2044
2074
  (0, import_utils15.logMsg)(
2045
- `Bridge connected, cli-side version v${"0.16.0"}, browser-side version v${clientVersion}`
2075
+ `Bridge connected, cli-side version v${"0.16.1"}, browser-side version v${clientVersion}`
2046
2076
  );
2047
2077
  socket.on("bridge-call-response" /* CallResponse */, (params) => {
2048
2078
  const id = params.id;
@@ -2073,7 +2103,7 @@ var BridgeServer = class {
2073
2103
  setTimeout(() => {
2074
2104
  this.onConnect?.();
2075
2105
  const payload = {
2076
- version: "0.16.0"
2106
+ version: "0.16.1"
2077
2107
  };
2078
2108
  socket.emit("bridge-connected" /* Connected */, payload);
2079
2109
  Promise.resolve().then(() => {
@@ -2281,7 +2311,7 @@ var AgentOverChromeBridge = class extends PageAgent {
2281
2311
  };
2282
2312
 
2283
2313
  // src/bridge-mode/index.ts
2284
- var import_env4 = require("@midscene/shared/env");
2314
+ var import_env7 = require("@midscene/shared/env");
2285
2315
  // Annotate the CommonJS export names for ESM import in node:
2286
2316
  0 && (module.exports = {
2287
2317
  AgentOverChromeBridge,