@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -258,7 +258,7 @@ function sleep(ms) {
258
258
  var ChromeExtensionProxyPage = class {
259
259
  constructor(forceSameTabNavigation) {
260
260
  this.pageType = "chrome-extension-proxy";
261
- this.version = "0.16.0";
261
+ this.version = "0.16.1";
262
262
  this.activeTabId = null;
263
263
  this.tabIdOfDebuggerAttached = null;
264
264
  this.attachingDebugger = null;
@@ -704,7 +704,7 @@ var BridgeClient = class {
704
704
  this.socket = ClientIO(this.endpoint, {
705
705
  reconnection: false,
706
706
  query: {
707
- version: "0.16.0"
707
+ version: "0.16.1"
708
708
  }
709
709
  });
710
710
  const timeout = setTimeout(() => {
@@ -845,7 +845,7 @@ var ExtensionBridgePageBrowserSide = class extends ChromeExtensionProxyPage {
845
845
  );
846
846
  await this.bridgeClient.connect();
847
847
  this.onLogMessage(
848
- `Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.16.0"}`,
848
+ `Bridge connected, cli-side version v${this.bridgeClient.serverVersion}, browser-side version v${"0.16.1"}`,
849
849
  "log"
850
850
  );
851
851
  }
@@ -280,8 +280,8 @@ import {
280
280
  DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
281
281
  DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
282
282
  } from "@midscene/shared/constants";
283
- import { vlLocateMode } from "@midscene/shared/env";
284
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
283
+ import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
284
+ import { getDebug as getDebug4 } from "@midscene/shared/logger";
285
285
  import { assert as assert6 } from "@midscene/shared/utils";
286
286
 
287
287
  // src/common/tasks.ts
@@ -293,6 +293,14 @@ import {
293
293
  vlmPlanning
294
294
  } from "@midscene/core/ai-model";
295
295
  import { sleep } from "@midscene/core/utils";
296
+ import { UITarsModelVersion } from "@midscene/shared/env";
297
+ import { uiTarsModelVersion } from "@midscene/shared/env";
298
+ import { vlLocateMode } from "@midscene/shared/env";
299
+ import {
300
+ imageInfo,
301
+ resizeImgBase64 as resizeImgBase642
302
+ } from "@midscene/shared/img";
303
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
296
304
  import { assert as assert4 } from "@midscene/shared/utils";
297
305
 
298
306
  // src/common/task-cache.ts
@@ -673,6 +681,7 @@ function paramStr(task) {
673
681
  }
674
682
 
675
683
  // src/common/tasks.ts
684
+ var debug2 = getDebug2("page-task-executor");
676
685
  var replanningCountLimit = 10;
677
686
  var isAndroidPage = (page) => {
678
687
  return page.pageType === "android";
@@ -1261,13 +1270,37 @@ var PageTaskExecutor = class {
1261
1270
  };
1262
1271
  executorContext.task.recorder = [recordItem];
1263
1272
  executorContext.task.pageContext = pageContext;
1273
+ let imagePayload = pageContext.screenshotBase64;
1274
+ if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
1275
+ const size = pageContext.size;
1276
+ debug2("ui-tars-v1.5, will check image size", size);
1277
+ const currentPixels = size.width * size.height;
1278
+ const maxPixels = 16384 * 28 * 28;
1279
+ if (currentPixels > maxPixels) {
1280
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1281
+ const newWidth = Math.floor(size.width * resizeFactor);
1282
+ const newHeight = Math.floor(size.height * resizeFactor);
1283
+ debug2(
1284
+ "resize image",
1285
+ imageInfo,
1286
+ "new width",
1287
+ newWidth,
1288
+ "new height",
1289
+ newHeight
1290
+ );
1291
+ imagePayload = await resizeImgBase642(imagePayload, {
1292
+ width: newWidth,
1293
+ height: newHeight
1294
+ });
1295
+ }
1296
+ }
1264
1297
  this.appendConversationHistory({
1265
1298
  role: "user",
1266
1299
  content: [
1267
1300
  {
1268
1301
  type: "image_url",
1269
1302
  image_url: {
1270
- url: pageContext.screenshotBase64
1303
+ url: imagePayload
1271
1304
  }
1272
1305
  }
1273
1306
  ]
@@ -1610,9 +1643,9 @@ var PageTaskExecutor = class {
1610
1643
  };
1611
1644
 
1612
1645
  // src/common/plan-builder.ts
1613
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1646
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1614
1647
  import { assert as assert5 } from "@midscene/shared/utils";
1615
- var debug2 = getDebug2("plan-builder");
1648
+ var debug3 = getDebug3("plan-builder");
1616
1649
  function buildPlans(type, locateParam, param) {
1617
1650
  let returnPlans = [];
1618
1651
  const locatePlan = locateParam ? {
@@ -1674,14 +1707,14 @@ function buildPlans(type, locateParam, param) {
1674
1707
  returnPlans = [sleepPlan];
1675
1708
  }
1676
1709
  if (returnPlans) {
1677
- debug2("buildPlans", returnPlans);
1710
+ debug3("buildPlans", returnPlans);
1678
1711
  return returnPlans;
1679
1712
  }
1680
1713
  throw new Error(`Not supported type: ${type}`);
1681
1714
  }
1682
1715
 
1683
1716
  // src/common/agent.ts
1684
- var debug3 = getDebug3("web-integration");
1717
+ var debug4 = getDebug4("web-integration");
1685
1718
  var PageAgent = class {
1686
1719
  constructor(page, opts) {
1687
1720
  /**
@@ -1724,7 +1757,7 @@ var PageAgent = class {
1724
1757
  });
1725
1758
  }
1726
1759
  return await parseContextFromWebPage(this.page, {
1727
- ignoreMarker: !!vlLocateMode()
1760
+ ignoreMarker: !!vlLocateMode2()
1728
1761
  });
1729
1762
  }
1730
1763
  async setAIActionContext(prompt) {
@@ -1759,7 +1792,7 @@ var PageAgent = class {
1759
1792
  type: "dump",
1760
1793
  generateReport
1761
1794
  });
1762
- debug3("writeOutActionDumps", this.reportFile);
1795
+ debug4("writeOutActionDumps", this.reportFile);
1763
1796
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1764
1797
  printReportMsg(this.reportFile);
1765
1798
  }
@@ -1863,7 +1896,7 @@ ${errorTask?.errorStack}`);
1863
1896
  return output;
1864
1897
  }
1865
1898
  async aiAction(taskPrompt) {
1866
- const { output, executor } = await (vlLocateMode() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1899
+ const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1867
1900
  this.afterTaskRunning(executor);
1868
1901
  return output;
1869
1902
  }
@@ -2019,7 +2052,7 @@ var BridgeServer = class {
2019
2052
  this.socket = socket;
2020
2053
  const clientVersion = socket.handshake.query.version;
2021
2054
  logMsg2(
2022
- `Bridge connected, cli-side version v${"0.16.0"}, browser-side version v${clientVersion}`
2055
+ `Bridge connected, cli-side version v${"0.16.1"}, browser-side version v${clientVersion}`
2023
2056
  );
2024
2057
  socket.on("bridge-call-response" /* CallResponse */, (params) => {
2025
2058
  const id = params.id;
@@ -2050,7 +2083,7 @@ var BridgeServer = class {
2050
2083
  setTimeout(() => {
2051
2084
  this.onConnect?.();
2052
2085
  const payload = {
2053
- version: "0.16.0"
2086
+ version: "0.16.1"
2054
2087
  };
2055
2088
  socket.emit("bridge-connected" /* Connected */, payload);
2056
2089
  Promise.resolve().then(() => {