@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -395,8 +395,8 @@ import {
395
395
  DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
396
396
  DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
397
397
  } from "@midscene/shared/constants";
398
- import { vlLocateMode } from "@midscene/shared/env";
399
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
398
+ import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
399
+ import { getDebug as getDebug4 } from "@midscene/shared/logger";
400
400
  import { assert as assert6 } from "@midscene/shared/utils";
401
401
 
402
402
  // src/common/tasks.ts
@@ -408,6 +408,14 @@ import {
408
408
  vlmPlanning
409
409
  } from "@midscene/core/ai-model";
410
410
  import { sleep } from "@midscene/core/utils";
411
+ import { UITarsModelVersion } from "@midscene/shared/env";
412
+ import { uiTarsModelVersion } from "@midscene/shared/env";
413
+ import { vlLocateMode } from "@midscene/shared/env";
414
+ import {
415
+ imageInfo,
416
+ resizeImgBase64 as resizeImgBase642
417
+ } from "@midscene/shared/img";
418
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
411
419
  import { assert as assert4 } from "@midscene/shared/utils";
412
420
 
413
421
  // src/common/task-cache.ts
@@ -718,6 +726,7 @@ if (!window.__MIDSCENE_NEW_TAB_INTERCEPTOR_INITIALIZED__) {
718
726
  `;
719
727
 
720
728
  // src/common/tasks.ts
729
+ var debug2 = getDebug2("page-task-executor");
721
730
  var replanningCountLimit = 10;
722
731
  var isAndroidPage = (page) => {
723
732
  return page.pageType === "android";
@@ -1306,13 +1315,37 @@ var PageTaskExecutor = class {
1306
1315
  };
1307
1316
  executorContext.task.recorder = [recordItem];
1308
1317
  executorContext.task.pageContext = pageContext;
1318
+ let imagePayload = pageContext.screenshotBase64;
1319
+ if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
1320
+ const size = pageContext.size;
1321
+ debug2("ui-tars-v1.5, will check image size", size);
1322
+ const currentPixels = size.width * size.height;
1323
+ const maxPixels = 16384 * 28 * 28;
1324
+ if (currentPixels > maxPixels) {
1325
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1326
+ const newWidth = Math.floor(size.width * resizeFactor);
1327
+ const newHeight = Math.floor(size.height * resizeFactor);
1328
+ debug2(
1329
+ "resize image",
1330
+ imageInfo,
1331
+ "new width",
1332
+ newWidth,
1333
+ "new height",
1334
+ newHeight
1335
+ );
1336
+ imagePayload = await resizeImgBase642(imagePayload, {
1337
+ width: newWidth,
1338
+ height: newHeight
1339
+ });
1340
+ }
1341
+ }
1309
1342
  this.appendConversationHistory({
1310
1343
  role: "user",
1311
1344
  content: [
1312
1345
  {
1313
1346
  type: "image_url",
1314
1347
  image_url: {
1315
- url: pageContext.screenshotBase64
1348
+ url: imagePayload
1316
1349
  }
1317
1350
  }
1318
1351
  ]
@@ -1655,9 +1688,9 @@ var PageTaskExecutor = class {
1655
1688
  };
1656
1689
 
1657
1690
  // src/common/plan-builder.ts
1658
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1691
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1659
1692
  import { assert as assert5 } from "@midscene/shared/utils";
1660
- var debug2 = getDebug2("plan-builder");
1693
+ var debug3 = getDebug3("plan-builder");
1661
1694
  function buildPlans(type, locateParam, param) {
1662
1695
  let returnPlans = [];
1663
1696
  const locatePlan = locateParam ? {
@@ -1719,14 +1752,14 @@ function buildPlans(type, locateParam, param) {
1719
1752
  returnPlans = [sleepPlan];
1720
1753
  }
1721
1754
  if (returnPlans) {
1722
- debug2("buildPlans", returnPlans);
1755
+ debug3("buildPlans", returnPlans);
1723
1756
  return returnPlans;
1724
1757
  }
1725
1758
  throw new Error(`Not supported type: ${type}`);
1726
1759
  }
1727
1760
 
1728
1761
  // src/common/agent.ts
1729
- var debug3 = getDebug3("web-integration");
1762
+ var debug4 = getDebug4("web-integration");
1730
1763
  var PageAgent = class {
1731
1764
  constructor(page, opts) {
1732
1765
  /**
@@ -1769,7 +1802,7 @@ var PageAgent = class {
1769
1802
  });
1770
1803
  }
1771
1804
  return await parseContextFromWebPage(this.page, {
1772
- ignoreMarker: !!vlLocateMode()
1805
+ ignoreMarker: !!vlLocateMode2()
1773
1806
  });
1774
1807
  }
1775
1808
  async setAIActionContext(prompt) {
@@ -1804,7 +1837,7 @@ var PageAgent = class {
1804
1837
  type: "dump",
1805
1838
  generateReport
1806
1839
  });
1807
- debug3("writeOutActionDumps", this.reportFile);
1840
+ debug4("writeOutActionDumps", this.reportFile);
1808
1841
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1809
1842
  printReportMsg(this.reportFile);
1810
1843
  }
@@ -1908,7 +1941,7 @@ ${errorTask?.errorStack}`);
1908
1941
  return output;
1909
1942
  }
1910
1943
  async aiAction(taskPrompt) {
1911
- const { output, executor } = await (vlLocateMode() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1944
+ const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1912
1945
  this.afterTaskRunning(executor);
1913
1946
  return output;
1914
1947
  }
@@ -2207,7 +2240,7 @@ function sleep2(ms) {
2207
2240
  var ChromeExtensionProxyPage = class {
2208
2241
  constructor(forceSameTabNavigation) {
2209
2242
  this.pageType = "chrome-extension-proxy";
2210
- this.version = "0.16.0";
2243
+ this.version = "0.16.1";
2211
2244
  this.activeTabId = null;
2212
2245
  this.tabIdOfDebuggerAttached = null;
2213
2246
  this.attachingDebugger = null;