@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -372,8 +372,8 @@ import {
372
372
  DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
373
373
  DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
374
374
  } from "@midscene/shared/constants";
375
- import { vlLocateMode } from "@midscene/shared/env";
376
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
375
+ import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
376
+ import { getDebug as getDebug4 } from "@midscene/shared/logger";
377
377
  import { assert as assert6 } from "@midscene/shared/utils";
378
378
 
379
379
  // src/common/tasks.ts
@@ -385,6 +385,14 @@ import {
385
385
  vlmPlanning
386
386
  } from "@midscene/core/ai-model";
387
387
  import { sleep } from "@midscene/core/utils";
388
+ import { UITarsModelVersion } from "@midscene/shared/env";
389
+ import { uiTarsModelVersion } from "@midscene/shared/env";
390
+ import { vlLocateMode } from "@midscene/shared/env";
391
+ import {
392
+ imageInfo,
393
+ resizeImgBase64 as resizeImgBase642
394
+ } from "@midscene/shared/img";
395
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
388
396
  import { assert as assert4 } from "@midscene/shared/utils";
389
397
 
390
398
  // src/common/task-cache.ts
@@ -672,6 +680,7 @@ function paramStr(task) {
672
680
  }
673
681
 
674
682
  // src/common/tasks.ts
683
+ var debug2 = getDebug2("page-task-executor");
675
684
  var replanningCountLimit = 10;
676
685
  var isAndroidPage = (page) => {
677
686
  return page.pageType === "android";
@@ -1260,13 +1269,37 @@ var PageTaskExecutor = class {
1260
1269
  };
1261
1270
  executorContext.task.recorder = [recordItem];
1262
1271
  executorContext.task.pageContext = pageContext;
1272
+ let imagePayload = pageContext.screenshotBase64;
1273
+ if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
1274
+ const size = pageContext.size;
1275
+ debug2("ui-tars-v1.5, will check image size", size);
1276
+ const currentPixels = size.width * size.height;
1277
+ const maxPixels = 16384 * 28 * 28;
1278
+ if (currentPixels > maxPixels) {
1279
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1280
+ const newWidth = Math.floor(size.width * resizeFactor);
1281
+ const newHeight = Math.floor(size.height * resizeFactor);
1282
+ debug2(
1283
+ "resize image",
1284
+ imageInfo,
1285
+ "new width",
1286
+ newWidth,
1287
+ "new height",
1288
+ newHeight
1289
+ );
1290
+ imagePayload = await resizeImgBase642(imagePayload, {
1291
+ width: newWidth,
1292
+ height: newHeight
1293
+ });
1294
+ }
1295
+ }
1263
1296
  this.appendConversationHistory({
1264
1297
  role: "user",
1265
1298
  content: [
1266
1299
  {
1267
1300
  type: "image_url",
1268
1301
  image_url: {
1269
- url: pageContext.screenshotBase64
1302
+ url: imagePayload
1270
1303
  }
1271
1304
  }
1272
1305
  ]
@@ -1609,9 +1642,9 @@ var PageTaskExecutor = class {
1609
1642
  };
1610
1643
 
1611
1644
  // src/common/plan-builder.ts
1612
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1645
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1613
1646
  import { assert as assert5 } from "@midscene/shared/utils";
1614
- var debug2 = getDebug2("plan-builder");
1647
+ var debug3 = getDebug3("plan-builder");
1615
1648
  function buildPlans(type, locateParam, param) {
1616
1649
  let returnPlans = [];
1617
1650
  const locatePlan = locateParam ? {
@@ -1673,14 +1706,14 @@ function buildPlans(type, locateParam, param) {
1673
1706
  returnPlans = [sleepPlan];
1674
1707
  }
1675
1708
  if (returnPlans) {
1676
- debug2("buildPlans", returnPlans);
1709
+ debug3("buildPlans", returnPlans);
1677
1710
  return returnPlans;
1678
1711
  }
1679
1712
  throw new Error(`Not supported type: ${type}`);
1680
1713
  }
1681
1714
 
1682
1715
  // src/common/agent.ts
1683
- var debug3 = getDebug3("web-integration");
1716
+ var debug4 = getDebug4("web-integration");
1684
1717
  var PageAgent = class {
1685
1718
  constructor(page, opts) {
1686
1719
  /**
@@ -1723,7 +1756,7 @@ var PageAgent = class {
1723
1756
  });
1724
1757
  }
1725
1758
  return await parseContextFromWebPage(this.page, {
1726
- ignoreMarker: !!vlLocateMode()
1759
+ ignoreMarker: !!vlLocateMode2()
1727
1760
  });
1728
1761
  }
1729
1762
  async setAIActionContext(prompt) {
@@ -1758,7 +1791,7 @@ var PageAgent = class {
1758
1791
  type: "dump",
1759
1792
  generateReport
1760
1793
  });
1761
- debug3("writeOutActionDumps", this.reportFile);
1794
+ debug4("writeOutActionDumps", this.reportFile);
1762
1795
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1763
1796
  printReportMsg(this.reportFile);
1764
1797
  }
@@ -1862,7 +1895,7 @@ ${errorTask?.errorStack}`);
1862
1895
  return output;
1863
1896
  }
1864
1897
  async aiAction(taskPrompt) {
1865
- const { output, executor } = await (vlLocateMode() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1898
+ const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1866
1899
  this.afterTaskRunning(executor);
1867
1900
  return output;
1868
1901
  }