@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -280,8 +280,8 @@ import {
280
280
  DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
281
281
  DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
282
282
  } from "@midscene/shared/constants";
283
- import { vlLocateMode } from "@midscene/shared/env";
284
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
283
+ import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
284
+ import { getDebug as getDebug4 } from "@midscene/shared/logger";
285
285
  import { assert as assert6 } from "@midscene/shared/utils";
286
286
 
287
287
  // src/common/tasks.ts
@@ -293,6 +293,14 @@ import {
293
293
  vlmPlanning
294
294
  } from "@midscene/core/ai-model";
295
295
  import { sleep } from "@midscene/core/utils";
296
+ import { UITarsModelVersion } from "@midscene/shared/env";
297
+ import { uiTarsModelVersion } from "@midscene/shared/env";
298
+ import { vlLocateMode } from "@midscene/shared/env";
299
+ import {
300
+ imageInfo,
301
+ resizeImgBase64 as resizeImgBase642
302
+ } from "@midscene/shared/img";
303
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
296
304
  import { assert as assert4 } from "@midscene/shared/utils";
297
305
 
298
306
  // src/common/task-cache.ts
@@ -395,7 +403,7 @@ function printReportMsg(filepath) {
395
403
  function replaceIllegalPathCharsAndSpace(str) {
396
404
  return str.replace(/[/\\:*?"<>| ]/g, "-");
397
405
  }
398
- function forceClosePopup(page, debug5) {
406
+ function forceClosePopup(page, debug6) {
399
407
  page.on("popup", async (popup) => {
400
408
  if (!popup) {
401
409
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -407,19 +415,19 @@ function forceClosePopup(page, debug5) {
407
415
  try {
408
416
  await popup.close();
409
417
  } catch (error) {
410
- debug5(`failed to close popup ${url}, error: ${error}`);
418
+ debug6(`failed to close popup ${url}, error: ${error}`);
411
419
  }
412
420
  } else {
413
- debug5(`popup is already closed, skip close ${url}`);
421
+ debug6(`popup is already closed, skip close ${url}`);
414
422
  }
415
423
  if (!page.isClosed()) {
416
424
  try {
417
425
  await page.goto(url);
418
426
  } catch (error) {
419
- debug5(`failed to goto ${url}, error: ${error}`);
427
+ debug6(`failed to goto ${url}, error: ${error}`);
420
428
  }
421
429
  } else {
422
- debug5(`page is already closed, skip goto ${url}`);
430
+ debug6(`page is already closed, skip goto ${url}`);
423
431
  }
424
432
  });
425
433
  }
@@ -701,6 +709,7 @@ function paramStr(task) {
701
709
  }
702
710
 
703
711
  // src/common/tasks.ts
712
+ var debug2 = getDebug2("page-task-executor");
704
713
  var replanningCountLimit = 10;
705
714
  var isAndroidPage = (page) => {
706
715
  return page.pageType === "android";
@@ -1289,13 +1298,37 @@ var PageTaskExecutor = class {
1289
1298
  };
1290
1299
  executorContext.task.recorder = [recordItem];
1291
1300
  executorContext.task.pageContext = pageContext;
1301
+ let imagePayload = pageContext.screenshotBase64;
1302
+ if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
1303
+ const size = pageContext.size;
1304
+ debug2("ui-tars-v1.5, will check image size", size);
1305
+ const currentPixels = size.width * size.height;
1306
+ const maxPixels = 16384 * 28 * 28;
1307
+ if (currentPixels > maxPixels) {
1308
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1309
+ const newWidth = Math.floor(size.width * resizeFactor);
1310
+ const newHeight = Math.floor(size.height * resizeFactor);
1311
+ debug2(
1312
+ "resize image",
1313
+ imageInfo,
1314
+ "new width",
1315
+ newWidth,
1316
+ "new height",
1317
+ newHeight
1318
+ );
1319
+ imagePayload = await resizeImgBase642(imagePayload, {
1320
+ width: newWidth,
1321
+ height: newHeight
1322
+ });
1323
+ }
1324
+ }
1292
1325
  this.appendConversationHistory({
1293
1326
  role: "user",
1294
1327
  content: [
1295
1328
  {
1296
1329
  type: "image_url",
1297
1330
  image_url: {
1298
- url: pageContext.screenshotBase64
1331
+ url: imagePayload
1299
1332
  }
1300
1333
  }
1301
1334
  ]
@@ -1638,9 +1671,9 @@ var PageTaskExecutor = class {
1638
1671
  };
1639
1672
 
1640
1673
  // src/common/plan-builder.ts
1641
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1674
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1642
1675
  import { assert as assert5 } from "@midscene/shared/utils";
1643
- var debug2 = getDebug2("plan-builder");
1676
+ var debug3 = getDebug3("plan-builder");
1644
1677
  function buildPlans(type, locateParam, param) {
1645
1678
  let returnPlans = [];
1646
1679
  const locatePlan = locateParam ? {
@@ -1702,14 +1735,14 @@ function buildPlans(type, locateParam, param) {
1702
1735
  returnPlans = [sleepPlan];
1703
1736
  }
1704
1737
  if (returnPlans) {
1705
- debug2("buildPlans", returnPlans);
1738
+ debug3("buildPlans", returnPlans);
1706
1739
  return returnPlans;
1707
1740
  }
1708
1741
  throw new Error(`Not supported type: ${type}`);
1709
1742
  }
1710
1743
 
1711
1744
  // src/common/agent.ts
1712
- var debug3 = getDebug3("web-integration");
1745
+ var debug4 = getDebug4("web-integration");
1713
1746
  var PageAgent = class {
1714
1747
  constructor(page, opts) {
1715
1748
  /**
@@ -1752,7 +1785,7 @@ var PageAgent = class {
1752
1785
  });
1753
1786
  }
1754
1787
  return await parseContextFromWebPage(this.page, {
1755
- ignoreMarker: !!vlLocateMode()
1788
+ ignoreMarker: !!vlLocateMode2()
1756
1789
  });
1757
1790
  }
1758
1791
  async setAIActionContext(prompt) {
@@ -1787,7 +1820,7 @@ var PageAgent = class {
1787
1820
  type: "dump",
1788
1821
  generateReport
1789
1822
  });
1790
- debug3("writeOutActionDumps", this.reportFile);
1823
+ debug4("writeOutActionDumps", this.reportFile);
1791
1824
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1792
1825
  printReportMsg(this.reportFile);
1793
1826
  }
@@ -1891,7 +1924,7 @@ ${errorTask?.errorStack}`);
1891
1924
  return output;
1892
1925
  }
1893
1926
  async aiAction(taskPrompt) {
1894
- const { output, executor } = await (vlLocateMode() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1927
+ const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1895
1928
  this.afterTaskRunning(executor);
1896
1929
  return output;
1897
1930
  }
@@ -1974,7 +2007,7 @@ ${errors}`);
1974
2007
  };
1975
2008
 
1976
2009
  // src/puppeteer/index.ts
1977
- import { getDebug as getDebug5 } from "@midscene/shared/logger";
2010
+ import { getDebug as getDebug6 } from "@midscene/shared/logger";
1978
2011
 
1979
2012
  // src/puppeteer/page.ts
1980
2013
  import {
@@ -1989,9 +2022,9 @@ import { sleep as sleep2 } from "@midscene/core/utils";
1989
2022
  import { DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT as DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT2 } from "@midscene/shared/constants";
1990
2023
  import { treeToList as treeToList2 } from "@midscene/shared/extractor";
1991
2024
  import { getExtraReturnLogic } from "@midscene/shared/fs";
1992
- import { getDebug as getDebug4 } from "@midscene/shared/logger";
2025
+ import { getDebug as getDebug5 } from "@midscene/shared/logger";
1993
2026
  import { assert as assert7 } from "@midscene/shared/utils";
1994
- var debugPage = getDebug4("web:page");
2027
+ var debugPage = getDebug5("web:page");
1995
2028
  var Page = class {
1996
2029
  constructor(underlyingPage, pageType, opts) {
1997
2030
  this.everMoved = false;
@@ -2274,14 +2307,14 @@ var WebPage = class extends Page {
2274
2307
 
2275
2308
  // src/puppeteer/index.ts
2276
2309
  import { overrideAIConfig } from "@midscene/shared/env";
2277
- var debug4 = getDebug5("puppeteer:agent");
2310
+ var debug5 = getDebug6("puppeteer:agent");
2278
2311
  var PuppeteerAgent = class extends PageAgent {
2279
2312
  constructor(page, opts) {
2280
2313
  const webPage = new WebPage(page);
2281
2314
  super(webPage, opts);
2282
2315
  const { forceSameTabNavigation = true } = opts ?? {};
2283
2316
  if (forceSameTabNavigation) {
2284
- forceClosePopup(page, debug4);
2317
+ forceClosePopup(page, debug5);
2285
2318
  }
2286
2319
  }
2287
2320
  };