@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39):
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -1,6 +1,6 @@
1
1
  // src/puppeteer/agent-launcher.ts
2
2
  import { readFileSync as readFileSync2 } from "fs";
3
- import { getDebug as getDebug6 } from "@midscene/shared/logger";
3
+ import { getDebug as getDebug7 } from "@midscene/shared/logger";
4
4
  import { assert as assert8 } from "@midscene/shared/utils";
5
5
 
6
6
  // src/common/agent.ts
@@ -285,8 +285,8 @@ import {
285
285
  DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
286
286
  DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
287
287
  } from "@midscene/shared/constants";
288
- import { vlLocateMode } from "@midscene/shared/env";
289
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
288
+ import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
289
+ import { getDebug as getDebug4 } from "@midscene/shared/logger";
290
290
  import { assert as assert6 } from "@midscene/shared/utils";
291
291
 
292
292
  // src/common/tasks.ts
@@ -298,6 +298,14 @@ import {
298
298
  vlmPlanning
299
299
  } from "@midscene/core/ai-model";
300
300
  import { sleep } from "@midscene/core/utils";
301
+ import { UITarsModelVersion } from "@midscene/shared/env";
302
+ import { uiTarsModelVersion } from "@midscene/shared/env";
303
+ import { vlLocateMode } from "@midscene/shared/env";
304
+ import {
305
+ imageInfo,
306
+ resizeImgBase64 as resizeImgBase642
307
+ } from "@midscene/shared/img";
308
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
301
309
  import { assert as assert4 } from "@midscene/shared/utils";
302
310
 
303
311
  // src/common/task-cache.ts
@@ -400,7 +408,7 @@ function printReportMsg(filepath) {
400
408
  function replaceIllegalPathCharsAndSpace(str) {
401
409
  return str.replace(/[/\\:*?"<>| ]/g, "-");
402
410
  }
403
- function forceClosePopup(page, debug5) {
411
+ function forceClosePopup(page, debug6) {
404
412
  page.on("popup", async (popup) => {
405
413
  if (!popup) {
406
414
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -412,19 +420,19 @@ function forceClosePopup(page, debug5) {
412
420
  try {
413
421
  await popup.close();
414
422
  } catch (error) {
415
- debug5(`failed to close popup ${url}, error: ${error}`);
423
+ debug6(`failed to close popup ${url}, error: ${error}`);
416
424
  }
417
425
  } else {
418
- debug5(`popup is already closed, skip close ${url}`);
426
+ debug6(`popup is already closed, skip close ${url}`);
419
427
  }
420
428
  if (!page.isClosed()) {
421
429
  try {
422
430
  await page.goto(url);
423
431
  } catch (error) {
424
- debug5(`failed to goto ${url}, error: ${error}`);
432
+ debug6(`failed to goto ${url}, error: ${error}`);
425
433
  }
426
434
  } else {
427
- debug5(`page is already closed, skip goto ${url}`);
435
+ debug6(`page is already closed, skip goto ${url}`);
428
436
  }
429
437
  });
430
438
  }
@@ -706,6 +714,7 @@ function paramStr(task) {
706
714
  }
707
715
 
708
716
  // src/common/tasks.ts
717
+ var debug2 = getDebug2("page-task-executor");
709
718
  var replanningCountLimit = 10;
710
719
  var isAndroidPage = (page) => {
711
720
  return page.pageType === "android";
@@ -1294,13 +1303,37 @@ var PageTaskExecutor = class {
1294
1303
  };
1295
1304
  executorContext.task.recorder = [recordItem];
1296
1305
  executorContext.task.pageContext = pageContext;
1306
+ let imagePayload = pageContext.screenshotBase64;
1307
+ if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
1308
+ const size = pageContext.size;
1309
+ debug2("ui-tars-v1.5, will check image size", size);
1310
+ const currentPixels = size.width * size.height;
1311
+ const maxPixels = 16384 * 28 * 28;
1312
+ if (currentPixels > maxPixels) {
1313
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1314
+ const newWidth = Math.floor(size.width * resizeFactor);
1315
+ const newHeight = Math.floor(size.height * resizeFactor);
1316
+ debug2(
1317
+ "resize image",
1318
+ imageInfo,
1319
+ "new width",
1320
+ newWidth,
1321
+ "new height",
1322
+ newHeight
1323
+ );
1324
+ imagePayload = await resizeImgBase642(imagePayload, {
1325
+ width: newWidth,
1326
+ height: newHeight
1327
+ });
1328
+ }
1329
+ }
1297
1330
  this.appendConversationHistory({
1298
1331
  role: "user",
1299
1332
  content: [
1300
1333
  {
1301
1334
  type: "image_url",
1302
1335
  image_url: {
1303
- url: pageContext.screenshotBase64
1336
+ url: imagePayload
1304
1337
  }
1305
1338
  }
1306
1339
  ]
@@ -1643,9 +1676,9 @@ var PageTaskExecutor = class {
1643
1676
  };
1644
1677
 
1645
1678
  // src/common/plan-builder.ts
1646
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1679
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1647
1680
  import { assert as assert5 } from "@midscene/shared/utils";
1648
- var debug2 = getDebug2("plan-builder");
1681
+ var debug3 = getDebug3("plan-builder");
1649
1682
  function buildPlans(type, locateParam, param) {
1650
1683
  let returnPlans = [];
1651
1684
  const locatePlan = locateParam ? {
@@ -1707,14 +1740,14 @@ function buildPlans(type, locateParam, param) {
1707
1740
  returnPlans = [sleepPlan];
1708
1741
  }
1709
1742
  if (returnPlans) {
1710
- debug2("buildPlans", returnPlans);
1743
+ debug3("buildPlans", returnPlans);
1711
1744
  return returnPlans;
1712
1745
  }
1713
1746
  throw new Error(`Not supported type: ${type}`);
1714
1747
  }
1715
1748
 
1716
1749
  // src/common/agent.ts
1717
- var debug3 = getDebug3("web-integration");
1750
+ var debug4 = getDebug4("web-integration");
1718
1751
  var PageAgent = class {
1719
1752
  constructor(page, opts) {
1720
1753
  /**
@@ -1757,7 +1790,7 @@ var PageAgent = class {
1757
1790
  });
1758
1791
  }
1759
1792
  return await parseContextFromWebPage(this.page, {
1760
- ignoreMarker: !!vlLocateMode()
1793
+ ignoreMarker: !!vlLocateMode2()
1761
1794
  });
1762
1795
  }
1763
1796
  async setAIActionContext(prompt) {
@@ -1792,7 +1825,7 @@ var PageAgent = class {
1792
1825
  type: "dump",
1793
1826
  generateReport
1794
1827
  });
1795
- debug3("writeOutActionDumps", this.reportFile);
1828
+ debug4("writeOutActionDumps", this.reportFile);
1796
1829
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1797
1830
  printReportMsg(this.reportFile);
1798
1831
  }
@@ -1896,7 +1929,7 @@ ${errorTask?.errorStack}`);
1896
1929
  return output;
1897
1930
  }
1898
1931
  async aiAction(taskPrompt) {
1899
- const { output, executor } = await (vlLocateMode() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1932
+ const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1900
1933
  this.afterTaskRunning(executor);
1901
1934
  return output;
1902
1935
  }
@@ -1979,7 +2012,7 @@ ${errors}`);
1979
2012
  };
1980
2013
 
1981
2014
  // src/puppeteer/index.ts
1982
- import { getDebug as getDebug5 } from "@midscene/shared/logger";
2015
+ import { getDebug as getDebug6 } from "@midscene/shared/logger";
1983
2016
 
1984
2017
  // src/puppeteer/page.ts
1985
2018
  import {
@@ -1994,9 +2027,9 @@ import { sleep as sleep2 } from "@midscene/core/utils";
1994
2027
  import { DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT as DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT2 } from "@midscene/shared/constants";
1995
2028
  import { treeToList as treeToList2 } from "@midscene/shared/extractor";
1996
2029
  import { getExtraReturnLogic } from "@midscene/shared/fs";
1997
- import { getDebug as getDebug4 } from "@midscene/shared/logger";
2030
+ import { getDebug as getDebug5 } from "@midscene/shared/logger";
1998
2031
  import { assert as assert7 } from "@midscene/shared/utils";
1999
- var debugPage = getDebug4("web:page");
2032
+ var debugPage = getDebug5("web:page");
2000
2033
  var Page = class {
2001
2034
  constructor(underlyingPage, pageType, opts) {
2002
2035
  this.everMoved = false;
@@ -2279,14 +2312,14 @@ var WebPage = class extends Page {
2279
2312
 
2280
2313
  // src/puppeteer/index.ts
2281
2314
  import { overrideAIConfig } from "@midscene/shared/env";
2282
- var debug4 = getDebug5("puppeteer:agent");
2315
+ var debug5 = getDebug6("puppeteer:agent");
2283
2316
  var PuppeteerAgent = class extends PageAgent {
2284
2317
  constructor(page, opts) {
2285
2318
  const webPage = new WebPage(page);
2286
2319
  super(webPage, opts);
2287
2320
  const { forceSameTabNavigation = true } = opts ?? {};
2288
2321
  if (forceSameTabNavigation) {
2289
- forceClosePopup(page, debug4);
2322
+ forceClosePopup(page, debug5);
2290
2323
  }
2291
2324
  }
2292
2325
  };
@@ -2299,7 +2332,7 @@ var defaultViewportWidth = 1440;
2299
2332
  var defaultViewportHeight = 768;
2300
2333
  var defaultViewportScale = process.platform === "darwin" ? 2 : 1;
2301
2334
  var defaultWaitForNetworkIdleTimeout = DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT3;
2302
- var launcherDebug = getDebug6("puppeteer:launcher");
2335
+ var launcherDebug = getDebug7("puppeteer:launcher");
2303
2336
  async function launchPuppeteerPage(target, preference) {
2304
2337
  assert8(target.url, "url is required");
2305
2338
  const freeFn = [];
@@ -2361,10 +2394,13 @@ async function launchPuppeteerPage(target, preference) {
2361
2394
  // add 200px for the address bar
2362
2395
  ];
2363
2396
  launcherDebug(
2364
- "launching browser with viewport, headed: %s, viewport: %j, args: %j, preference: %j",
2397
+ "launching browser with viewport, headed",
2365
2398
  headed,
2399
+ "viewport",
2366
2400
  viewportConfig,
2401
+ "args",
2367
2402
  args,
2403
+ "preference",
2368
2404
  preference
2369
2405
  );
2370
2406
  const browser = await puppeteer.launch({