@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
@@ -40,7 +40,7 @@ __export(agent_launcher_exports, {
40
40
  });
41
41
  module.exports = __toCommonJS(agent_launcher_exports);
42
42
  var import_node_fs3 = require("fs");
43
- var import_logger6 = require("@midscene/shared/logger");
43
+ var import_logger7 = require("@midscene/shared/logger");
44
44
  var import_utils18 = require("@midscene/shared/utils");
45
45
 
46
46
  // src/common/agent.ts
@@ -315,14 +315,19 @@ function parseYamlScript(content, filePath, ignoreCheckingTarget) {
315
315
  // src/common/agent.ts
316
316
  var import_utils11 = require("@midscene/core/utils");
317
317
  var import_constants = require("@midscene/shared/constants");
318
- var import_env3 = require("@midscene/shared/env");
319
- var import_logger3 = require("@midscene/shared/logger");
318
+ var import_env6 = require("@midscene/shared/env");
319
+ var import_logger4 = require("@midscene/shared/logger");
320
320
  var import_utils12 = require("@midscene/shared/utils");
321
321
 
322
322
  // src/common/tasks.ts
323
323
  var import_core = require("@midscene/core");
324
324
  var import_ai_model = require("@midscene/core/ai-model");
325
325
  var import_utils8 = require("@midscene/core/utils");
326
+ var import_env3 = require("@midscene/shared/env");
327
+ var import_env4 = require("@midscene/shared/env");
328
+ var import_env5 = require("@midscene/shared/env");
329
+ var import_img2 = require("@midscene/shared/img");
330
+ var import_logger2 = require("@midscene/shared/logger");
326
331
  var import_utils9 = require("@midscene/shared/utils");
327
332
 
328
333
  // src/common/task-cache.ts
@@ -425,7 +430,7 @@ function printReportMsg(filepath) {
425
430
  function replaceIllegalPathCharsAndSpace(str) {
426
431
  return str.replace(/[/\\:*?"<>| ]/g, "-");
427
432
  }
428
- function forceClosePopup(page, debug5) {
433
+ function forceClosePopup(page, debug6) {
429
434
  page.on("popup", async (popup) => {
430
435
  if (!popup) {
431
436
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -437,19 +442,19 @@ function forceClosePopup(page, debug5) {
437
442
  try {
438
443
  await popup.close();
439
444
  } catch (error) {
440
- debug5(`failed to close popup ${url}, error: ${error}`);
445
+ debug6(`failed to close popup ${url}, error: ${error}`);
441
446
  }
442
447
  } else {
443
- debug5(`popup is already closed, skip close ${url}`);
448
+ debug6(`popup is already closed, skip close ${url}`);
444
449
  }
445
450
  if (!page.isClosed()) {
446
451
  try {
447
452
  await page.goto(url);
448
453
  } catch (error) {
449
- debug5(`failed to goto ${url}, error: ${error}`);
454
+ debug6(`failed to goto ${url}, error: ${error}`);
450
455
  }
451
456
  } else {
452
- debug5(`page is already closed, skip goto ${url}`);
457
+ debug6(`page is already closed, skip goto ${url}`);
453
458
  }
454
459
  });
455
460
  }
@@ -731,6 +736,7 @@ function paramStr(task) {
731
736
  }
732
737
 
733
738
  // src/common/tasks.ts
739
+ var debug2 = (0, import_logger2.getDebug)("page-task-executor");
734
740
  var replanningCountLimit = 10;
735
741
  var isAndroidPage = (page) => {
736
742
  return page.pageType === "android";
@@ -1319,13 +1325,37 @@ var PageTaskExecutor = class {
1319
1325
  };
1320
1326
  executorContext.task.recorder = [recordItem];
1321
1327
  executorContext.task.pageContext = pageContext;
1328
+ let imagePayload = pageContext.screenshotBase64;
1329
+ if ((0, import_env5.vlLocateMode)() === "vlm-ui-tars" && (0, import_env4.uiTarsModelVersion)() === import_env3.UITarsModelVersion.V1_5) {
1330
+ const size = pageContext.size;
1331
+ debug2("ui-tars-v1.5, will check image size", size);
1332
+ const currentPixels = size.width * size.height;
1333
+ const maxPixels = 16384 * 28 * 28;
1334
+ if (currentPixels > maxPixels) {
1335
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1336
+ const newWidth = Math.floor(size.width * resizeFactor);
1337
+ const newHeight = Math.floor(size.height * resizeFactor);
1338
+ debug2(
1339
+ "resize image",
1340
+ import_img2.imageInfo,
1341
+ "new width",
1342
+ newWidth,
1343
+ "new height",
1344
+ newHeight
1345
+ );
1346
+ imagePayload = await (0, import_img2.resizeImgBase64)(imagePayload, {
1347
+ width: newWidth,
1348
+ height: newHeight
1349
+ });
1350
+ }
1351
+ }
1322
1352
  this.appendConversationHistory({
1323
1353
  role: "user",
1324
1354
  content: [
1325
1355
  {
1326
1356
  type: "image_url",
1327
1357
  image_url: {
1328
- url: pageContext.screenshotBase64
1358
+ url: imagePayload
1329
1359
  }
1330
1360
  }
1331
1361
  ]
@@ -1668,9 +1698,9 @@ var PageTaskExecutor = class {
1668
1698
  };
1669
1699
 
1670
1700
  // src/common/plan-builder.ts
1671
- var import_logger2 = require("@midscene/shared/logger");
1701
+ var import_logger3 = require("@midscene/shared/logger");
1672
1702
  var import_utils10 = require("@midscene/shared/utils");
1673
- var debug2 = (0, import_logger2.getDebug)("plan-builder");
1703
+ var debug3 = (0, import_logger3.getDebug)("plan-builder");
1674
1704
  function buildPlans(type, locateParam, param) {
1675
1705
  let returnPlans = [];
1676
1706
  const locatePlan = locateParam ? {
@@ -1732,14 +1762,14 @@ function buildPlans(type, locateParam, param) {
1732
1762
  returnPlans = [sleepPlan];
1733
1763
  }
1734
1764
  if (returnPlans) {
1735
- debug2("buildPlans", returnPlans);
1765
+ debug3("buildPlans", returnPlans);
1736
1766
  return returnPlans;
1737
1767
  }
1738
1768
  throw new Error(`Not supported type: ${type}`);
1739
1769
  }
1740
1770
 
1741
1771
  // src/common/agent.ts
1742
- var debug3 = (0, import_logger3.getDebug)("web-integration");
1772
+ var debug4 = (0, import_logger4.getDebug)("web-integration");
1743
1773
  var PageAgent = class {
1744
1774
  constructor(page, opts) {
1745
1775
  /**
@@ -1782,7 +1812,7 @@ var PageAgent = class {
1782
1812
  });
1783
1813
  }
1784
1814
  return await parseContextFromWebPage(this.page, {
1785
- ignoreMarker: !!(0, import_env3.vlLocateMode)()
1815
+ ignoreMarker: !!(0, import_env6.vlLocateMode)()
1786
1816
  });
1787
1817
  }
1788
1818
  async setAIActionContext(prompt) {
@@ -1817,7 +1847,7 @@ var PageAgent = class {
1817
1847
  type: "dump",
1818
1848
  generateReport
1819
1849
  });
1820
- debug3("writeOutActionDumps", this.reportFile);
1850
+ debug4("writeOutActionDumps", this.reportFile);
1821
1851
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1822
1852
  printReportMsg(this.reportFile);
1823
1853
  }
@@ -1921,7 +1951,7 @@ ${errorTask?.errorStack}`);
1921
1951
  return output;
1922
1952
  }
1923
1953
  async aiAction(taskPrompt) {
1924
- const { output, executor } = await ((0, import_env3.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1954
+ const { output, executor } = await ((0, import_env6.vlLocateMode)() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1925
1955
  this.afterTaskRunning(executor);
1926
1956
  return output;
1927
1957
  }
@@ -2004,7 +2034,7 @@ ${errors}`);
2004
2034
  };
2005
2035
 
2006
2036
  // src/puppeteer/index.ts
2007
- var import_logger5 = require("@midscene/shared/logger");
2037
+ var import_logger6 = require("@midscene/shared/logger");
2008
2038
 
2009
2039
  // src/puppeteer/page.ts
2010
2040
  var import_constants3 = require("@midscene/shared/constants");
@@ -2014,9 +2044,9 @@ var import_utils15 = require("@midscene/core/utils");
2014
2044
  var import_constants2 = require("@midscene/shared/constants");
2015
2045
  var import_extractor2 = require("@midscene/shared/extractor");
2016
2046
  var import_fs2 = require("@midscene/shared/fs");
2017
- var import_logger4 = require("@midscene/shared/logger");
2047
+ var import_logger5 = require("@midscene/shared/logger");
2018
2048
  var import_utils16 = require("@midscene/shared/utils");
2019
- var debugPage = (0, import_logger4.getDebug)("web:page");
2049
+ var debugPage = (0, import_logger5.getDebug)("web:page");
2020
2050
  var Page = class {
2021
2051
  constructor(underlyingPage, pageType, opts) {
2022
2052
  this.everMoved = false;
@@ -2298,15 +2328,15 @@ var WebPage = class extends Page {
2298
2328
  };
2299
2329
 
2300
2330
  // src/puppeteer/index.ts
2301
- var import_env4 = require("@midscene/shared/env");
2302
- var debug4 = (0, import_logger5.getDebug)("puppeteer:agent");
2331
+ var import_env7 = require("@midscene/shared/env");
2332
+ var debug5 = (0, import_logger6.getDebug)("puppeteer:agent");
2303
2333
  var PuppeteerAgent = class extends PageAgent {
2304
2334
  constructor(page, opts) {
2305
2335
  const webPage = new WebPage(page);
2306
2336
  super(webPage, opts);
2307
2337
  const { forceSameTabNavigation = true } = opts ?? {};
2308
2338
  if (forceSameTabNavigation) {
2309
- forceClosePopup(page, debug4);
2339
+ forceClosePopup(page, debug5);
2310
2340
  }
2311
2341
  }
2312
2342
  };
@@ -2319,7 +2349,7 @@ var defaultViewportWidth = 1440;
2319
2349
  var defaultViewportHeight = 768;
2320
2350
  var defaultViewportScale = process.platform === "darwin" ? 2 : 1;
2321
2351
  var defaultWaitForNetworkIdleTimeout = import_constants4.DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT;
2322
- var launcherDebug = (0, import_logger6.getDebug)("puppeteer:launcher");
2352
+ var launcherDebug = (0, import_logger7.getDebug)("puppeteer:launcher");
2323
2353
  async function launchPuppeteerPage(target, preference) {
2324
2354
  (0, import_utils18.assert)(target.url, "url is required");
2325
2355
  const freeFn = [];
@@ -2381,10 +2411,13 @@ async function launchPuppeteerPage(target, preference) {
2381
2411
  // add 200px for the address bar
2382
2412
  ];
2383
2413
  launcherDebug(
2384
- "launching browser with viewport, headed: %s, viewport: %j, args: %j, preference: %j",
2414
+ "launching browser with viewport, headed",
2385
2415
  headed,
2416
+ "viewport",
2386
2417
  viewportConfig,
2418
+ "args",
2387
2419
  args,
2420
+ "preference",
2388
2421
  preference
2389
2422
  );
2390
2423
  const browser = await import_puppeteer2.default.launch({