@midscene/web 0.16.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/dist/es/agent.js +43 -10
  2. package/dist/es/agent.js.map +1 -1
  3. package/dist/es/bridge-mode-browser.js +3 -3
  4. package/dist/es/bridge-mode.js +45 -12
  5. package/dist/es/bridge-mode.js.map +1 -1
  6. package/dist/es/chrome-extension.js +44 -11
  7. package/dist/es/chrome-extension.js.map +1 -1
  8. package/dist/es/index.js +58 -25
  9. package/dist/es/index.js.map +1 -1
  10. package/dist/es/midscene-playground.js +43 -10
  11. package/dist/es/midscene-playground.js.map +1 -1
  12. package/dist/es/playground.js +43 -10
  13. package/dist/es/playground.js.map +1 -1
  14. package/dist/es/playwright.js +55 -22
  15. package/dist/es/playwright.js.map +1 -1
  16. package/dist/es/puppeteer-agent-launcher.js +59 -23
  17. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  18. package/dist/es/puppeteer.js +53 -20
  19. package/dist/es/puppeteer.js.map +1 -1
  20. package/dist/lib/agent.js +40 -10
  21. package/dist/lib/agent.js.map +1 -1
  22. package/dist/lib/bridge-mode-browser.js +3 -3
  23. package/dist/lib/bridge-mode.js +45 -15
  24. package/dist/lib/bridge-mode.js.map +1 -1
  25. package/dist/lib/chrome-extension.js +43 -13
  26. package/dist/lib/chrome-extension.js.map +1 -1
  27. package/dist/lib/index.js +57 -27
  28. package/dist/lib/index.js.map +1 -1
  29. package/dist/lib/midscene-playground.js +43 -13
  30. package/dist/lib/midscene-playground.js.map +1 -1
  31. package/dist/lib/playground.js +40 -10
  32. package/dist/lib/playground.js.map +1 -1
  33. package/dist/lib/playwright.js +54 -24
  34. package/dist/lib/playwright.js.map +1 -1
  35. package/dist/lib/puppeteer-agent-launcher.js +57 -24
  36. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  37. package/dist/lib/puppeteer.js +52 -22
  38. package/dist/lib/puppeteer.js.map +1 -1
  39. package/package.json +3 -3
package/dist/es/index.js CHANGED
@@ -280,8 +280,8 @@ import {
280
280
  DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
281
281
  DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
282
282
  } from "@midscene/shared/constants";
283
- import { vlLocateMode } from "@midscene/shared/env";
284
- import { getDebug as getDebug3 } from "@midscene/shared/logger";
283
+ import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
284
+ import { getDebug as getDebug4 } from "@midscene/shared/logger";
285
285
  import { assert as assert6 } from "@midscene/shared/utils";
286
286
 
287
287
  // src/common/tasks.ts
@@ -293,6 +293,14 @@ import {
293
293
  vlmPlanning
294
294
  } from "@midscene/core/ai-model";
295
295
  import { sleep } from "@midscene/core/utils";
296
+ import { UITarsModelVersion } from "@midscene/shared/env";
297
+ import { uiTarsModelVersion } from "@midscene/shared/env";
298
+ import { vlLocateMode } from "@midscene/shared/env";
299
+ import {
300
+ imageInfo,
301
+ resizeImgBase64 as resizeImgBase642
302
+ } from "@midscene/shared/img";
303
+ import { getDebug as getDebug2 } from "@midscene/shared/logger";
296
304
  import { assert as assert4 } from "@midscene/shared/utils";
297
305
 
298
306
  // src/common/task-cache.ts
@@ -395,7 +403,7 @@ function printReportMsg(filepath) {
395
403
  function replaceIllegalPathCharsAndSpace(str) {
396
404
  return str.replace(/[/\\:*?"<>| ]/g, "-");
397
405
  }
398
- function forceClosePopup(page, debug6) {
406
+ function forceClosePopup(page, debug7) {
399
407
  page.on("popup", async (popup) => {
400
408
  if (!popup) {
401
409
  console.warn("got a popup event, but the popup is not ready yet, skip");
@@ -407,19 +415,19 @@ function forceClosePopup(page, debug6) {
407
415
  try {
408
416
  await popup.close();
409
417
  } catch (error) {
410
- debug6(`failed to close popup ${url}, error: ${error}`);
418
+ debug7(`failed to close popup ${url}, error: ${error}`);
411
419
  }
412
420
  } else {
413
- debug6(`popup is already closed, skip close ${url}`);
421
+ debug7(`popup is already closed, skip close ${url}`);
414
422
  }
415
423
  if (!page.isClosed()) {
416
424
  try {
417
425
  await page.goto(url);
418
426
  } catch (error) {
419
- debug6(`failed to goto ${url}, error: ${error}`);
427
+ debug7(`failed to goto ${url}, error: ${error}`);
420
428
  }
421
429
  } else {
422
- debug6(`page is already closed, skip goto ${url}`);
430
+ debug7(`page is already closed, skip goto ${url}`);
423
431
  }
424
432
  });
425
433
  }
@@ -701,6 +709,7 @@ function paramStr(task) {
701
709
  }
702
710
 
703
711
  // src/common/tasks.ts
712
+ var debug2 = getDebug2("page-task-executor");
704
713
  var replanningCountLimit = 10;
705
714
  var isAndroidPage = (page) => {
706
715
  return page.pageType === "android";
@@ -1289,13 +1298,37 @@ var PageTaskExecutor = class {
1289
1298
  };
1290
1299
  executorContext.task.recorder = [recordItem];
1291
1300
  executorContext.task.pageContext = pageContext;
1301
+ let imagePayload = pageContext.screenshotBase64;
1302
+ if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
1303
+ const size = pageContext.size;
1304
+ debug2("ui-tars-v1.5, will check image size", size);
1305
+ const currentPixels = size.width * size.height;
1306
+ const maxPixels = 16384 * 28 * 28;
1307
+ if (currentPixels > maxPixels) {
1308
+ const resizeFactor = Math.sqrt(maxPixels / currentPixels);
1309
+ const newWidth = Math.floor(size.width * resizeFactor);
1310
+ const newHeight = Math.floor(size.height * resizeFactor);
1311
+ debug2(
1312
+ "resize image",
1313
+ imageInfo,
1314
+ "new width",
1315
+ newWidth,
1316
+ "new height",
1317
+ newHeight
1318
+ );
1319
+ imagePayload = await resizeImgBase642(imagePayload, {
1320
+ width: newWidth,
1321
+ height: newHeight
1322
+ });
1323
+ }
1324
+ }
1292
1325
  this.appendConversationHistory({
1293
1326
  role: "user",
1294
1327
  content: [
1295
1328
  {
1296
1329
  type: "image_url",
1297
1330
  image_url: {
1298
- url: pageContext.screenshotBase64
1331
+ url: imagePayload
1299
1332
  }
1300
1333
  }
1301
1334
  ]
@@ -1638,9 +1671,9 @@ var PageTaskExecutor = class {
1638
1671
  };
1639
1672
 
1640
1673
  // src/common/plan-builder.ts
1641
- import { getDebug as getDebug2 } from "@midscene/shared/logger";
1674
+ import { getDebug as getDebug3 } from "@midscene/shared/logger";
1642
1675
  import { assert as assert5 } from "@midscene/shared/utils";
1643
- var debug2 = getDebug2("plan-builder");
1676
+ var debug3 = getDebug3("plan-builder");
1644
1677
  function buildPlans(type, locateParam, param) {
1645
1678
  let returnPlans = [];
1646
1679
  const locatePlan = locateParam ? {
@@ -1702,14 +1735,14 @@ function buildPlans(type, locateParam, param) {
1702
1735
  returnPlans = [sleepPlan];
1703
1736
  }
1704
1737
  if (returnPlans) {
1705
- debug2("buildPlans", returnPlans);
1738
+ debug3("buildPlans", returnPlans);
1706
1739
  return returnPlans;
1707
1740
  }
1708
1741
  throw new Error(`Not supported type: ${type}`);
1709
1742
  }
1710
1743
 
1711
1744
  // src/common/agent.ts
1712
- var debug3 = getDebug3("web-integration");
1745
+ var debug4 = getDebug4("web-integration");
1713
1746
  var PageAgent = class {
1714
1747
  constructor(page, opts) {
1715
1748
  /**
@@ -1752,7 +1785,7 @@ var PageAgent = class {
1752
1785
  });
1753
1786
  }
1754
1787
  return await parseContextFromWebPage(this.page, {
1755
- ignoreMarker: !!vlLocateMode()
1788
+ ignoreMarker: !!vlLocateMode2()
1756
1789
  });
1757
1790
  }
1758
1791
  async setAIActionContext(prompt) {
@@ -1787,7 +1820,7 @@ var PageAgent = class {
1787
1820
  type: "dump",
1788
1821
  generateReport
1789
1822
  });
1790
- debug3("writeOutActionDumps", this.reportFile);
1823
+ debug4("writeOutActionDumps", this.reportFile);
1791
1824
  if (generateReport && autoPrintReportMsg && this.reportFile) {
1792
1825
  printReportMsg(this.reportFile);
1793
1826
  }
@@ -1891,7 +1924,7 @@ ${errorTask?.errorStack}`);
1891
1924
  return output;
1892
1925
  }
1893
1926
  async aiAction(taskPrompt) {
1894
- const { output, executor } = await (vlLocateMode() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1927
+ const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
1895
1928
  this.afterTaskRunning(executor);
1896
1929
  return output;
1897
1930
  }
@@ -1978,9 +2011,9 @@ import { sleep as sleep2 } from "@midscene/core/utils";
1978
2011
  import { DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT as DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT2 } from "@midscene/shared/constants";
1979
2012
  import { treeToList as treeToList2 } from "@midscene/shared/extractor";
1980
2013
  import { getExtraReturnLogic } from "@midscene/shared/fs";
1981
- import { getDebug as getDebug4 } from "@midscene/shared/logger";
2014
+ import { getDebug as getDebug5 } from "@midscene/shared/logger";
1982
2015
  import { assert as assert7 } from "@midscene/shared/utils";
1983
- var debugPage = getDebug4("web:page");
2016
+ var debugPage = getDebug5("web:page");
1984
2017
  var Page = class {
1985
2018
  constructor(underlyingPage, pageType, opts) {
1986
2019
  this.everMoved = false;
@@ -2250,9 +2283,9 @@ var WebPage = class extends Page {
2250
2283
 
2251
2284
  // src/playwright/ai-fixture.ts
2252
2285
  import { randomUUID } from "crypto";
2253
- import { getDebug as getDebug5 } from "@midscene/shared/logger";
2286
+ import { getDebug as getDebug6 } from "@midscene/shared/logger";
2254
2287
  import { test } from "@playwright/test";
2255
- var debugPage2 = getDebug5("web:playwright:ai-fixture");
2288
+ var debugPage2 = getDebug6("web:playwright:ai-fixture");
2256
2289
  var groupAndCaseForTest = (testInfo) => {
2257
2290
  let taskFile;
2258
2291
  let taskTitle;
@@ -2440,15 +2473,15 @@ var PlaywrightAiFixture = (options) => {
2440
2473
 
2441
2474
  // src/playwright/index.ts
2442
2475
  import { overrideAIConfig } from "@midscene/shared/env";
2443
- import { getDebug as getDebug6 } from "@midscene/shared/logger";
2444
- var debug4 = getDebug6("playwright:agent");
2476
+ import { getDebug as getDebug7 } from "@midscene/shared/logger";
2477
+ var debug5 = getDebug7("playwright:agent");
2445
2478
  var PlaywrightAgent = class extends PageAgent {
2446
2479
  constructor(page, opts) {
2447
2480
  const webPage = new WebPage(page);
2448
2481
  super(webPage, opts);
2449
2482
  const { forceSameTabNavigation = true } = opts ?? {};
2450
2483
  if (forceSameTabNavigation) {
2451
- forceClosePopup(page, debug4);
2484
+ forceClosePopup(page, debug5);
2452
2485
  }
2453
2486
  }
2454
2487
  async waitForNetworkIdle(timeout = 1e3) {
@@ -2457,7 +2490,7 @@ var PlaywrightAgent = class extends PageAgent {
2457
2490
  };
2458
2491
 
2459
2492
  // src/puppeteer/index.ts
2460
- import { getDebug as getDebug7 } from "@midscene/shared/logger";
2493
+ import { getDebug as getDebug8 } from "@midscene/shared/logger";
2461
2494
 
2462
2495
  // src/puppeteer/page.ts
2463
2496
  import {
@@ -2487,14 +2520,14 @@ var WebPage2 = class extends Page {
2487
2520
 
2488
2521
  // src/puppeteer/index.ts
2489
2522
  import { overrideAIConfig as overrideAIConfig2 } from "@midscene/shared/env";
2490
- var debug5 = getDebug7("puppeteer:agent");
2523
+ var debug6 = getDebug8("puppeteer:agent");
2491
2524
  var PuppeteerAgent = class extends PageAgent {
2492
2525
  constructor(page, opts) {
2493
2526
  const webPage = new WebPage2(page);
2494
2527
  super(webPage, opts);
2495
2528
  const { forceSameTabNavigation = true } = opts ?? {};
2496
2529
  if (forceSameTabNavigation) {
2497
- forceClosePopup(page, debug5);
2530
+ forceClosePopup(page, debug6);
2498
2531
  }
2499
2532
  }
2500
2533
  };