@midscene/web 0.16.0 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/es/agent.js +43 -10
- package/dist/es/agent.js.map +1 -1
- package/dist/es/bridge-mode-browser.js +3 -3
- package/dist/es/bridge-mode.js +45 -12
- package/dist/es/bridge-mode.js.map +1 -1
- package/dist/es/chrome-extension.js +44 -11
- package/dist/es/chrome-extension.js.map +1 -1
- package/dist/es/index.js +58 -25
- package/dist/es/index.js.map +1 -1
- package/dist/es/midscene-playground.js +43 -10
- package/dist/es/midscene-playground.js.map +1 -1
- package/dist/es/playground.js +43 -10
- package/dist/es/playground.js.map +1 -1
- package/dist/es/playwright.js +55 -22
- package/dist/es/playwright.js.map +1 -1
- package/dist/es/puppeteer-agent-launcher.js +59 -23
- package/dist/es/puppeteer-agent-launcher.js.map +1 -1
- package/dist/es/puppeteer.js +53 -20
- package/dist/es/puppeteer.js.map +1 -1
- package/dist/lib/agent.js +40 -10
- package/dist/lib/agent.js.map +1 -1
- package/dist/lib/bridge-mode-browser.js +3 -3
- package/dist/lib/bridge-mode.js +45 -15
- package/dist/lib/bridge-mode.js.map +1 -1
- package/dist/lib/chrome-extension.js +43 -13
- package/dist/lib/chrome-extension.js.map +1 -1
- package/dist/lib/index.js +57 -27
- package/dist/lib/index.js.map +1 -1
- package/dist/lib/midscene-playground.js +43 -13
- package/dist/lib/midscene-playground.js.map +1 -1
- package/dist/lib/playground.js +40 -10
- package/dist/lib/playground.js.map +1 -1
- package/dist/lib/playwright.js +54 -24
- package/dist/lib/playwright.js.map +1 -1
- package/dist/lib/puppeteer-agent-launcher.js +57 -24
- package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
- package/dist/lib/puppeteer.js +52 -22
- package/dist/lib/puppeteer.js.map +1 -1
- package/package.json +3 -3
package/dist/es/agent.js
CHANGED
|
@@ -280,8 +280,8 @@ import {
|
|
|
280
280
|
DEFAULT_WAIT_FOR_NAVIGATION_TIMEOUT,
|
|
281
281
|
DEFAULT_WAIT_FOR_NETWORK_IDLE_TIMEOUT
|
|
282
282
|
} from "@midscene/shared/constants";
|
|
283
|
-
import { vlLocateMode } from "@midscene/shared/env";
|
|
284
|
-
import { getDebug as
|
|
283
|
+
import { vlLocateMode as vlLocateMode2 } from "@midscene/shared/env";
|
|
284
|
+
import { getDebug as getDebug4 } from "@midscene/shared/logger";
|
|
285
285
|
import { assert as assert6 } from "@midscene/shared/utils";
|
|
286
286
|
|
|
287
287
|
// src/common/tasks.ts
|
|
@@ -293,6 +293,14 @@ import {
|
|
|
293
293
|
vlmPlanning
|
|
294
294
|
} from "@midscene/core/ai-model";
|
|
295
295
|
import { sleep } from "@midscene/core/utils";
|
|
296
|
+
import { UITarsModelVersion } from "@midscene/shared/env";
|
|
297
|
+
import { uiTarsModelVersion } from "@midscene/shared/env";
|
|
298
|
+
import { vlLocateMode } from "@midscene/shared/env";
|
|
299
|
+
import {
|
|
300
|
+
imageInfo,
|
|
301
|
+
resizeImgBase64 as resizeImgBase642
|
|
302
|
+
} from "@midscene/shared/img";
|
|
303
|
+
import { getDebug as getDebug2 } from "@midscene/shared/logger";
|
|
296
304
|
import { assert as assert4 } from "@midscene/shared/utils";
|
|
297
305
|
|
|
298
306
|
// src/common/task-cache.ts
|
|
@@ -673,6 +681,7 @@ function paramStr(task) {
|
|
|
673
681
|
}
|
|
674
682
|
|
|
675
683
|
// src/common/tasks.ts
|
|
684
|
+
var debug2 = getDebug2("page-task-executor");
|
|
676
685
|
var replanningCountLimit = 10;
|
|
677
686
|
var isAndroidPage = (page) => {
|
|
678
687
|
return page.pageType === "android";
|
|
@@ -1261,13 +1270,37 @@ var PageTaskExecutor = class {
|
|
|
1261
1270
|
};
|
|
1262
1271
|
executorContext.task.recorder = [recordItem];
|
|
1263
1272
|
executorContext.task.pageContext = pageContext;
|
|
1273
|
+
let imagePayload = pageContext.screenshotBase64;
|
|
1274
|
+
if (vlLocateMode() === "vlm-ui-tars" && uiTarsModelVersion() === UITarsModelVersion.V1_5) {
|
|
1275
|
+
const size = pageContext.size;
|
|
1276
|
+
debug2("ui-tars-v1.5, will check image size", size);
|
|
1277
|
+
const currentPixels = size.width * size.height;
|
|
1278
|
+
const maxPixels = 16384 * 28 * 28;
|
|
1279
|
+
if (currentPixels > maxPixels) {
|
|
1280
|
+
const resizeFactor = Math.sqrt(maxPixels / currentPixels);
|
|
1281
|
+
const newWidth = Math.floor(size.width * resizeFactor);
|
|
1282
|
+
const newHeight = Math.floor(size.height * resizeFactor);
|
|
1283
|
+
debug2(
|
|
1284
|
+
"resize image",
|
|
1285
|
+
imageInfo,
|
|
1286
|
+
"new width",
|
|
1287
|
+
newWidth,
|
|
1288
|
+
"new height",
|
|
1289
|
+
newHeight
|
|
1290
|
+
);
|
|
1291
|
+
imagePayload = await resizeImgBase642(imagePayload, {
|
|
1292
|
+
width: newWidth,
|
|
1293
|
+
height: newHeight
|
|
1294
|
+
});
|
|
1295
|
+
}
|
|
1296
|
+
}
|
|
1264
1297
|
this.appendConversationHistory({
|
|
1265
1298
|
role: "user",
|
|
1266
1299
|
content: [
|
|
1267
1300
|
{
|
|
1268
1301
|
type: "image_url",
|
|
1269
1302
|
image_url: {
|
|
1270
|
-
url:
|
|
1303
|
+
url: imagePayload
|
|
1271
1304
|
}
|
|
1272
1305
|
}
|
|
1273
1306
|
]
|
|
@@ -1610,9 +1643,9 @@ var PageTaskExecutor = class {
|
|
|
1610
1643
|
};
|
|
1611
1644
|
|
|
1612
1645
|
// src/common/plan-builder.ts
|
|
1613
|
-
import { getDebug as
|
|
1646
|
+
import { getDebug as getDebug3 } from "@midscene/shared/logger";
|
|
1614
1647
|
import { assert as assert5 } from "@midscene/shared/utils";
|
|
1615
|
-
var
|
|
1648
|
+
var debug3 = getDebug3("plan-builder");
|
|
1616
1649
|
function buildPlans(type, locateParam, param) {
|
|
1617
1650
|
let returnPlans = [];
|
|
1618
1651
|
const locatePlan = locateParam ? {
|
|
@@ -1674,14 +1707,14 @@ function buildPlans(type, locateParam, param) {
|
|
|
1674
1707
|
returnPlans = [sleepPlan];
|
|
1675
1708
|
}
|
|
1676
1709
|
if (returnPlans) {
|
|
1677
|
-
|
|
1710
|
+
debug3("buildPlans", returnPlans);
|
|
1678
1711
|
return returnPlans;
|
|
1679
1712
|
}
|
|
1680
1713
|
throw new Error(`Not supported type: ${type}`);
|
|
1681
1714
|
}
|
|
1682
1715
|
|
|
1683
1716
|
// src/common/agent.ts
|
|
1684
|
-
var
|
|
1717
|
+
var debug4 = getDebug4("web-integration");
|
|
1685
1718
|
var PageAgent = class {
|
|
1686
1719
|
constructor(page, opts) {
|
|
1687
1720
|
/**
|
|
@@ -1724,7 +1757,7 @@ var PageAgent = class {
|
|
|
1724
1757
|
});
|
|
1725
1758
|
}
|
|
1726
1759
|
return await parseContextFromWebPage(this.page, {
|
|
1727
|
-
ignoreMarker: !!
|
|
1760
|
+
ignoreMarker: !!vlLocateMode2()
|
|
1728
1761
|
});
|
|
1729
1762
|
}
|
|
1730
1763
|
async setAIActionContext(prompt) {
|
|
@@ -1759,7 +1792,7 @@ var PageAgent = class {
|
|
|
1759
1792
|
type: "dump",
|
|
1760
1793
|
generateReport
|
|
1761
1794
|
});
|
|
1762
|
-
|
|
1795
|
+
debug4("writeOutActionDumps", this.reportFile);
|
|
1763
1796
|
if (generateReport && autoPrintReportMsg && this.reportFile) {
|
|
1764
1797
|
printReportMsg(this.reportFile);
|
|
1765
1798
|
}
|
|
@@ -1863,7 +1896,7 @@ ${errorTask?.errorStack}`);
|
|
|
1863
1896
|
return output;
|
|
1864
1897
|
}
|
|
1865
1898
|
async aiAction(taskPrompt) {
|
|
1866
|
-
const { output, executor } = await (
|
|
1899
|
+
const { output, executor } = await (vlLocateMode2() === "vlm-ui-tars" ? this.taskExecutor.actionToGoal(taskPrompt) : this.taskExecutor.action(taskPrompt, this.opts.aiActionContext));
|
|
1867
1900
|
this.afterTaskRunning(executor);
|
|
1868
1901
|
return output;
|
|
1869
1902
|
}
|