@skrillex1224/playwright-toolkit 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +21 -0
- package/dist/src/apify-kit.js +96 -0
- package/dist/src/constants.js +21 -0
- package/dist/src/humanize.js +30 -0
- package/dist/src/launch.js +27 -0
- package/dist/src/live-view.js +64 -0
- package/dist/src/stealth.js +70 -0
- package/dist/src/utils.js +24 -0
- package/package.json +12 -2
- package/index.js +0 -21
- package/src/apify-kit.js +0 -108
- package/src/constants.js +0 -18
- package/src/humanize.js +0 -37
- package/src/launch.js +0 -26
- package/src/live-view.js +0 -81
- package/src/stealth.js +0 -75
- package/src/utils.js +0 -22
package/dist/index.js
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { ApifyKit } from "./src/apify-kit.js";
|
|
2
|
+
import { Utils } from "./src/utils.js";
|
|
3
|
+
import { Stealth } from "./src/stealth.js";
|
|
4
|
+
import { Humanize } from "./src/humanize.js";
|
|
5
|
+
import { Launch } from "./src/launch.js";
|
|
6
|
+
import { LiveView } from "./src/live-view.js";
|
|
7
|
+
import * as Constants from "./src/constants.js";
|
|
8
|
+
/**
 * Unified entry point of the toolkit.
 *
 * Every call builds a fresh plain object that exposes the toolkit modules
 * imported at the top of this file under their own names.
 *
 * @returns {{ApifyKit: object, Stealth: object, Humanize: object, Launch: object, LiveView: object, Constants: object, Utils: object}}
 *   The bundled toolkit modules.
 */
const usePlaywrightToolKit = () => ({
  ApifyKit,
  Stealth,
  Humanize,
  Launch,
  LiveView,
  Constants,
  Utils
});
|
|
19
|
+
export {
|
|
20
|
+
usePlaywrightToolKit
|
|
21
|
+
};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { log } from "crawlee";
|
|
2
|
+
import { Actor } from "apify";
|
|
3
|
+
import { Status, FAILED_KEY_SEPARATOR, StatusCode } from "./constants.js";
|
|
4
|
+
/**
 * Helpers for running Actor steps with uniform logging, failure screenshots
 * and dataset reporting.
 */
const ApifyKit = {
  /**
   * Wraps a step name by prefixing it with a failure key.
   *
   * @param {string|number} key - Failure key to attach to the step.
   * @param {string} stepName - Human-readable step name.
   * @returns {string} `key`, FAILED_KEY_SEPARATOR and `stepName` joined together.
   */
  wrapStepNameWithFailedKey(key, stepName) {
    return `${key}${FAILED_KEY_SEPARATOR}${stepName}`;
  },

  /**
   * Unwraps a step name produced by `wrapStepNameWithFailedKey`.
   *
   * @param {string} stepName - Possibly wrapped step name.
   * @returns {[string, string]} `[key, stepName]`; the key is "-" when the
   *   separator is not present. Only the first separator splits the string.
   */
  unwrapStepName(stepName) {
    const splitIndex = stepName.indexOf(FAILED_KEY_SEPARATOR);
    if (splitIndex === -1) {
      return ["-", stepName];
    }
    const key = stepName.substring(0, splitIndex);
    const value = stepName.substring(splitIndex + FAILED_KEY_SEPARATOR.length);
    return [key, value];
  },

  /**
   * Core wrapper: runs one step with progress logging and, on failure, a
   * screenshot plus a failure record pushed through `pushFailed`.
   *
   * @param {string} pendingStepName - Plain or wrapped step name.
   * @param {object|null} page - Object exposing `screenshot(options)`; when
   *   falsy no screenshot is attempted.
   * @param {Function} actionFn - Step body; its awaited result is returned.
   * @param {{failActor?: boolean}} [options] - `failActor` (default true)
   *   selects `Actor.fail` over rethrowing the original error.
   * @returns {Promise<*>} Result of `actionFn`, or undefined if `Actor.fail`
   *   returns after a failure.
   * @throws The original thrown value when `failActor` is false.
   */
  async runStep(pendingStepName, page, actionFn, options = {}) {
    const { failActor = true } = options;
    const [failedKey, stepName] = this.unwrapStepName(pendingStepName);
    log.info(`\u{1F504} [\u6B63\u5728\u6267\u884C] ${stepName}...`);
    try {
      const result = await actionFn();
      log.info(`\u2705 [\u6267\u884C\u6210\u529F] ${stepName}`);
      return result;
    } catch (error) {
      // Anything can be thrown (string, null, undefined); never dereference
      // the thrown value blindly or the failure report itself would crash.
      const hasMessage = error !== null && error !== undefined && error.message !== undefined;
      const errorMessage = hasMessage ? error.message : String(error);
      const errorStack = error !== null && error !== undefined ? error.stack : undefined;
      log.error(`\u274C [\u6267\u884C\u5931\u8D25] ${stepName}: ${errorMessage}`);

      // Placeholder text that is reported when no screenshot could be taken.
      let screenshotBase64 = "\u622A\u56FE\u5931\u8D25";
      try {
        if (page) {
          const buffer = await page.screenshot({ fullPage: true, type: "jpeg", quality: 60 });
          screenshotBase64 = `data:image/jpeg;base64,${buffer.toString("base64")}`;
        }
      } catch (snapErr) {
        // Best effort: a failing screenshot must not hide the step failure.
        log.warning(`\u622A\u56FE\u751F\u6210\u5931\u8D25: ${snapErr.message}`);
      }

      await this.pushFailed(error, {
        failedStep: stepName,
        failedKey,
        errorMessage,
        errorStack,
        screenshotBase64
      });

      if (failActor) {
        await Actor.fail(`Run Step ${stepName} \u5931\u8D25: ${errorMessage}`);
      } else {
        throw error;
      }
    }
  },

  /**
   * Loose variant of `runStep`: on failure it rethrows instead of calling
   * `Actor.fail`.
   *
   * @param {string} stepName - Plain or wrapped step name.
   * @param {object|null} page - Forwarded to `runStep`.
   * @param {Function} fn - Step body.
   * @returns {Promise<*>} Result of `fn`.
   */
  async runStepLoose(stepName, page, fn) {
    return await this.runStep(stepName, page, fn, { failActor: false });
  },

  /**
   * Pushes a success record through `Actor.pushData`.
   *
   * @param {Object} data - Payload; its keys are spread last and therefore
   *   override `code`, `status` and `timestamp` on collision.
   * @returns {Promise<void>}
   */
  async pushSuccess(data) {
    await Actor.pushData({
      code: StatusCode.Success,
      status: Status.Success,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      ...data
    });
  },

  /**
   * Pushes a failure record through `Actor.pushData` (intended for `runStep`).
   *
   * @param {Error|Object} error - Error, or any object carrying extra
   *   failure / partial-success information.
   * @param {Object} [meta] - Extra fields (failedStep, screenshotBase64, ...);
   *   spread last, so they override the base fields on collision.
   * @returns {Promise<void>}
   * @private
   */
  async pushFailed(error, meta = {}) {
    // name/message/stack of an Error are not enumerable, so JSON
    // serialization would store `{}`; copy them into a plain object first.
    const serializableError = error instanceof Error
      ? { name: error.name, message: error.message, stack: error.stack, ...error }
      : error;
    await Actor.pushData({
      code: StatusCode.Failed,
      status: Status.Failed,
      // May carry additional error information.
      error: serializableError,
      timestamp: (/* @__PURE__ */ new Date()).toISOString(),
      ...meta
    });
  }
};
|
|
94
|
+
export {
|
|
95
|
+
ApifyKit
|
|
96
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
 * Numeric failure keys.
 * NOTE(review): presumably these are the keys passed to
 * ApifyKit.wrapStepNameWithFailedKey — confirm against callers.
 */
const ErrorKeygen = {
  NotLogin: 30000001,
  // Correctly spelled name for the captcha failure key.
  Captcha: 30000002,
  // Misspelled original name, kept so existing callers keep working.
  Chaptcha: 30000002
};

/** Textual status written into pushed records. */
const Status = {
  Success: "SUCCESS",
  Failed: "FAILED"
};

/** Numeric status code written into pushed records. */
const StatusCode = {
  Success: 0,
  Failed: -1
};

/** Separator placed between a failure key and a step name. */
const FAILED_KEY_SEPARATOR = "::<@>::";

/** Default key under which the live-view screenshot is stored. */
const PresetOfLiveViewKey = "LIVE_VIEW_SCREENSHOT";
|
|
15
|
+
export {
|
|
16
|
+
ErrorKeygen,
|
|
17
|
+
FAILED_KEY_SEPARATOR,
|
|
18
|
+
PresetOfLiveViewKey,
|
|
19
|
+
Status,
|
|
20
|
+
StatusCode
|
|
21
|
+
};
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import delay from "delay";
|
|
2
|
+
import { log } from "crawlee";
|
|
3
|
+
/**
 * Helpers that add human-looking timing and pointer movement.
 */
const Humanize = {
  /**
   * Sleeps for a random or fixed number of milliseconds (thin wrapper around
   * the `delay` package).
   *
   * @param {number} min - Minimum milliseconds, or the exact delay when
   *   `max` is not a number.
   * @param {number} [max] - Maximum milliseconds.
   * @returns {Promise<void>}
   */
  async randomSleep(min, max) {
    // Both calls return a promise; the value is awaited, not read as a number.
    const pendingDelay = typeof max === "number" ? delay.range(min, max) : delay(min);
    await pendingDelay;
  },

  /**
   * Simulates a "gazing" / "reading" user: moves the cursor to random points
   * until `durationMs` has elapsed, pausing 200-800 ms after each move.
   *
   * @param {import('ghost-cursor-playwright').GhostCursor} cursor - Object
   *   exposing `moveTo({ x, y })`.
   * @param {number} [durationMs] - Total duration in milliseconds.
   * @param {{width?: number, height?: number}} [bounds] - Area the random
   *   points are drawn from; defaults to the previous fixed 800x600 box.
   *   Pass the real viewport size to cover the whole page.
   * @returns {Promise<void>}
   */
  async simulateGaze(cursor, durationMs = 2e3, bounds = {}) {
    const { width = 800, height = 600 } = bounds;
    const startTime = Date.now();
    while (Date.now() - startTime < durationMs) {
      const x = Math.random() * width;
      const y = Math.random() * height;
      await cursor.moveTo({ x, y });
      await delay.range(200, 800);
    }
  }
};
|
|
28
|
+
export {
|
|
29
|
+
Humanize
|
|
30
|
+
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Stealth } from "./stealth.js";
|
|
2
|
+
/**
 * Central place for browser launch configuration; the arguments themselves
 * come from the Stealth module.
 */
const Launch = {
  /**
   * Builds launch options: the stealth arguments followed by any custom ones,
   * with the default "--enable-automation" argument ignored.
   *
   * @param {string[]} [customArgs] - Extra arguments appended after the stealth ones.
   * @returns {{args: string[], ignoreDefaultArgs: string[]}} Launch options.
   */
  getLaunchOptions(customArgs = []) {
    const stealthArgs = Stealth.getStealthLaunchArgs();
    const args = [...stealthArgs, ...customArgs];
    const ignoreDefaultArgs = ["--enable-automation"];
    return { args, ignoreDefaultArgs };
  },

  /**
   * Recommended fingerprint generator options: desktop Chrome 110 or newer
   * on Windows or Linux.
   *
   * @returns {{browsers: Array<{name: string, minVersion: number}>, devices: string[], operatingSystems: string[]}}
   */
  getFingerprintGeneratorOptions() {
    const chrome = { name: "chrome", minVersion: 110 };
    return {
      browsers: [chrome],
      devices: ["desktop"],
      // Linux is included for container compatibility.
      operatingSystems: ["windows", "linux"]
    };
  }
};
|
|
25
|
+
export {
|
|
26
|
+
Launch
|
|
27
|
+
};
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import express from "express";
|
|
2
|
+
import { log } from "crawlee";
|
|
3
|
+
import { Actor } from "apify";
|
|
4
|
+
import { PresetOfLiveViewKey } from "./constants.js";
|
|
5
|
+
/**
 * Starts a web server that shows the latest stored screenshot, for the
 * "Live View" tab.
 *
 * GET "/" reads the value stored under `liveViewKey` via `Actor.getValue`.
 * Without a value it answers with a placeholder page that refreshes every
 * 2 seconds; otherwise with a page that embeds the screenshot as a base64
 * PNG and refreshes every second.
 *
 * The returned promise settles only once the server is listening, and it
 * rejects on a listen error (for example a busy port) instead of leaving an
 * unhandled 'error' event on the server.
 *
 * @param {string} liveViewKey - Key under which the screenshot is stored.
 * @returns {Promise<object>} The server returned by `app.listen`, so the
 *   caller can close it.
 */
async function startLiveViewServer(liveViewKey) {
  const app = express();

  app.get("/", async (req, res) => {
    try {
      const screenshotBuffer = await Actor.getValue(liveViewKey);
      if (!screenshotBuffer) {
        // Nothing captured yet: serve a self-refreshing placeholder.
        res.send('<html><head><meta http-equiv="refresh" content="2"></head><body>\u7B49\u5F85\u7B2C\u4E00\u4E2A\u5C4F\u5E55\u622A\u56FE...</body></html>');
        return;
      }
      const screenshotBase64 = screenshotBuffer.toString("base64");
      res.send(`
        <html>
          <head>
            <title>Live View (\u622A\u56FE)</title>
            <meta http-equiv="refresh" content="1">
          </head>
          <body style="margin:0; padding:0;">
            <img src="data:image/png;base64,${screenshotBase64}"
                 alt="Live View Screenshot"
                 style="width: 100%; height: auto;" />
          </body>
        </html>
      `);
    } catch (error) {
      log.error(`Live View \u670D\u52A1\u5668\u9519\u8BEF: ${error.message}`);
      res.status(500).send(`\u65E0\u6CD5\u52A0\u8F7D\u5C4F\u5E55\u622A\u56FE: ${error.message}`);
    }
  });

  // Container port from the environment, with 4321 as the fallback.
  const port = process.env.APIFY_CONTAINER_PORT || 4321;

  return new Promise((resolve, reject) => {
    const server = app.listen(port, (listenError) => {
      // Some express versions report listen failures through this callback.
      if (listenError) {
        reject(listenError);
        return;
      }
      log.info(`Live View \u670D\u52A1\u5668\u5DF2\u542F\u52A8\uFF0C\u76D1\u542C\u7AEF\u53E3 ${port}\u3002\u8BF7\u6253\u5F00 "Live View" \u9009\u9879\u5361\u67E5\u770B\u3002`);
      resolve(server);
    });
    // Others only emit 'error' on the server; settling twice is harmless.
    server.once("error", reject);
  });
}
|
|
38
|
+
/**
 * Captures a PNG screenshot of `page` and stores it under `liveViewKey`
 * via `Actor.setValue`.
 *
 * Failures are logged as warnings and never propagate to the caller.
 *
 * @param {string} liveViewKey - Key the screenshot is stored under.
 * @param {import('playwright').Page} page - Page to capture.
 * @param {string} [logMessage] - Optional message logged after a successful store.
 * @returns {Promise<void>}
 */
async function takeLiveScreenshot(liveViewKey, page, logMessage) {
  let pngBuffer;
  try {
    pngBuffer = await page.screenshot({ type: "png" });
    await Actor.setValue(liveViewKey, pngBuffer, { contentType: "image/png" });
    if (!logMessage) {
      return;
    }
    log.info(`(\u622A\u56FE): ${logMessage}`);
  } catch (captureError) {
    log.warning(`\u65E0\u6CD5\u6355\u83B7 Live View \u5C4F\u5E55\u622A\u56FE: ${captureError.message}`);
  }
}
|
|
49
|
+
/**
 * Binds the live-view helpers to one storage key.
 *
 * @param {string} [liveViewKey] - Storage key; defaults to PresetOfLiveViewKey.
 * @returns {{takeLiveScreenshot: Function, startLiveViewServer: Function}}
 *   `takeLiveScreenshot(page, logMessage)` and `startLiveViewServer()`, both
 *   forwarding to the module-level functions with `liveViewKey` prepended.
 */
const useLiveView = (liveViewKey = PresetOfLiveViewKey) => ({
  takeLiveScreenshot: async (page, logMessage) => takeLiveScreenshot(liveViewKey, page, logMessage),
  startLiveViewServer: async () => startLiveViewServer(liveViewKey)
});

/** Public live-view module. */
const LiveView = { useLiveView };
|
|
62
|
+
export {
|
|
63
|
+
LiveView
|
|
64
|
+
};
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { log } from "crawlee";
|
|
2
|
+
/**
 * Anti-detection helpers for Playwright pages and browser launch.
 */
const Stealth = {
  /**
   * Resizes the page viewport to the `window.screen` size reported inside
   * the page, so that viewport and fingerprint screen size agree
   * (guards against "Viewport Mismatch" style bot checks).
   *
   * If reading or applying the size fails, a warning is logged and the
   * viewport falls back to 1920x1080; a failure of that fallback call
   * propagates to the caller.
   *
   * @param {import('playwright').Page} page
   * @returns {Promise<void>}
   */
  async syncViewportWithScreen(page) {
    try {
      // Runs inside the page; only the two values that are used are returned.
      const screen = await page.evaluate(() => ({
        width: window.screen.width,
        height: window.screen.height
      }));
      await page.setViewportSize({
        width: screen.width,
        height: screen.height
      });
      log.info(`[Stealth] Viewport synced to fingerprint: ${screen.width}x${screen.height}`);
    } catch (e) {
      log.warning(`[Stealth] Failed to sync viewport: ${e.message}. Fallback to 1920x1080.`);
      await page.setViewportSize({ width: 1920, height: 1080 });
    }
  },

  /**
   * Registers an init script that makes `navigator.webdriver` read as false.
   * Meant as a second line of defence next to a stealth plugin.
   *
   * @param {import('playwright').Page} page
   * @returns {Promise<void>}
   */
  async hideWebdriver(page) {
    await page.addInitScript(() => {
      Object.defineProperty(navigator, "webdriver", {
        get: () => false,
        // Without this the property would be locked, and any later script
        // that redefines it would throw "Cannot redefine property".
        configurable: true
      });
    });
  },

  /**
   * Generic resource blocker: aborts requests whose resource type is listed
   * and lets every other request continue, to speed up page loads.
   *
   * @param {import('playwright').Page} page
   * @param {string[]} [resourceTypes] - Resource types to block; defaults to
   *   ['font', 'image', 'media'].
   * @returns {Promise<void>}
   */
  async setupBlockingResources(page, resourceTypes = ["font", "image", "media"]) {
    await page.route("**/*", (route) => {
      const request = route.request();
      const type = request.resourceType();
      if (resourceTypes.includes(type)) {
        return route.abort();
      }
      return route.continue();
    });
  },

  /**
   * Returns the recommended stealth launch arguments.
   *
   * NOTE(review): "--ignore-certificate-errors" and "--disable-web-security"
   * relax browser security checks — confirm they are intended for every
   * consumer of this toolkit.
   *
   * @returns {string[]} A new array on every call.
   */
  getStealthLaunchArgs() {
    return [
      "--disable-blink-features=AutomationControlled",
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-infobars",
      "--window-position=0,0",
      "--ignore-certificate-errors",
      "--disable-web-security"
      // No window size is forced here on purpose: syncViewportWithScreen
      // adjusts the viewport dynamically.
    ];
  }
};
|
|
68
|
+
export {
|
|
69
|
+
Stealth
|
|
70
|
+
};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
 * General-purpose helpers.
 */
const Utils = {
  /**
   * Parses the text of a server-sent-events stream into its JSON payloads.
   *
   * Every line starting with "data:" (with or without a space after the
   * colon) is trimmed and parsed as JSON. Empty payloads, the "[DONE]"
   * marker and payloads that are not valid JSON are skipped. Lines may be
   * separated by LF, CRLF or CR.
   *
   * @param {string} sseStreamText - Raw stream text; null/undefined yields [].
   * @returns {Array<*>} Parsed payloads in stream order.
   */
  parseSseStream(sseStreamText) {
    if (sseStreamText === null || sseStreamText === undefined) {
      return [];
    }
    const events = [];
    const lines = String(sseStreamText).split(/\r\n|\r|\n/);
    for (const line of lines) {
      if (!line.startsWith("data:")) {
        continue;
      }
      const jsonContent = line.slice("data:".length).trim();
      if (jsonContent === "" || jsonContent === "[DONE]") {
        continue;
      }
      try {
        events.push(JSON.parse(jsonContent));
      } catch (parseError) {
        // Deliberately ignored: data lines that are not valid JSON are
        // skipped so that one bad line does not discard the whole stream.
      }
    }
    return events;
  }
};
|
|
22
|
+
export {
|
|
23
|
+
Utils
|
|
24
|
+
};
|
package/package.json
CHANGED
|
@@ -1,12 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@skrillex1224/playwright-toolkit",
|
|
3
|
-
"version": "2.0.0",
|
|
3
|
+
"version": "2.0.2",
|
|
4
4
|
"description": "一个在 Apify/Crawlee Actor 中启用实时截图视图的实用工具库。",
|
|
5
|
-
"main": "index.js",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|
|
7
7
|
"scripts": {
|
|
8
|
+
"build": "node build.js",
|
|
9
|
+
"prepublishOnly": "npm run build",
|
|
8
10
|
"test": "echo \"Error: no test specified\" && exit 1"
|
|
9
11
|
},
|
|
12
|
+
"files": [
|
|
13
|
+
"dist/",
|
|
14
|
+
"docs/",
|
|
15
|
+
"README.md"
|
|
16
|
+
],
|
|
10
17
|
"keywords": [
|
|
11
18
|
"apify",
|
|
12
19
|
"crawlee",
|
|
@@ -24,5 +31,8 @@
|
|
|
24
31
|
"apify": "*",
|
|
25
32
|
"crawlee": "*",
|
|
26
33
|
"playwright": "*"
|
|
34
|
+
},
|
|
35
|
+
"devDependencies": {
|
|
36
|
+
"esbuild": "^0.24.2"
|
|
27
37
|
}
|
|
28
38
|
}
|
package/index.js
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
import { ApifyKit } from './src/apify-kit.js';
|
|
2
|
-
import { Utils } from './src/utils.js';
|
|
3
|
-
import { Stealth } from './src/stealth.js';
|
|
4
|
-
import { Humanize } from './src/humanize.js';
|
|
5
|
-
import { Launch } from './src/launch.js';
|
|
6
|
-
import { LiveView } from './src/live-view.js';
|
|
7
|
-
import * as Constants from './src/constants.js';
|
|
8
|
-
|
|
9
|
-
// Unified Entry Point
|
|
10
|
-
export const usePlaywrightToolKit = () => {
|
|
11
|
-
return {
|
|
12
|
-
ApifyKit,
|
|
13
|
-
|
|
14
|
-
Stealth,
|
|
15
|
-
Humanize,
|
|
16
|
-
Launch,
|
|
17
|
-
LiveView,
|
|
18
|
-
Constants,
|
|
19
|
-
Utils
|
|
20
|
-
};
|
|
21
|
-
};
|
package/src/apify-kit.js
DELETED
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import { log } from 'crawlee';
|
|
2
|
-
import { Actor } from 'apify';
|
|
3
|
-
import { Status, FAILED_KEY_SEPARATOR, StatusCode } from './constants.js';
|
|
4
|
-
|
|
5
|
-
export const ApifyKit = {
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* 包装 Step Name
|
|
9
|
-
*/
|
|
10
|
-
wrapStepNameWithFailedKey(key, stepName) {
|
|
11
|
-
return `${key}${FAILED_KEY_SEPARATOR}${stepName}`;
|
|
12
|
-
},
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
* 解包 Step Name
|
|
16
|
-
*/
|
|
17
|
-
unwrapStepName(stepName) {
|
|
18
|
-
const splitIndex = stepName.indexOf(FAILED_KEY_SEPARATOR);
|
|
19
|
-
if (splitIndex === -1) {
|
|
20
|
-
return ['-', stepName];
|
|
21
|
-
}
|
|
22
|
-
const key = stepName.substring(0, splitIndex);
|
|
23
|
-
const value = stepName.substring(splitIndex + FAILED_KEY_SEPARATOR.length);
|
|
24
|
-
return [key, value];
|
|
25
|
-
},
|
|
26
|
-
|
|
27
|
-
/**
|
|
28
|
-
* 核心封装:执行步骤,带自动日志确认和失败截图处理
|
|
29
|
-
*/
|
|
30
|
-
async runStep(pendingStepName, page, actionFn, options = {}) {
|
|
31
|
-
const { failActor = true } = options; // 默认调用 Actor.fail
|
|
32
|
-
const [failedKey, stepName] = this.unwrapStepName(pendingStepName);
|
|
33
|
-
|
|
34
|
-
log.info(`🔄 [正在执行] ${stepName}...`);
|
|
35
|
-
|
|
36
|
-
try {
|
|
37
|
-
const result = await actionFn();
|
|
38
|
-
log.info(`✅ [执行成功] ${stepName}`);
|
|
39
|
-
return result;
|
|
40
|
-
} catch (error) {
|
|
41
|
-
log.error(`❌ [执行失败] ${stepName}: ${error.message}`);
|
|
42
|
-
|
|
43
|
-
let screenshotBase64 = '截图失败';
|
|
44
|
-
try {
|
|
45
|
-
if (page) {
|
|
46
|
-
const buffer = await page.screenshot({ fullPage: true, type: 'jpeg', quality: 60 });
|
|
47
|
-
screenshotBase64 = `data:image/jpeg;base64,${buffer.toString('base64')}`;
|
|
48
|
-
}
|
|
49
|
-
} catch (snapErr) {
|
|
50
|
-
log.warning(`截图生成失败: ${snapErr.message}`);
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
// 使用 pushFailed 方法推送失败数据(私有使用)
|
|
54
|
-
await this.pushFailed(error, {
|
|
55
|
-
failedStep: stepName,
|
|
56
|
-
failedKey: failedKey,
|
|
57
|
-
errorMessage: error.message,
|
|
58
|
-
errorStack: error.stack,
|
|
59
|
-
screenshotBase64: screenshotBase64
|
|
60
|
-
});
|
|
61
|
-
|
|
62
|
-
// 根据 failActor 决定是否调用 Actor.fail
|
|
63
|
-
if (failActor) {
|
|
64
|
-
await Actor.fail(`Run Step ${stepName} 失败: ${error.message}`);
|
|
65
|
-
} else {
|
|
66
|
-
// 不调用 Actor.fail,直接抛出错误
|
|
67
|
-
throw error;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
},
|
|
71
|
-
|
|
72
|
-
/**
|
|
73
|
-
* 宽松版runStep:失败时不调用Actor.fail,只抛出异常
|
|
74
|
-
*/
|
|
75
|
-
async runStepLoose(stepName, page, fn) {
|
|
76
|
-
return await this.runStep(stepName, page, fn, { failActor: false });
|
|
77
|
-
},
|
|
78
|
-
|
|
79
|
-
/**
|
|
80
|
-
* 推送成功数据的通用方法
|
|
81
|
-
* @param {Object} data - 要推送的数据对象
|
|
82
|
-
*/
|
|
83
|
-
async pushSuccess(data) {
|
|
84
|
-
await Actor.pushData({
|
|
85
|
-
code: StatusCode.Success,
|
|
86
|
-
status: Status.Success,
|
|
87
|
-
timestamp: new Date().toISOString(),
|
|
88
|
-
...data
|
|
89
|
-
});
|
|
90
|
-
},
|
|
91
|
-
|
|
92
|
-
/**
|
|
93
|
-
* 推送失败数据的通用方法(私有方法,仅供runStep内部使用)
|
|
94
|
-
* @param {Error|Object} error - 错误对象(可包含其他的错误/或部分处理成功的额外信息)
|
|
95
|
-
* @param {Object} [meta] - 额外的数据(如failedStep, screenshotBase64等,仅runStep使用)
|
|
96
|
-
* @private
|
|
97
|
-
*/
|
|
98
|
-
async pushFailed(error, meta = {}) {
|
|
99
|
-
await Actor.pushData({
|
|
100
|
-
code: StatusCode.Failed,
|
|
101
|
-
status: Status.Failed,
|
|
102
|
-
// 这里可能带其他错误信息
|
|
103
|
-
error,
|
|
104
|
-
timestamp: new Date().toISOString(),
|
|
105
|
-
...meta
|
|
106
|
-
});
|
|
107
|
-
}
|
|
108
|
-
}
|
package/src/constants.js
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
export const ErrorKeygen = {
|
|
2
|
-
NotLogin: 30000001,
|
|
3
|
-
Chaptcha: 30000002,
|
|
4
|
-
}
|
|
5
|
-
|
|
6
|
-
export const Status = {
|
|
7
|
-
Success: 'SUCCESS',
|
|
8
|
-
Failed: 'FAILED'
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
export const StatusCode = {
|
|
12
|
-
Success: 0,
|
|
13
|
-
Failed: -1
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
export const FAILED_KEY_SEPARATOR = '::<@>::';
|
|
17
|
-
|
|
18
|
-
export const PresetOfLiveViewKey = 'LIVE_VIEW_SCREENSHOT';
|
package/src/humanize.js
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
import delay from 'delay';
|
|
2
|
-
import { log } from 'crawlee';
|
|
3
|
-
|
|
4
|
-
export const Humanize = {
|
|
5
|
-
/**
|
|
6
|
-
* 随机延迟一段毫秒数 (API Wrapper for 'delay' package)
|
|
7
|
-
* @param {number} min - 最小毫秒
|
|
8
|
-
* @param {number} max - 最大毫秒
|
|
9
|
-
*/
|
|
10
|
-
async randomSleep(min, max) {
|
|
11
|
-
const ms = typeof max === 'number'
|
|
12
|
-
? delay.range(min, max)
|
|
13
|
-
: delay(min); // 如果只传一个参数,视为固定延迟或最小延迟
|
|
14
|
-
|
|
15
|
-
// log.debug(`[Humanize] Sleeping for ${await ms} ms...`); // delay return promise acts like number somewhat but best await it
|
|
16
|
-
// The delay package returns a promise that resolves after the delay.
|
|
17
|
-
// delay.range() returns a promise too.
|
|
18
|
-
|
|
19
|
-
await ms;
|
|
20
|
-
},
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* 模拟人类“注视”或“阅读”行为:鼠标在页面上随机微动。
|
|
24
|
-
* @param {import('ghost-cursor-playwright').GhostCursor} cursor
|
|
25
|
-
* @param {number} durationMs - 持续时间
|
|
26
|
-
*/
|
|
27
|
-
async simulateGaze(cursor, durationMs = 2000) {
|
|
28
|
-
const startTime = Date.now();
|
|
29
|
-
while (Date.now() - startTime < durationMs) {
|
|
30
|
-
// 随机小幅度移动
|
|
31
|
-
const x = Math.random() * 800;
|
|
32
|
-
const y = Math.random() * 600;
|
|
33
|
-
await cursor.moveTo({ x, y });
|
|
34
|
-
await delay.range(200, 800);
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
}
|
package/src/launch.js
DELETED
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
// 集中管理启动配置,暂时主要由 Stealth 模块提供 Args,这里作为扩展点
|
|
2
|
-
import { Stealth } from './stealth.js';
|
|
3
|
-
|
|
4
|
-
export const Launch = {
|
|
5
|
-
getLaunchOptions(customArgs = []) {
|
|
6
|
-
return {
|
|
7
|
-
args: [
|
|
8
|
-
...Stealth.getStealthLaunchArgs(),
|
|
9
|
-
...customArgs
|
|
10
|
-
],
|
|
11
|
-
ignoreDefaultArgs: ['--enable-automation'],
|
|
12
|
-
};
|
|
13
|
-
},
|
|
14
|
-
|
|
15
|
-
/**
|
|
16
|
-
* 推荐的 Fingerprint Generator 选项
|
|
17
|
-
* 确保生成的是桌面端、较新的 Chrome,以匹配我们的脚本逻辑
|
|
18
|
-
*/
|
|
19
|
-
getFingerprintGeneratorOptions() {
|
|
20
|
-
return {
|
|
21
|
-
browsers: [{ name: 'chrome', minVersion: 110 }],
|
|
22
|
-
devices: ['desktop'],
|
|
23
|
-
operatingSystems: ['windows', 'linux'], // 包含 Linux 兼容容器
|
|
24
|
-
};
|
|
25
|
-
}
|
|
26
|
-
}
|
package/src/live-view.js
DELETED
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
import express from 'express';
|
|
2
|
-
import { log } from 'crawlee';
|
|
3
|
-
import { Actor } from 'apify';
|
|
4
|
-
import { PresetOfLiveViewKey } from './constants';
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* 启动一个 Web 服务器以在 Live View 选项卡中显示最新的屏幕截图。
|
|
8
|
-
*/
|
|
9
|
-
async function startLiveViewServer(liveViewKey) {
|
|
10
|
-
const app = express();
|
|
11
|
-
|
|
12
|
-
app.get('/', async (req, res) => {
|
|
13
|
-
try {
|
|
14
|
-
// 从默认的 Key-Value Store 中读取最新的屏幕截图
|
|
15
|
-
const screenshotBuffer = await Actor.getValue(liveViewKey);
|
|
16
|
-
|
|
17
|
-
if (!screenshotBuffer) {
|
|
18
|
-
// 如果还没有截图,发送一个自动刷新的占位页面
|
|
19
|
-
res.send('<html><head><meta http-equiv="refresh" content="2"></head><body>等待第一个屏幕截图...</body></html>');
|
|
20
|
-
return;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
// 将 Buffer 转换为 Base64 字符串
|
|
24
|
-
const screenshotBase64 = screenshotBuffer.toString('base64');
|
|
25
|
-
|
|
26
|
-
// 发送一个 HTML 页面,该页面每 1 秒自动刷新一次,并显示截图
|
|
27
|
-
res.send(`
|
|
28
|
-
<html>
|
|
29
|
-
<head>
|
|
30
|
-
<title>Live View (截图)</title>
|
|
31
|
-
<meta http-equiv="refresh" content="1">
|
|
32
|
-
</head>
|
|
33
|
-
<body style="margin:0; padding:0;">
|
|
34
|
-
<img src="data:image/png;base64,${screenshotBase64}"
|
|
35
|
-
alt="Live View Screenshot"
|
|
36
|
-
style="width: 100%; height: auto;" />
|
|
37
|
-
</body>
|
|
38
|
-
</html>
|
|
39
|
-
`);
|
|
40
|
-
} catch (error) {
|
|
41
|
-
log.error(`Live View 服务器错误: ${error.message}`);
|
|
42
|
-
res.status(500).send(`无法加载屏幕截图: ${error.message}`);
|
|
43
|
-
}
|
|
44
|
-
});
|
|
45
|
-
|
|
46
|
-
// 监听 Apify 容器端口
|
|
47
|
-
const port = process.env.APIFY_CONTAINER_PORT || 4321;
|
|
48
|
-
app.listen(port, () => { log.info(`Live View 服务器已启动,监听端口 ${port}。请打开 "Live View" 选项卡查看。`); });
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
/**
|
|
52
|
-
* 拍摄当前页面的屏幕截图并将其保存到 Key-Value Store。
|
|
53
|
-
* @param {import('playwright').Page} page
|
|
54
|
-
* @param {string} [logMessage] - 可选的日志消息。
|
|
55
|
-
*/
|
|
56
|
-
async function takeLiveScreenshot(liveViewKey, page, logMessage) {
|
|
57
|
-
try {
|
|
58
|
-
const buffer = await page.screenshot({ type: 'png' });
|
|
59
|
-
await Actor.setValue(liveViewKey, buffer, { contentType: 'image/png' });
|
|
60
|
-
if (logMessage) {
|
|
61
|
-
log.info(`(截图): ${logMessage}`);
|
|
62
|
-
}
|
|
63
|
-
} catch (e) {
|
|
64
|
-
log.warning(`无法捕获 Live View 屏幕截图: ${e.message}`);
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
const useLiveView = (liveViewKey = PresetOfLiveViewKey) => {
|
|
69
|
-
return {
|
|
70
|
-
takeLiveScreenshot: async (page, logMessage) => {
|
|
71
|
-
return await takeLiveScreenshot(liveViewKey, page, logMessage)
|
|
72
|
-
},
|
|
73
|
-
startLiveViewServer: async () => {
|
|
74
|
-
return await startLiveViewServer(liveViewKey);
|
|
75
|
-
}
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
export const LiveView = {
|
|
80
|
-
useLiveView,
|
|
81
|
-
};
|
package/src/stealth.js
DELETED
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
import { log } from 'crawlee';
|
|
2
|
-
|
|
3
|
-
export const Stealth = {
|
|
4
|
-
/**
|
|
5
|
-
* 关键修复:将 Page 视口调整为与浏览器指纹 (window.screen) 一致。
|
|
6
|
-
* 防止 "Viewport Mismatch" 类型的反爬检测。
|
|
7
|
-
* @param {import('playwright').Page} page
|
|
8
|
-
*/
|
|
9
|
-
async syncViewportWithScreen(page) {
|
|
10
|
-
try {
|
|
11
|
-
// 获取指纹中的屏幕尺寸
|
|
12
|
-
const screen = await page.evaluate(() => ({
|
|
13
|
-
width: window.screen.width,
|
|
14
|
-
height: window.screen.height,
|
|
15
|
-
availWidth: window.screen.availWidth,
|
|
16
|
-
availHeight: window.screen.availHeight,
|
|
17
|
-
}));
|
|
18
|
-
|
|
19
|
-
// 调整视口
|
|
20
|
-
await page.setViewportSize({
|
|
21
|
-
width: screen.width,
|
|
22
|
-
height: screen.height
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
log.info(`[Stealth] Viewport synced to fingerprint: ${screen.width}x${screen.height}`);
|
|
26
|
-
} catch (e) {
|
|
27
|
-
log.warning(`[Stealth] Failed to sync viewport: ${e.message}. Fallback to 1920x1080.`);
|
|
28
|
-
await page.setViewportSize({ width: 1920, height: 1080 });
|
|
29
|
-
}
|
|
30
|
-
},
|
|
31
|
-
|
|
32
|
-
/**
|
|
33
|
-
* 确保 navigator.webdriver 隐藏 (通常 Playwright Stealth 插件已处理,但双重保险)
|
|
34
|
-
*/
|
|
35
|
-
async hideWebdriver(page) {
|
|
36
|
-
await page.addInitScript(() => {
|
|
37
|
-
Object.defineProperty(navigator, 'webdriver', {
|
|
38
|
-
get: () => false,
|
|
39
|
-
});
|
|
40
|
-
});
|
|
41
|
-
},
|
|
42
|
-
|
|
43
|
-
/**
|
|
44
|
-
* 通用的 Playwright 资源拦截器,用于屏蔽不必要的资源以加速加载
|
|
45
|
-
* @param {import('playwright').Page} page
|
|
46
|
-
* @param {string[]} [resourceTypes] - 要屏蔽的资源类型,默认为 ['font', 'image', 'media']
|
|
47
|
-
*/
|
|
48
|
-
async setupBlockingResources(page, resourceTypes = ['font', 'image', 'media']) {
|
|
49
|
-
await page.route('**/*', (route) => {
|
|
50
|
-
const request = route.request();
|
|
51
|
-
const type = request.resourceType();
|
|
52
|
-
if (resourceTypes.includes(type)) {
|
|
53
|
-
return route.abort();
|
|
54
|
-
}
|
|
55
|
-
return route.continue();
|
|
56
|
-
});
|
|
57
|
-
},
|
|
58
|
-
|
|
59
|
-
/**
|
|
60
|
-
* 获取推荐的 Stealth 启动参数
|
|
61
|
-
*/
|
|
62
|
-
getStealthLaunchArgs() {
|
|
63
|
-
return [
|
|
64
|
-
'--disable-blink-features=AutomationControlled',
|
|
65
|
-
'--no-sandbox',
|
|
66
|
-
'--disable-setuid-sandbox',
|
|
67
|
-
'--disable-infobars',
|
|
68
|
-
'--window-position=0,0',
|
|
69
|
-
'--ignore-certificate-errors',
|
|
70
|
-
'--disable-web-security',
|
|
71
|
-
// 注意:不建议这里强制指定 window-size,让 syncViewportWithScreen 去动态调整
|
|
72
|
-
// '--window-size=1920,1080'
|
|
73
|
-
];
|
|
74
|
-
}
|
|
75
|
-
}
|
package/src/utils.js
DELETED
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
export const Utils = {
|
|
2
|
-
/**
|
|
3
|
-
* 解析 SSE 流文本
|
|
4
|
-
*/
|
|
5
|
-
parseSseStream(sseStreamText) {
|
|
6
|
-
const events = [];
|
|
7
|
-
const lines = sseStreamText.split('\n');
|
|
8
|
-
for (const line of lines) {
|
|
9
|
-
if (line.startsWith('data: ')) {
|
|
10
|
-
try {
|
|
11
|
-
const jsonContent = line.substring(6).trim();
|
|
12
|
-
if (jsonContent && jsonContent !== '[DONE]') {
|
|
13
|
-
events.push(JSON.parse(jsonContent));
|
|
14
|
-
}
|
|
15
|
-
} catch (e) {
|
|
16
|
-
// Ignore lines that are not valid JSON
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
return events;
|
|
21
|
-
}
|
|
22
|
-
}
|