@skrillex1224/playwright-toolkit 2.0.18 → 2.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -204
- package/package.json +1 -2
- package/docs/apify-kit.md +0 -59
- package/docs/constants.md +0 -25
- package/docs/humanize.md +0 -40
- package/docs/launch.md +0 -32
- package/docs/live-view.md +0 -38
- package/docs/stealth.md +0 -55
- package/docs/utils.md +0 -28
package/README.md
CHANGED
|
@@ -1,56 +1,51 @@
|
|
|
1
|
-
# Playwright Toolkit
|
|
1
|
+
# Playwright Toolkit
|
|
2
2
|
|
|
3
|
-
>
|
|
4
|
-
> **用途**: 面向 Apify/Crawlee Actor 开发者的实用工具库。
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
一个面向 Apify/Crawlee Actor 开发者的实用工具库,提供实时截图展示(Live View)、健壮的步骤执行封装、以及常用的 Playwright 优化工具。
|
|
3
|
+
> 面向 Apify/Crawlee Actor 开发者的实用工具库,提供反检测、拟人化操作、实时截图等功能。
|
|
8
4
|
|
|
9
5
|
## 📦 安装
|
|
10
6
|
|
|
11
7
|
```bash
|
|
12
8
|
npm install @skrillex1224/playwright-toolkit
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## 🛡️ 反检测功能 (Anti-Detection)
|
|
16
|
-
|
|
17
|
-
本工具库提供多层次的反检测能力,帮助绕过常见的爬虫检测机制。
|
|
18
|
-
|
|
19
|
-
### 反检测层次架构
|
|
20
9
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
| **行为层** | 机械输入/点击/滚动 | 人类化模拟 | `ghost-cursor-playwright` |
|
|
25
|
-
| **页面层** | 验证码/风控检测 | 监控器 | toolkit 内置 |
|
|
26
|
-
|
|
27
|
-
> **注意**: `rebrowser-playwright` 与 `playwright-extra` 不兼容,如需要 CDP 层反检测,需要选择其一。
|
|
10
|
+
# 反检测所需的依赖
|
|
11
|
+
npm install playwright-extra puppeteer-extra-plugin-stealth ghost-cursor-playwright
|
|
12
|
+
```
|
|
28
13
|
|
|
29
|
-
|
|
14
|
+
## 🚀 快速开始
|
|
30
15
|
|
|
31
16
|
```javascript
|
|
17
|
+
import { Actor } from 'apify';
|
|
18
|
+
import { PlaywrightCrawler } from 'crawlee';
|
|
32
19
|
import { chromium } from 'playwright-extra';
|
|
33
20
|
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
34
21
|
import { createCursor } from 'ghost-cursor-playwright';
|
|
35
22
|
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
36
23
|
|
|
37
|
-
|
|
24
|
+
await Actor.init();
|
|
38
25
|
|
|
39
|
-
//
|
|
26
|
+
// 初始化工具箱
|
|
27
|
+
const { ApifyKit: KitHook, Launch, Stealth, Humanize, Captcha, LiveView, Constants } = usePlaywrightToolKit();
|
|
28
|
+
|
|
29
|
+
// ⚠️ ApifyKit 需要异步初始化
|
|
30
|
+
const ApifyKit = await KitHook.useApifyKit();
|
|
31
|
+
|
|
32
|
+
// 创建 Stealth 浏览器
|
|
40
33
|
const stealthChromium = Launch.createStealthChromium(chromium, stealthPlugin);
|
|
41
34
|
|
|
42
|
-
//
|
|
35
|
+
// LiveView
|
|
36
|
+
const { startLiveViewServer, takeLiveScreenshot } = LiveView.useLiveView();
|
|
37
|
+
|
|
43
38
|
const crawler = new PlaywrightCrawler({
|
|
44
39
|
launchContext: {
|
|
45
40
|
launcher: stealthChromium,
|
|
46
|
-
launchOptions: Launch.getAdvancedLaunchOptions(),
|
|
41
|
+
launchOptions: Launch.getAdvancedLaunchOptions(),
|
|
47
42
|
},
|
|
48
43
|
preNavigationHooks: [
|
|
49
44
|
async ({ page }) => {
|
|
50
|
-
//
|
|
45
|
+
// 同步视口 (防指纹检测)
|
|
51
46
|
await Stealth.syncViewportWithScreen(page);
|
|
52
47
|
|
|
53
|
-
//
|
|
48
|
+
// 验证码监控
|
|
54
49
|
Captcha.useCaptchaMonitor(page, {
|
|
55
50
|
domSelector: '#captcha_container',
|
|
56
51
|
onDetected: async () => { /* 处理验证码 */ }
|
|
@@ -58,226 +53,139 @@ const crawler = new PlaywrightCrawler({
|
|
|
58
53
|
}
|
|
59
54
|
],
|
|
60
55
|
requestHandler: async ({ page }) => {
|
|
61
|
-
//
|
|
56
|
+
// 创建 Ghost Cursor
|
|
62
57
|
const cursor = await Launch.createGhostCursor(page, createCursor);
|
|
63
58
|
|
|
64
|
-
//
|
|
59
|
+
// 页面预热 (模拟人类浏览)
|
|
65
60
|
await Humanize.warmUpBrowsing(page, cursor, 3000);
|
|
66
61
|
|
|
67
|
-
//
|
|
68
|
-
await
|
|
62
|
+
// 执行步骤 (失败时自动截图并调用 Actor.fail)
|
|
63
|
+
await ApifyKit.runStep('输入搜索', page, async () => {
|
|
64
|
+
await Humanize.humanType(page, 'input', '搜索内容');
|
|
65
|
+
await Humanize.humanClick(page, cursor, '#submit-btn');
|
|
66
|
+
});
|
|
69
67
|
|
|
70
|
-
//
|
|
71
|
-
await
|
|
68
|
+
// 推送成功数据
|
|
69
|
+
await ApifyKit.pushSuccess({ result: 'data' });
|
|
72
70
|
}
|
|
73
71
|
});
|
|
74
|
-
```
|
|
75
|
-
|
|
76
|
-
### 反检测 API 一览
|
|
77
|
-
|
|
78
|
-
| 模块 | 方法 | 说明 |
|
|
79
|
-
|------|------|------|
|
|
80
|
-
| `Launch` | `createStealthChromium(chromium, stealthPlugin)` | 创建已注册 Stealth 的 Chromium |
|
|
81
|
-
| `Launch` | `createGhostCursor(page, createCursor)` | 创建 Ghost Cursor 实例 |
|
|
82
|
-
| `Launch` | `getAdvancedLaunchOptions()` | 获取增强版反检测启动参数 |
|
|
83
|
-
| `Stealth` | `syncViewportWithScreen(page)` | 同步视口与屏幕指纹 |
|
|
84
|
-
| `Stealth` | `hideWebdriver(page)` | 隐藏 navigator.webdriver |
|
|
85
|
-
| `Humanize` | `humanType(page, selector, text)` | 人类化输入 (节奏变化) |
|
|
86
|
-
| `Humanize` | `humanClick(page, cursor, selector)` | 人类化点击 (贝塞尔曲线) |
|
|
87
|
-
| `Humanize` | `warmUpBrowsing(page, cursor, durationMs)` | 页面预热浏览 |
|
|
88
|
-
| `Humanize` | `naturalScroll(page, direction, distance)` | 自然滚动 (带惯性) |
|
|
89
|
-
| `Captcha` | `useCaptchaMonitor(page, options)` | 验证码监控 (DOM/URL) |
|
|
90
|
-
|
|
91
72
|
|
|
73
|
+
await startLiveViewServer();
|
|
74
|
+
await crawler.run(['https://example.com']);
|
|
75
|
+
await Actor.exit();
|
|
76
|
+
```
|
|
92
77
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
启动一个本地 Express 服务器,在 Apify 平台的 "Live View" 选项卡中实时查看浏览器当前状态。
|
|
96
|
-
|
|
97
|
-
```javascript
|
|
98
|
-
import { useLiveView } from '@skrillex1224/visitor-tools';
|
|
99
|
-
|
|
100
|
-
const liveView = useLiveView();
|
|
78
|
+
---
|
|
101
79
|
|
|
102
|
-
|
|
103
|
-
await liveView.startLiveViewServer();
|
|
80
|
+
## 🛡️ 反检测功能
|
|
104
81
|
|
|
105
|
-
|
|
106
|
-
await liveView.takeLiveScreenshot(page, "正在处理步骤 1...");
|
|
107
|
-
```
|
|
82
|
+
### 架构
|
|
108
83
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
-
|
|
84
|
+
| 层次 | 问题 | 解决方案 |
|
|
85
|
+
|------|------|----------|
|
|
86
|
+
| **指纹层** | navigator.webdriver, plugins, webgl | `puppeteer-extra-plugin-stealth` |
|
|
87
|
+
| **行为层** | 机械输入/点击/滚动 | `ghost-cursor-playwright` + Humanize |
|
|
88
|
+
| **页面层** | 验证码/风控检测 | Captcha 监控器 |
|
|
113
89
|
|
|
114
|
-
###
|
|
90
|
+
### API 一览
|
|
115
91
|
|
|
116
|
-
|
|
92
|
+
| 模块 | 方法 | 说明 |
|
|
93
|
+
|------|------|------|
|
|
94
|
+
| `Launch` | `createStealthChromium(chromium, stealthPlugin)` | 注册 Stealth 插件 |
|
|
95
|
+
| `Launch` | `createGhostCursor(page, createCursor)` | 创建 Ghost Cursor |
|
|
96
|
+
| `Launch` | `getAdvancedLaunchOptions()` | 增强版启动参数 |
|
|
97
|
+
| `Launch` | `getLaunchOptions()` | 基础启动参数 |
|
|
98
|
+
| `Launch` | `getFingerprintGeneratorOptions()` | 指纹生成器选项 |
|
|
99
|
+
| `Stealth` | `syncViewportWithScreen(page)` | 同步视口与屏幕 |
|
|
100
|
+
| `Stealth` | `hideWebdriver(page)` | 隐藏 webdriver |
|
|
101
|
+
| `Stealth` | `setupBlockingResources(page, types?)` | 资源拦截 |
|
|
102
|
+
| `Humanize` | `humanType(page, selector, text, options?)` | 人类化输入 |
|
|
103
|
+
| `Humanize` | `humanClick(page, cursor, selector, options?)` | 人类化点击 |
|
|
104
|
+
| `Humanize` | `warmUpBrowsing(page, cursor, durationMs?)` | 页面预热 |
|
|
105
|
+
| `Humanize` | `naturalScroll(page, direction?, distance?, steps?)` | 自然滚动 |
|
|
106
|
+
| `Humanize` | `simulateGaze(cursor, durationMs?)` | 模拟注视 |
|
|
107
|
+
| `Humanize` | `randomSleep(min, max)` | 随机延迟 |
|
|
108
|
+
| `Captcha` | `useCaptchaMonitor(page, options)` | 验证码监控 |
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## 📦 模块详解
|
|
113
|
+
|
|
114
|
+
### ApifyKit
|
|
115
|
+
|
|
116
|
+
**⚠️ 需要异步初始化**
|
|
117
117
|
|
|
118
118
|
```javascript
|
|
119
|
-
|
|
119
|
+
const { ApifyKit: KitHook } = usePlaywrightToolKit();
|
|
120
|
+
const ApifyKit = await KitHook.useApifyKit();
|
|
120
121
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
122
|
+
// 执行步骤 (失败时自动截图 + 推送 Dataset + 调用 Actor.fail)
|
|
123
|
+
await ApifyKit.runStep('步骤名', page, async () => {
|
|
124
|
+
// 你的逻辑
|
|
124
125
|
});
|
|
125
|
-
```
|
|
126
126
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
"failedStep": "登录步骤",
|
|
132
|
-
"errorMessage": "Timeout 30000ms exceeded",
|
|
133
|
-
"errorStack": "...",
|
|
134
|
-
"screenshotBase64": "data:image/jpeg;base64,...",
|
|
135
|
-
"timestamp": "2025-12-15T03:00:00.000Z"
|
|
136
|
-
}
|
|
137
|
-
```
|
|
127
|
+
// 宽松版 (失败时只抛出异常,不调用 Actor.fail)
|
|
128
|
+
await ApifyKit.runStepLoose('步骤名', page, async () => {
|
|
129
|
+
// 你的逻辑
|
|
130
|
+
});
|
|
138
131
|
|
|
139
|
-
|
|
132
|
+
// 推送成功数据
|
|
133
|
+
await ApifyKit.pushSuccess({ key: 'value' });
|
|
140
134
|
|
|
141
|
-
|
|
135
|
+
// 步骤名包装 (用于失败分类追踪)
|
|
136
|
+
const wrappedName = ApifyKit.wrapStepNameWithFailedKey(30000001, '等待登录');
|
|
137
|
+
const [failedKey, stepName] = ApifyKit.unwrapStepName(wrappedName);
|
|
138
|
+
```
|
|
142
139
|
|
|
143
|
-
|
|
140
|
+
### LiveView
|
|
144
141
|
|
|
145
142
|
```javascript
|
|
146
|
-
|
|
147
|
-
|
|
143
|
+
const { LiveView } = usePlaywrightToolKit();
|
|
144
|
+
const { startLiveViewServer, takeLiveScreenshot } = LiveView.useLiveView();
|
|
148
145
|
|
|
149
|
-
|
|
150
|
-
await
|
|
146
|
+
await startLiveViewServer();
|
|
147
|
+
await takeLiveScreenshot(page, '当前状态');
|
|
151
148
|
```
|
|
152
149
|
|
|
153
|
-
|
|
150
|
+
### Captcha
|
|
154
151
|
|
|
155
152
|
```javascript
|
|
156
|
-
|
|
157
|
-
```
|
|
158
|
-
|
|
159
|
-
### 4. SSE 解析工具
|
|
153
|
+
const { Captcha } = usePlaywrightToolKit();
|
|
160
154
|
|
|
161
|
-
|
|
155
|
+
// DOM 监控模式
|
|
156
|
+
Captcha.useCaptchaMonitor(page, {
|
|
157
|
+
domSelector: '#captcha_container',
|
|
158
|
+
onDetected: async () => { await Actor.fail('检测到验证码'); }
|
|
159
|
+
});
|
|
162
160
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
161
|
+
// URL 监控模式
|
|
162
|
+
Captcha.useCaptchaMonitor(page, {
|
|
163
|
+
urlPattern: '/captcha',
|
|
164
|
+
onDetected: async () => { await Actor.fail('检测到验证码'); }
|
|
165
|
+
});
|
|
166
166
|
```
|
|
167
167
|
|
|
168
|
-
###
|
|
169
|
-
|
|
170
|
-
为步骤名称添加自定义的失败标识符,方便在失败时进行分类和追踪:
|
|
168
|
+
### Constants
|
|
171
169
|
|
|
172
170
|
```javascript
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
// 包装步骤名称
|
|
176
|
-
const wrappedName = wrapStepNameWithFailedKey(ErrorKeygen.NotLogin, '等待登录');
|
|
171
|
+
const { Constants } = usePlaywrightToolKit();
|
|
172
|
+
const { ErrorKeygen, Status, StatusCode } = Constants;
|
|
177
173
|
|
|
178
|
-
//
|
|
179
|
-
|
|
180
|
-
//
|
|
181
|
-
// stepName: '等待登录'
|
|
174
|
+
// ErrorKeygen: { NotLogin: 30000001, Chaptcha: 30000002 }
|
|
175
|
+
// Status: { Success: 'SUCCESS', Failed: 'FAILED' }
|
|
176
|
+
// StatusCode: { Success: 0, Failed: -1 }
|
|
182
177
|
```
|
|
183
178
|
|
|
184
|
-
|
|
179
|
+
### Utils
|
|
185
180
|
|
|
186
181
|
```javascript
|
|
187
|
-
|
|
188
|
-
import { PlaywrightCrawler } from 'crawlee';
|
|
189
|
-
import { useLiveView, Utils } from '@skrillex1224/visitor-tools';
|
|
182
|
+
const { Utils } = usePlaywrightToolKit();
|
|
190
183
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
const { startLiveViewServer, takeLiveScreenshot } = useLiveView();
|
|
194
|
-
|
|
195
|
-
const crawler = new PlaywrightCrawler({
|
|
196
|
-
preNavigationHooks: [
|
|
197
|
-
async ({ page }) => {
|
|
198
|
-
await Utils.setupViewport(page);
|
|
199
|
-
await Utils.setupBlockingResources(page);
|
|
200
|
-
},
|
|
201
|
-
],
|
|
202
|
-
requestHandler: async ({ page }) => {
|
|
203
|
-
await takeLiveScreenshot(page, '页面加载完成');
|
|
204
|
-
|
|
205
|
-
await Utils.runStep('点击登录按钮', page, async () => {
|
|
206
|
-
await page.click('#login-btn');
|
|
207
|
-
});
|
|
208
|
-
|
|
209
|
-
await takeLiveScreenshot(page, '登录成功');
|
|
210
|
-
},
|
|
211
|
-
});
|
|
212
|
-
|
|
213
|
-
await startLiveViewServer();
|
|
214
|
-
await crawler.run(['https://example.com']);
|
|
215
|
-
await Actor.exit();
|
|
184
|
+
// 解析 SSE 流文本
|
|
185
|
+
const events = Utils.parseSseStream(sseText);
|
|
216
186
|
```
|
|
217
187
|
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
### `useLiveView(liveViewKey?)`
|
|
221
|
-
|
|
222
|
-
创建 Live View 实例。
|
|
223
|
-
|
|
224
|
-
- **参数**:
|
|
225
|
-
- `liveViewKey` (可选): Key-Value Store 中的键名,默认为 `'LIVE_VIEW_SCREENSHOT'`
|
|
226
|
-
|
|
227
|
-
- **返回对象**:
|
|
228
|
-
- `startLiveViewServer()`: 启动 Express 服务器
|
|
229
|
-
- `takeLiveScreenshot(page, logMessage?)`: 捕获截图并保存
|
|
230
|
-
|
|
231
|
-
### `Utils.runStep(stepName, page, actionFn)`
|
|
232
|
-
|
|
233
|
-
执行一个步骤并自动处理失败。## 模块文档
|
|
234
|
-
|
|
235
|
-
详细文档请参阅 `docs/` 目录:
|
|
236
|
-
|
|
237
|
-
- [ApifyKit](./docs/apify-kit.md): Apify Actor 流程控制与数据推送
|
|
238
|
-
- [Stealth](./docs/stealth.md): 反爬虫与指纹隐身
|
|
239
|
-
- [Humanize](./docs/humanize.md): 拟人化操作模拟
|
|
240
|
-
- [LiveView](./docs/live-view.md): 实时屏幕截图预览
|
|
241
|
-
- [Launch](./docs/launch.md): 浏览器启动配置
|
|
242
|
-
- [Utils](./docs/utils.md): 通用工具函数
|
|
243
|
-
- [Constants](./docs/constants.md): 常量定义
|
|
244
|
-
|
|
245
|
-
- **参数**:
|
|
246
|
-
- `stepName`: 步骤名称 (支持使用 `wrapStepNameWithFailedKey` 包装)
|
|
247
|
-
- `page`: Playwright Page 对象
|
|
248
|
-
- `actionFn`: 要执行的异步函数
|
|
249
|
-
|
|
250
|
-
### `Utils.setupBlockingResources(page, resourceTypes?)`
|
|
251
|
-
|
|
252
|
-
设置资源拦截器。
|
|
253
|
-
|
|
254
|
-
- **参数**:
|
|
255
|
-
- `page`: Playwright Page 对象
|
|
256
|
-
- `resourceTypes` (可选): 要屏蔽的资源类型数组,默认为 `['font', 'image', 'media']`
|
|
257
|
-
|
|
258
|
-
### `Utils.setupViewport(page, width?, height?)`
|
|
259
|
-
|
|
260
|
-
设置浏览器视口大小。
|
|
261
|
-
|
|
262
|
-
- **参数**:
|
|
263
|
-
- `page`: Playwright Page 对象
|
|
264
|
-
- `width` (可选): 宽度,默认 1920
|
|
265
|
-
- `height` (可选): 高度,默认 1080
|
|
266
|
-
|
|
267
|
-
### `Utils.parseSseStream(sseStreamText)`
|
|
268
|
-
|
|
269
|
-
解析 SSE 流文本为 JSON 对象数组。
|
|
270
|
-
|
|
271
|
-
- **参数**:
|
|
272
|
-
- `sseStreamText`: SSE 格式的文本
|
|
273
|
-
|
|
274
|
-
- **返回**: JSON 对象数组
|
|
275
|
-
|
|
276
|
-
## 📝 注意事项
|
|
277
|
-
|
|
278
|
-
- Live View 仅在 Apify 平台运行时可见(通过 Live View 选项卡)
|
|
279
|
-
- `runStep` 捕获的截图是全页截图(JPEG 格式,质量 60),用于减少数据量
|
|
280
|
-
- 资源拦截会显著提升加载速度,但可能影响需要图片/样式的页面
|
|
188
|
+
---
|
|
281
189
|
|
|
282
190
|
## 📄 License
|
|
283
191
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@skrillex1224/playwright-toolkit",
|
|
3
|
-
"version": "2.0.18",
|
|
3
|
+
"version": "2.0.19",
|
|
4
4
|
"description": "一个在 Apify/Crawlee Actor 中启用实时截图视图的实用工具库。",
|
|
5
5
|
"main": "dist/index.cjs",
|
|
6
6
|
"module": "dist/index.js",
|
|
@@ -19,7 +19,6 @@
|
|
|
19
19
|
},
|
|
20
20
|
"files": [
|
|
21
21
|
"dist/",
|
|
22
|
-
"docs/",
|
|
23
22
|
"README.md"
|
|
24
23
|
],
|
|
25
24
|
"keywords": [
|
package/docs/apify-kit.md
DELETED
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
# ApifyKit
|
|
2
|
-
|
|
3
|
-
`ApifyKit` 提供了一组用于简化 Apify Actor 开发的实用函数,核心是 `runStep` 机制和数据推送。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { ApifyKit } = usePlaywrightToolKit();
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
## 方法
|
|
13
|
-
|
|
14
|
-
### `runStep(stepName, page, stepFunction)`
|
|
15
|
-
|
|
16
|
-
执行一个封装的步骤。会自动记录开始日志、执行函数,并捕获错误。
|
|
17
|
-
如果步骤失败,会自动:
|
|
18
|
-
1. 打印带颜色的错误日志。
|
|
19
|
-
2. 拍摄错误截图。
|
|
20
|
-
3. 将错误信息和截图保存到 Default Dataset 中。
|
|
21
|
-
|
|
22
|
-
**参数:**
|
|
23
|
-
- `stepName` (string): 步骤名称,用于日志和错误报告。
|
|
24
|
-
- `page` (Page): Playwright Page 对象,用于截图。
|
|
25
|
-
- `stepFunction` (function): 包含实际逻辑的异步函数。
|
|
26
|
-
|
|
27
|
-
**示例:**
|
|
28
|
-
```javascript
|
|
29
|
-
await ApifyKit.runStep('填写登录表单', page, async () => {
|
|
30
|
-
await page.fill('#username', 'user');
|
|
31
|
-
await page.fill('#password', 'pass');
|
|
32
|
-
await page.click('#login');
|
|
33
|
-
});
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
### `pushSuccess(data)`
|
|
37
|
-
|
|
38
|
-
将成功的数据推送到 Default Dataset。会自动添加 `status: 'SUCCESS'` 和时间戳。
|
|
39
|
-
|
|
40
|
-
**参数:**
|
|
41
|
-
- `data` (object): 要推送的数据对象。
|
|
42
|
-
|
|
43
|
-
**示例:**
|
|
44
|
-
```javascript
|
|
45
|
-
await ApifyKit.pushSuccess({
|
|
46
|
-
title: 'Product A',
|
|
47
|
-
price: 99.9
|
|
48
|
-
});
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
### `pushFailed(stepName, errorMessage, screenshotBase64, kvStoreKey)`
|
|
52
|
-
|
|
53
|
-
(通常由 `runStep` 内部调用) 将失败信息推送到 Default Dataset。
|
|
54
|
-
|
|
55
|
-
### `wrapStepNameWithFailedKey(failedKey, stepName)`
|
|
56
|
-
将错误 Key (例如错误码) 绑定到步骤名称上,用于在失败时提取该 Key。
|
|
57
|
-
|
|
58
|
-
### `unwrapStepName(stepName)`
|
|
59
|
-
解包步骤名称,获取绑定的 Key (如果有)。
|
package/docs/constants.md
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# Constants
|
|
2
|
-
|
|
3
|
-
`Constants` 模块定义了项目中共用的常量、枚举和键名映射。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { Constants } = usePlaywrightToolKit();
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
## 导出
|
|
13
|
-
|
|
14
|
-
### `ErrorKeygen`
|
|
15
|
-
|
|
16
|
-
常用的错误代码 Key 枚举,用于标准化的错误处理。
|
|
17
|
-
|
|
18
|
-
- `NotLogin`: 'not_login'
|
|
19
|
-
- `CaptchaDetected`: 'captcha_detected'
|
|
20
|
-
- `RegionRestricted`: 'region_restricted'
|
|
21
|
-
- `RateLimited`: 'rate_limited'
|
|
22
|
-
|
|
23
|
-
### `FAILED_KEY_SEPARATOR`
|
|
24
|
-
|
|
25
|
-
用于 `ApifyKit.wrapStepNameWithFailedKey` 的分隔符。
|
package/docs/humanize.md
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
# Humanize
|
|
2
|
-
|
|
3
|
-
`Humanize` 模块用于模拟人类操作行为,如随机延迟和鼠标移动。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { Humanize } = usePlaywrightToolKit();
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
## 方法
|
|
13
|
-
|
|
14
|
-
### `randomSleep(min, max)`
|
|
15
|
-
|
|
16
|
-
随机等待一段毫秒数。基于 `delay` 库。
|
|
17
|
-
|
|
18
|
-
**参数:**
|
|
19
|
-
- `min` (number): 最小延迟 (ms)。
|
|
20
|
-
- `max` (number): (可选) 最大延迟 (ms)。如果未提供,则等待固定的 `min` 时间。
|
|
21
|
-
|
|
22
|
-
**示例:**
|
|
23
|
-
```javascript
|
|
24
|
-
await Humanize.randomSleep(1000, 3000); // 等待 1-3 秒
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
### `simulateGaze(cursor, durationMs)`
|
|
28
|
-
|
|
29
|
-
模拟人类“注视”或“阅读”行为:控制鼠标在页面上进行随机的小幅度移动。需要配合 `ghost-cursor-playwright` 使用。
|
|
30
|
-
|
|
31
|
-
**参数:**
|
|
32
|
-
- `cursor` (GhostCursor): 由 `ghost-cursor-playwright` 的 `createCursor` 创建的 cursor 对象。
|
|
33
|
-
- `durationMs` (number): 持续时间 (ms),默认为 2000。
|
|
34
|
-
|
|
35
|
-
**示例:**
|
|
36
|
-
```javascript
|
|
37
|
-
import { createCursor } from 'ghost-cursor-playwright';
|
|
38
|
-
const cursor = await createCursor(page);
|
|
39
|
-
await Humanize.simulateGaze(cursor, 5000);
|
|
40
|
-
```
|
package/docs/launch.md
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
# Launch
|
|
2
|
-
|
|
3
|
-
`Launch` 模块提供与浏览器启动和指纹生成相关的辅助配置。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { Launch } = usePlaywrightToolKit();
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
## 方法
|
|
13
|
-
|
|
14
|
-
### `getFingerprintGeneratorOptions(options)`
|
|
15
|
-
|
|
16
|
-
返回配置好的能够通过检测的指纹生成器选项。
|
|
17
|
-
|
|
18
|
-
**参数:**
|
|
19
|
-
- `options` (object): (可选) 覆盖默认配置。
|
|
20
|
-
|
|
21
|
-
**默认配置:**
|
|
22
|
-
- `devices`: ['desktop']
|
|
23
|
-
- `operatingSystems`: ['windows', 'macos']
|
|
24
|
-
- `browsers`: ['chrome', 'edge']
|
|
25
|
-
- `locales`: ['zh-CN', 'en-US']
|
|
26
|
-
|
|
27
|
-
### `getLaunchOptions(extraArgs)`
|
|
28
|
-
|
|
29
|
-
返回合并了 Stealth 参数的 Playwright 启动选项。
|
|
30
|
-
|
|
31
|
-
**参数:**
|
|
32
|
-
- `extraArgs` (string[]): (可选) 额外的启动参数。
|
package/docs/live-view.md
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
# LiveView
|
|
2
|
-
|
|
3
|
-
`LiveView` 模块使得在 Apify 平台上运行 Playwright 爬虫(尤其是有头模式)时,能够实时查看浏览器的屏幕截图。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { LiveView } = usePlaywrightToolKit();
|
|
10
|
-
const { startLiveViewServer, takeLiveScreenshot } = LiveView.useLiveView();
|
|
11
|
-
```
|
|
12
|
-
|
|
13
|
-
## 方法
|
|
14
|
-
|
|
15
|
-
### `startLiveViewServer()`
|
|
16
|
-
|
|
17
|
-
启动一个轻量级的 Express 服务器,用于展示最新的屏幕截图。
|
|
18
|
-
通常在 Actor 启动时调用。
|
|
19
|
-
|
|
20
|
-
**注意:** 仅在 Apify 平台上且需要 Live View 功能时调用。
|
|
21
|
-
|
|
22
|
-
### `takeLiveScreenshot(page, logMessage)`
|
|
23
|
-
|
|
24
|
-
拍摄当前页面的截图,并将其保存到 Key-Value Store 中供 Live Server 展示。
|
|
25
|
-
|
|
26
|
-
**参数:**
|
|
27
|
-
- `page` (Page): Playwright Page 对象。
|
|
28
|
-
- `logMessage` (string): (可选) 日志消息。
|
|
29
|
-
|
|
30
|
-
**示例:**
|
|
31
|
-
```javascript
|
|
32
|
-
// 定时截图
|
|
33
|
-
setInterval(() => takeLiveScreenshot(page), 5000);
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
## 配置
|
|
37
|
-
|
|
38
|
-
默认使用的 Key 为 `LIVE_VIEW_SCREENSHOT`。可以通过 `useLiveView(customKey)` 自定义。
|
package/docs/stealth.md
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
# Stealth
|
|
2
|
-
|
|
3
|
-
`Stealth` 模块提供了一组反爬虫和隐身技术,旨在提高爬虫的存活率和稳定性。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { Stealth } = usePlaywrightToolKit();
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
## 方法
|
|
13
|
-
|
|
14
|
-
### `syncViewportWithScreen(page)`
|
|
15
|
-
|
|
16
|
-
**关键功能**。将 Playwright 的 Page 视口大小调整为与浏览器指纹 (window.screen) 一致。
|
|
17
|
-
这可以有效防止 "Viewport Mismatch" 类型的反爬检测(例如 Akamai, Datadome 等)。
|
|
18
|
-
|
|
19
|
-
**参数:**
|
|
20
|
-
- `page` (Page): Playwright Page 对象。
|
|
21
|
-
|
|
22
|
-
**示例:**
|
|
23
|
-
```javascript
|
|
24
|
-
// 在 preNavigationHooks 中使用
|
|
25
|
-
preNavigationHooks: [
|
|
26
|
-
async ({ page }) => {
|
|
27
|
-
await Stealth.syncViewportWithScreen(page);
|
|
28
|
-
}
|
|
29
|
-
]
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### `hideWebdriver(page)`
|
|
33
|
-
|
|
34
|
-
确保 `navigator.webdriver` 属性被隐藏或设置为 false。虽然 `puppeteer-extra-plugin-stealth` 已经处理了这一点,但此方法可作为双重保险。
|
|
35
|
-
|
|
36
|
-
### `setupBlockingResources(page, resourceTypes)`
|
|
37
|
-
|
|
38
|
-
拦截并屏蔽指定类型的资源请求,以加速页面加载并节省带宽。
|
|
39
|
-
|
|
40
|
-
**参数:**
|
|
41
|
-
- `page` (Page): Playwright Page 对象。
|
|
42
|
-
- `resourceTypes` (string[]): (可选) 默认为 `['font', 'image', 'media']`。
|
|
43
|
-
|
|
44
|
-
### `getStealthLaunchArgs()`
|
|
45
|
-
|
|
46
|
-
返回一组推荐的 Chrome 启动参数,用于隐藏自动化特征。
|
|
47
|
-
|
|
48
|
-
**示例:**
|
|
49
|
-
```javascript
|
|
50
|
-
launchContext: {
|
|
51
|
-
launchOptions: {
|
|
52
|
-
args: Stealth.getStealthLaunchArgs()
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
```
|
package/docs/utils.md
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
# Utils
|
|
2
|
-
|
|
3
|
-
`Utils` 模块包含通用的工具函数,不依赖于 Apify 或特定的爬虫逻辑。
|
|
4
|
-
|
|
5
|
-
## 引入
|
|
6
|
-
|
|
7
|
-
```javascript
|
|
8
|
-
import { usePlaywrightToolKit } from '@skrillex1224/playwright-toolkit';
|
|
9
|
-
const { Utils } = usePlaywrightToolKit();
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
## 方法
|
|
13
|
-
|
|
14
|
-
### `parseSseStream(sseStreamText)`
|
|
15
|
-
|
|
16
|
-
解析 Server-Sent Events (SSE) 格式的文本流。常用于处理 AI 模型的流式响应。
|
|
17
|
-
|
|
18
|
-
**参数:**
|
|
19
|
-
- `sseStreamText` (string): 完整的 SSE 文本。
|
|
20
|
-
|
|
21
|
-
**返回:**
|
|
22
|
-
- `Array<Object>`: 解析后的 JSON 对象数组。会自动过滤掉非 JSON 行和空行。
|
|
23
|
-
|
|
24
|
-
**示例:**
|
|
25
|
-
```javascript
|
|
26
|
-
const events = Utils.parseSseStream(responseText);
|
|
27
|
-
console.log(events[0].data);
|
|
28
|
-
```
|