nodejs_chromium 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +24 -9
- package/package.json +1 -1
- package/src/chrome.js +732 -249
package/index.js
CHANGED
|
@@ -1,23 +1,34 @@
|
|
|
1
1
|
require('nodejs_patch');
|
|
2
2
|
const puppeteer = require("puppeteer");
|
|
3
3
|
const chrome = require("./src/chrome.js");
|
|
4
|
+
global.__UA__ = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36';
|
|
4
5
|
|
|
5
6
|
async function newChrome(params) {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
let {
|
|
8
|
+
id = 'myChrome',
|
|
9
|
+
view = false,
|
|
10
|
+
width = 1024,
|
|
11
|
+
height = 768,
|
|
12
|
+
scale = 1,
|
|
13
|
+
mobile = false,
|
|
14
|
+
dumpio = false,
|
|
15
|
+
debug = false,
|
|
16
|
+
incognito = true,
|
|
17
|
+
path = void 0,
|
|
18
|
+
slowMo = 1,
|
|
19
|
+
ua = void 0
|
|
20
|
+
} = params;
|
|
12
21
|
|
|
13
22
|
let option = {
|
|
14
23
|
userDataDir: `runtime/template/${id}`,
|
|
15
24
|
// timeout: 500,//最大允许超时间,默认为3000,最小215,一般不要设置
|
|
16
|
-
slowMo
|
|
25
|
+
slowMo, //每一步停留时间,不能太大,否则会太慢,特别是在类似写入很多Cookies时,每写入一个都要等一下。
|
|
17
26
|
headless: view ? false : 'new', //'new',设置是否在无头模式下运行浏览器,false=会启动浏览器,true=无界面
|
|
18
27
|
devtools: !!debug, //打开调试
|
|
19
28
|
ignoreHTTPSErrors: true, //忽略 HTTPS 错误。屏蔽跳转不同域名的报错
|
|
20
|
-
ignoreDefaultArgs: [
|
|
29
|
+
ignoreDefaultArgs: [
|
|
30
|
+
"--enable-automation", //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
|
|
31
|
+
],
|
|
21
32
|
dumpio: !!dumpio, //是否将浏览器的标准输入/输出流(stdio)的内容输出到 Node.js 的 stdout 和 stderr。
|
|
22
33
|
defaultViewport: {
|
|
23
34
|
width,
|
|
@@ -26,6 +37,7 @@ async function newChrome(params) {
|
|
|
26
37
|
isMobile: !!mobile,
|
|
27
38
|
},
|
|
28
39
|
args: [
|
|
40
|
+
'--enable-chrome-browser-cloud-management', //Cloud Browser Client Management (CBCM)
|
|
29
41
|
'--disable-web-security', //禁用浏览器的同源策略(Same-Origin Policy)和跨站请求伪造(CSRF)保护
|
|
30
42
|
`--window-size=${width},${height}`,
|
|
31
43
|
'--no-sandbox', //禁用沙箱模式
|
|
@@ -35,6 +47,8 @@ async function newChrome(params) {
|
|
|
35
47
|
'--disable-blink-features=AutomationControlled', //禁用 blink 引擎的自动化控制特性,防止网站通过检测 blink 引擎的特性来判断浏览器是否处于自动化控制状态。
|
|
36
48
|
// 在自动化测试或爬虫等场景中,这个参数可以帮助隐藏浏览器的自动化痕迹,使得浏览器行为更接近于真实用户操作
|
|
37
49
|
'--lang=zh-CN', //设置中文环境
|
|
50
|
+
'--disable-extensions', //禁止启动扩展
|
|
51
|
+
'--disable-dev-shm-usage', //Linux系统中使用普通的文件系统缓存避免因为/dev/shm大小不足而导致的问题
|
|
38
52
|
]
|
|
39
53
|
}
|
|
40
54
|
|
|
@@ -46,8 +60,9 @@ async function newChrome(params) {
|
|
|
46
60
|
|
|
47
61
|
const browser = await puppeteer.launch(option);
|
|
48
62
|
const page = (await browser.pages())[0];
|
|
63
|
+
// const page = await browser.newPage();
|
|
49
64
|
await page.setRequestInterception(true); //允许拦截
|
|
50
|
-
await page.setUserAgent(
|
|
65
|
+
if (ua) await page.setUserAgent(ua);
|
|
51
66
|
await page.evaluateOnNewDocument(() => {
|
|
52
67
|
const newProto = navigator.__proto__;
|
|
53
68
|
delete newProto.webdriver; //删除 navigator.webdriver字段
|
package/package.json
CHANGED
package/src/chrome.js
CHANGED
|
@@ -1,250 +1,733 @@
|
|
|
1
|
-
const
|
|
2
|
-
const {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
1
|
+
const fs = require("fs");
|
|
2
|
+
const {parse: parseUrl} = require("url");
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
*
|
|
7
|
+
* class CdpFrame extends _classSuper
|
|
8
|
+
* \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js
|
|
9
|
+
* \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Page.js
|
|
10
|
+
* \node_modules\puppeteer-core\src\api\Page.ts
|
|
11
|
+
*
|
|
12
|
+
* @type {exports}
|
|
13
|
+
*/
|
|
14
|
+
module.exports = class {
|
|
15
|
+
browser = null;
|
|
16
|
+
page = null;
|
|
17
|
+
responseCall = null;
|
|
18
|
+
requestCall = null;
|
|
19
|
+
isFrame = false;
|
|
20
|
+
params = {};
|
|
21
|
+
|
|
22
|
+
constructor(browser, page, params, isFrame = false) {
|
|
23
|
+
this.browser = browser;
|
|
24
|
+
this.page = page;
|
|
25
|
+
this.params = params;
|
|
26
|
+
this.isFrame = !!isFrame;
|
|
27
|
+
this.doListening(params);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
request(call) {
|
|
31
|
+
this.requestCall = call;
|
|
32
|
+
return this;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
response(call) {
|
|
36
|
+
this.responseCall = call;
|
|
37
|
+
return this;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* 重新在当前浏览器创建新窗口
|
|
42
|
+
*/
|
|
43
|
+
async clone() {
|
|
44
|
+
try {
|
|
45
|
+
const page = await this.browser.newPage();
|
|
46
|
+
await page.setRequestInterception(true); //允许拦截
|
|
47
|
+
// await page.setUserAgent(this.params.ua);
|
|
48
|
+
await page.evaluateOnNewDocument(() => {
|
|
49
|
+
const newProto = navigator.__proto__;
|
|
50
|
+
delete newProto.webdriver; //删除 navigator.webdriver字段
|
|
51
|
+
navigator.__proto__ = newProto; //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
|
|
52
|
+
});
|
|
53
|
+
return new module.exports(this.browser, page, this.params, false); //new 自身
|
|
54
|
+
} catch (e) {
|
|
55
|
+
console.log('[chrome.iframe.Error]', e.message);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* page.on方法
|
|
61
|
+
*/
|
|
62
|
+
on(key, call) {
|
|
63
|
+
try {
|
|
64
|
+
this.page.on(key, call);
|
|
65
|
+
return this;
|
|
66
|
+
} catch (e) {
|
|
67
|
+
console.log('[chrome.on.Error]', e.message);
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
once(key, call) {
|
|
72
|
+
try {
|
|
73
|
+
this.page.once(key, call);
|
|
74
|
+
return this;
|
|
75
|
+
} catch (e) {
|
|
76
|
+
console.log('[chrome.on.Error]', e.message);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
off(key, call) {
|
|
81
|
+
try {
|
|
82
|
+
this.page.off(key, call);
|
|
83
|
+
return this;
|
|
84
|
+
} catch (e) {
|
|
85
|
+
console.log('[chrome.on.Error]', e.message);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
emit(key, value) {
|
|
90
|
+
try {
|
|
91
|
+
this.page.emit(key, value);
|
|
92
|
+
return this;
|
|
93
|
+
} catch (e) {
|
|
94
|
+
console.log('[chrome.on.Error]', e.message);
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* 这必须放在已经打开过这个URL之后
|
|
100
|
+
*/
|
|
101
|
+
waitForFrame() {
|
|
102
|
+
try {
|
|
103
|
+
this.page.waitForFrame(async frame => {
|
|
104
|
+
return frame.name() === 'Test';
|
|
105
|
+
});
|
|
106
|
+
return this;
|
|
107
|
+
} catch (e) {
|
|
108
|
+
console.log('[chrome.waitForFrame.Error]', e.message);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* 等待框架URL包含指定字串
|
|
115
|
+
*/
|
|
116
|
+
async waitFrame(urlKey, timeout = 5000) {
|
|
117
|
+
try {
|
|
118
|
+
return await this.page.waitForFrame(async frame => {
|
|
119
|
+
return frame.url().indexOf(urlKey) > 0;
|
|
120
|
+
}, {timeout});
|
|
121
|
+
} catch (e) {
|
|
122
|
+
console.log('[chrome.waitFrame.Error]', e.message);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
async iframe(tag) {
|
|
127
|
+
try {
|
|
128
|
+
const frame = await (await this.page.$(tag)).contentFrame();
|
|
129
|
+
// return new iframe(this.browser, frame, this.params);
|
|
130
|
+
return new module.exports(this.browser, frame, this.params, true); //new 自身
|
|
131
|
+
} catch (e) {
|
|
132
|
+
console.log('[chrome.iframe.Error]', e.message);
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
/**
|
|
137
|
+
* 关闭
|
|
138
|
+
*/
|
|
139
|
+
async close(act = 3) {
|
|
140
|
+
|
|
141
|
+
try {
|
|
142
|
+
if (act & 1) await this.page.close();
|
|
143
|
+
if (act & 2) await this.browser.close();
|
|
144
|
+
return this;
|
|
145
|
+
} catch (e) {
|
|
146
|
+
console.log('[chrome.close.Error]', e.message);
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
/**
|
|
151
|
+
* 断开进程与浏览器
|
|
152
|
+
*/
|
|
153
|
+
async disconnect() {
|
|
154
|
+
try {
|
|
155
|
+
await this.browser.disconnect();
|
|
156
|
+
return this;
|
|
157
|
+
} catch (e) {
|
|
158
|
+
console.log('[chrome.disconnect.Error]', e.message);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
url() {
|
|
163
|
+
return this.page.url();
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
async size(width = 1024, height = 768) {
|
|
168
|
+
await this.page.setViewport({width, height});
|
|
169
|
+
return this;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* @param {Object} url
|
|
174
|
+
* @param {Object} option
|
|
175
|
+
* timeout:
|
|
176
|
+
* referer:
|
|
177
|
+
* referrerPolicy:
|
|
178
|
+
* waitUntil:默认
|
|
179
|
+
* load=*等待“加载”事件。,默认
|
|
180
|
+
* domcontentloaded *等待“DOMContentLoaded”事件。
|
|
181
|
+
* networkidle0:Waits till there are no more than 0 network connections for at least `500`ms
|
|
182
|
+
* networkidle2:Waits till there are no more than 2 network connections for at least `500`ms
|
|
183
|
+
*/
|
|
184
|
+
async goto(url, option = {}) {
|
|
185
|
+
try {
|
|
186
|
+
let {timeout = 0, referer, waitUntil = 'load'} = option;
|
|
187
|
+
if (typeof url === 'number') {
|
|
188
|
+
if (url < 0) {
|
|
189
|
+
await this.page.goBack({timeout, referer, waitUntil});
|
|
190
|
+
} else {
|
|
191
|
+
await this.page.goForward({timeout, referer, waitUntil});
|
|
192
|
+
}
|
|
193
|
+
} else {
|
|
194
|
+
await this.page.goto(url, {timeout, referer, waitUntil});
|
|
195
|
+
}
|
|
196
|
+
await this.page.goto(url, {timeout, referer, waitUntil});
|
|
197
|
+
return this;
|
|
198
|
+
} catch (e) {
|
|
199
|
+
console.log('[chrome.open.Error]', e.message);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
async open(url, option = {}) {
|
|
204
|
+
return await this.goto(url, option);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/**
|
|
208
|
+
* 直接获取网页全部信息,或设置
|
|
209
|
+
*/
|
|
210
|
+
async content(html, option = {}) {
|
|
211
|
+
try {
|
|
212
|
+
if (html === undefined) return await this.page.content();
|
|
213
|
+
let {timeout = 0, waitUntil = 'load'} = option;
|
|
214
|
+
if (this.isFrame) {
|
|
215
|
+
await this.page.setFrameContent(html);
|
|
216
|
+
} else {
|
|
217
|
+
await this.page.setContent(html, {timeout, waitUntil});
|
|
218
|
+
}
|
|
219
|
+
return this;
|
|
220
|
+
} catch (e) {
|
|
221
|
+
console.log('[chrome.content.Error]', e.message);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
/**
|
|
226
|
+
* 获取部分或全部HTML
|
|
227
|
+
* @param {Object} obj
|
|
228
|
+
*/
|
|
229
|
+
async html(obj) {
|
|
230
|
+
try {
|
|
231
|
+
if (obj) {
|
|
232
|
+
return await this.page.evaluate(ele => ele.innerHTML, obj);
|
|
233
|
+
}
|
|
234
|
+
return await this.page.evaluate(() => {
|
|
235
|
+
return document.documentElement.innerHTML;
|
|
236
|
+
});
|
|
237
|
+
} catch (e) {
|
|
238
|
+
console.log('[chrome.html.Error]', e.message);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
async text(obj) {
|
|
243
|
+
try {
|
|
244
|
+
if (obj) {
|
|
245
|
+
return await this.page.evaluate(ele => ele.textContent, obj);
|
|
246
|
+
}
|
|
247
|
+
return await this.page.evaluate(() => {
|
|
248
|
+
return document.documentElement.textContent;
|
|
249
|
+
});
|
|
250
|
+
} catch (e) {
|
|
251
|
+
console.log('[chrome.text.Error]', e.message);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
/**
|
|
256
|
+
* 显示或隐藏某个标签
|
|
257
|
+
*
|
|
258
|
+
* @param {Object} tag
|
|
259
|
+
* @param {Object} show 默认显示
|
|
260
|
+
*/
|
|
261
|
+
async display(tag, show) {
|
|
262
|
+
try {
|
|
263
|
+
// const element = await this.page.querySelector(tag);
|
|
264
|
+
// element.style.display = (!!show) ? '' : 'none';
|
|
265
|
+
(await this.page.querySelector(tag)).style.display = (!!show) ? '' : 'none';
|
|
266
|
+
return this;
|
|
267
|
+
} catch (e) {
|
|
268
|
+
console.log('[chrome.text.Error]', e.message);
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* 遍历tag1里的tag2
|
|
275
|
+
*
|
|
276
|
+
* @param {Object} tag1
|
|
277
|
+
* @param {Object} tag2
|
|
278
|
+
* @param {Object} call
|
|
279
|
+
*/
|
|
280
|
+
async elements(tag1, tag2, call) {
|
|
281
|
+
try {
|
|
282
|
+
const div = await this.page.$(tag1);
|
|
283
|
+
if (!div) {
|
|
284
|
+
throw new Error(`${tag1} not exists`);
|
|
285
|
+
}
|
|
286
|
+
for (const elm of (await div.$$(tag2))) {
|
|
287
|
+
call(elm, (await elm.evaluate(node => node.innerHTML)));
|
|
288
|
+
}
|
|
289
|
+
} catch (e) {
|
|
290
|
+
console.log('[chrome.elements.Error]', e.message)
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
/**
|
|
296
|
+
* 支持css普通选择器方式和伪类方式
|
|
297
|
+
* div.body
|
|
298
|
+
* div#body
|
|
299
|
+
* div[name=abc]
|
|
300
|
+
* iframe:first-child
|
|
301
|
+
* div>ul>li:nth-child(4)>a
|
|
302
|
+
*
|
|
303
|
+
* @param {Object} tag
|
|
304
|
+
*/
|
|
305
|
+
async element(tag) {
|
|
306
|
+
try {
|
|
307
|
+
return await this.page.$(tag);
|
|
308
|
+
} catch (e) {
|
|
309
|
+
console.log('[chrome.element.Error]', e.message);
|
|
310
|
+
return null;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
/**
|
|
316
|
+
* 等待浏览器跳转
|
|
317
|
+
*/
|
|
318
|
+
async navigation(timeout = 0, tryCount = 0) {
|
|
319
|
+
if (timeout < 200) timeout = timeout * 1000;
|
|
320
|
+
try {
|
|
321
|
+
await this.page.waitForNavigation({timeout});
|
|
322
|
+
return true;
|
|
323
|
+
} catch (e) {
|
|
324
|
+
if (tryCount > 0) {
|
|
325
|
+
return await this.navigation(timeout, --tryCount)
|
|
326
|
+
}
|
|
327
|
+
return false;
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
/**
|
|
332
|
+
* 等待某个元素出现
|
|
333
|
+
*/
|
|
334
|
+
async wait(ele, timeout = 0, tryCount = 0) {
|
|
335
|
+
if (timeout < 200) timeout = timeout * 1000;
|
|
336
|
+
try {
|
|
337
|
+
await this.page.waitForSelector(ele, {timeout});
|
|
338
|
+
return true;
|
|
339
|
+
} catch (e) {
|
|
340
|
+
if (tryCount > 0) {
|
|
341
|
+
return await this.wait(ele, timeout, --tryCount)
|
|
342
|
+
}
|
|
343
|
+
return false;
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* 等待x秒
|
|
350
|
+
* @param {Object} time
|
|
351
|
+
*/
|
|
352
|
+
async waiting(time) {
|
|
353
|
+
return await this.sleep(time);
|
|
354
|
+
}
|
|
355
|
+
|
|
356
|
+
async sleep(time) {
|
|
357
|
+
if (time < 100) time = time * 1000;
|
|
358
|
+
try {
|
|
359
|
+
await new Promise(res => setTimeout(res, time));
|
|
360
|
+
return this;
|
|
361
|
+
} catch (e) {
|
|
362
|
+
console.log('[chrome.sleep.Error]', e.message);
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
/**
|
|
367
|
+
* delay=每键入一个字符延迟毫秒
|
|
368
|
+
*/
|
|
369
|
+
async input(el, value, delay = 1) {
|
|
370
|
+
try {
|
|
371
|
+
await this.page.type(el, value, {delay});
|
|
372
|
+
return this;
|
|
373
|
+
} catch (e) {
|
|
374
|
+
console.log('[chrome.input.Error]', e.message);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
/**
|
|
379
|
+
* \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js
|
|
380
|
+
* @param {Object} el
|
|
381
|
+
*
|
|
382
|
+
* option:
|
|
383
|
+
* delay =鼠标按下后延迟释放鼠标的时间(以毫秒为单位)。
|
|
384
|
+
* count =次数,默认1
|
|
385
|
+
* offset ={x,y}可单击点相对于边框左上角的偏移。
|
|
386
|
+
*
|
|
387
|
+
*/
|
|
388
|
+
async click(el, option = {}) {
|
|
389
|
+
try {
|
|
390
|
+
let {delay = 100, count = 1, x = 6, y = 3} = option;
|
|
391
|
+
await this.page.click(el, {delay, count, offset: {x, y}});
|
|
392
|
+
return this;
|
|
393
|
+
} catch (e) {
|
|
394
|
+
console.log('[chrome.click.Error]', e.message);
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
async tap(el) {
|
|
399
|
+
try {
|
|
400
|
+
await this.page.tap(el);
|
|
401
|
+
return this;
|
|
402
|
+
} catch (e) {
|
|
403
|
+
console.log('[chrome.hover.Error]', e.message);
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
async focus(el) {
|
|
408
|
+
try {
|
|
409
|
+
await this.page.focus(el);
|
|
410
|
+
return this;
|
|
411
|
+
} catch (e) {
|
|
412
|
+
console.log('[chrome.focus.Error]', e.message);
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
async hover(el) {
|
|
417
|
+
try {
|
|
418
|
+
await this.page.hover(el);
|
|
419
|
+
return this;
|
|
420
|
+
} catch (e) {
|
|
421
|
+
console.log('[chrome.hover.Error]', e.message);
|
|
422
|
+
}
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
/**
|
|
427
|
+
* 根据目标要求格式提交
|
|
428
|
+
*
|
|
429
|
+
* @param url
|
|
430
|
+
* @param data
|
|
431
|
+
*/
|
|
432
|
+
async post(url, data) {
|
|
433
|
+
//JSON.stringify(data)
|
|
434
|
+
try {
|
|
435
|
+
await this.page.goto(url, {method: 'POST', body: data, waitUntil: 'load'});
|
|
436
|
+
return this;
|
|
437
|
+
} catch (e) {
|
|
438
|
+
console.log('[chrome.post.Error]', e.message);
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
/**
|
|
444
|
+
* 保存HTML
|
|
445
|
+
* @param {Object} file
|
|
446
|
+
*/
|
|
447
|
+
async saveHtml(file) {
|
|
448
|
+
try {
|
|
449
|
+
await this.improveUrls(); //修正js/css的域名
|
|
450
|
+
const body = await this.page.evaluate(() => {
|
|
451
|
+
return document.documentElement.innerHTML;
|
|
452
|
+
});
|
|
453
|
+
await fs.writeFileSync(file, body);
|
|
454
|
+
return this;
|
|
455
|
+
} catch (e) {
|
|
456
|
+
console.log('[chrome.saveHtml.Error]', e.parse());
|
|
457
|
+
}
|
|
458
|
+
}
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
jsonArray(cookiesVal, host) {
|
|
462
|
+
if (!cookiesVal) return [];
|
|
463
|
+
return JSON.parse(cookiesVal).map(cook => {
|
|
464
|
+
let {name, value, domain} = cook;
|
|
465
|
+
if (!domain) domain = host;
|
|
466
|
+
return {name, value, domain};
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
|
|
470
|
+
/**
|
|
471
|
+
|
|
472
|
+
// const url = parseUrl(this.page.url());
|
|
473
|
+
// const host = '.' + url.host.split('.').slice(-2).join('.');
|
|
474
|
+
// let cookiesVal = read(file, 'utf8');
|
|
475
|
+
// cookiesVal = this.jsonArray(cookiesVal, host);
|
|
476
|
+
|
|
477
|
+
*/
|
|
478
|
+
|
|
479
|
+
/**
|
|
480
|
+
* 合并两个Cookies,并以后面的值为准
|
|
481
|
+
*
|
|
482
|
+
* @param {Object} oldCookies
|
|
483
|
+
* @param {Object} newCookies
|
|
484
|
+
*/
|
|
485
|
+
async mergeNewCookies(oldCookies, newCookies) {
|
|
486
|
+
if (newCookies.length === 0) return oldCookies;
|
|
487
|
+
// let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));//浅拷贝
|
|
488
|
+
let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
|
|
489
|
+
newCookies.map(obj => {
|
|
490
|
+
aMap.set(obj.name, obj)
|
|
491
|
+
});
|
|
492
|
+
return await Array.from(aMap.values());
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
/**
|
|
497
|
+
* 获取当前页面的Cookies
|
|
498
|
+
*/
|
|
499
|
+
async getCookies() {
|
|
500
|
+
try {
|
|
501
|
+
return await this.page.cookies();
|
|
502
|
+
} catch (e) {
|
|
503
|
+
console.log('[chrome.getCookies.Error]', e.message);
|
|
504
|
+
return [];
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
/**
|
|
509
|
+
* 设置Cookies
|
|
510
|
+
* @param {Object} cookies
|
|
511
|
+
*/
|
|
512
|
+
async setCookies(cookies) {
|
|
513
|
+
try {
|
|
514
|
+
await this.page.setCookie(...cookies);
|
|
515
|
+
return this;
|
|
516
|
+
} catch (e) {
|
|
517
|
+
console.log('[chrome.setCookies.Error]', e.message);
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
/**
|
|
522
|
+
* 保存当前页面中的Cookies
|
|
523
|
+
* @param {Object} file
|
|
524
|
+
*/
|
|
525
|
+
async saveCookies(file, append = true) {
|
|
526
|
+
try {
|
|
527
|
+
let cookies = await this.page.cookies();
|
|
528
|
+
|
|
529
|
+
if (append && fs.existsSync(file)) {
|
|
530
|
+
let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
|
|
531
|
+
cookies = this.mergeNewCookies(dbCookies, cookies);
|
|
532
|
+
}
|
|
533
|
+
|
|
534
|
+
await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
|
|
535
|
+
return this;
|
|
536
|
+
} catch (e) {
|
|
537
|
+
console.log('[chrome.saveCookies.Error]', e.parse());
|
|
538
|
+
}
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
/**
|
|
542
|
+
* 加水印,这里实际上是在网页加个DIV并显示时间
|
|
543
|
+
* @param {Object} conf
|
|
544
|
+
*/
|
|
545
|
+
async watermark(conf) {
|
|
546
|
+
try {
|
|
547
|
+
const tmpID = 'watermark' + Date.now();
|
|
548
|
+
await this.page.evaluate((conf, tmpID) => {
|
|
549
|
+
const wmDiv = document.createElement('div');
|
|
550
|
+
wmDiv.id = tmpID;
|
|
551
|
+
wmDiv.style.position = 'fixed';
|
|
552
|
+
wmDiv.style.top = `50%`;
|
|
553
|
+
wmDiv.style.left = `50%`;
|
|
554
|
+
wmDiv.style.transform = 'translate(-50%, -50%)';
|
|
555
|
+
wmDiv.style.fontSize = `36px`;
|
|
556
|
+
wmDiv.style.zIndex = '10000';
|
|
557
|
+
// wmDiv.style.color = color;
|
|
558
|
+
wmDiv.style.color = 'rgba(0, 0, 0, 0.9)';
|
|
559
|
+
wmDiv.style.pointerEvents = 'none';
|
|
560
|
+
wmDiv.innerText = conf.text;
|
|
561
|
+
document.body.appendChild(wmDiv);
|
|
562
|
+
}, conf, tmpID);
|
|
563
|
+
await this.page.waitForSelector(`div#${tmpID}`, {timeout: 1000});
|
|
564
|
+
return this;
|
|
565
|
+
} catch (e) {
|
|
566
|
+
console.log('[chrome.watermark.Error]', e.message);
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
/**
|
|
572
|
+
* 网页拍照
|
|
573
|
+
* @param file
|
|
574
|
+
* @param quality
|
|
575
|
+
*/
|
|
576
|
+
async photograph(file, quality = 50) {
|
|
577
|
+
try {
|
|
578
|
+
await this.page.screenshot({
|
|
579
|
+
path: file,
|
|
580
|
+
fullPage: true, //全屏
|
|
581
|
+
type: 'jpeg',
|
|
582
|
+
quality: quality,
|
|
583
|
+
omitBackground: true, //显示背景
|
|
584
|
+
});
|
|
585
|
+
// console.log('photograph=', file);
|
|
586
|
+
return this;
|
|
587
|
+
} catch (e) {
|
|
588
|
+
console.log('[chrome.photograph.Error]', e.message);
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* 补全所有本地js/css
|
|
594
|
+
*/
|
|
595
|
+
async improveUrls() {
|
|
596
|
+
const url = parseUrl(this.page.url());
|
|
597
|
+
const domain = url.protocol + '//' + url.host;
|
|
598
|
+
await this.page.evaluate((domain) => {
|
|
599
|
+
try {
|
|
600
|
+
const tags = document.querySelectorAll('script[src], link[href], iframe[src]');
|
|
601
|
+
tags.forEach((ele) => {
|
|
602
|
+
const src = ele.getAttribute('src');
|
|
603
|
+
const href = ele.getAttribute('href');
|
|
604
|
+
if (src && src.startsWith('/')) ele.src = domain + src;
|
|
605
|
+
if (href && href.startsWith('/')) ele.href = domain + href;
|
|
606
|
+
});
|
|
607
|
+
} catch (e) {
|
|
608
|
+
console.log('[chrome.improveUrls.Error]', e.message);
|
|
609
|
+
}
|
|
610
|
+
}, domain);
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
/**
|
|
615
|
+
* 解析网页set-cookies的值
|
|
616
|
+
*
|
|
617
|
+
* @param {Object} strCookies
|
|
618
|
+
*/
|
|
619
|
+
async parseCookies(strCookies) {
|
|
620
|
+
return await strCookies.split("\n").map((ls, i) => {
|
|
621
|
+
let value = {};
|
|
622
|
+
ls.split(';').map((ln, j) => {
|
|
623
|
+
// console.log(ln);
|
|
624
|
+
const arr = ln.split('=');
|
|
625
|
+
const Key = (arr[0]).trim();
|
|
626
|
+
if (!Key) return;
|
|
627
|
+
|
|
628
|
+
if (j === 0) {
|
|
629
|
+
value.name = Key;
|
|
630
|
+
value.value = arr[1];
|
|
631
|
+
} else if (Key === 'Max-Age') {
|
|
632
|
+
value.expire = parseInt(arr[1]) + (Date.now() / 1000);
|
|
633
|
+
} else if (Key === 'Secure') {
|
|
634
|
+
value.source = true;
|
|
635
|
+
value.sourceScheme = 'Secure';
|
|
636
|
+
} else {
|
|
637
|
+
value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
|
|
638
|
+
}
|
|
639
|
+
})
|
|
640
|
+
return value;
|
|
641
|
+
})
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
async parseResponse(response) {
|
|
646
|
+
// const response = await this.page.waitForResponse(res => res);
|
|
647
|
+
|
|
648
|
+
const value = {};
|
|
649
|
+
const request = await response.request();
|
|
650
|
+
const headers = await response.headers();
|
|
651
|
+
value.method = await request.method();
|
|
652
|
+
if (value.method === 'OPTIONS') return;
|
|
653
|
+
|
|
654
|
+
value.type = await request.resourceType();
|
|
655
|
+
// value.redirect = await response.redirectURL();
|
|
656
|
+
if (value.type === 'xhr') value.type = 'AJAX';
|
|
657
|
+
value.url = await response.url();
|
|
658
|
+
value.domain = parseUrl(value.url)['host'];
|
|
659
|
+
value.content = headers['content-type'];
|
|
660
|
+
value.length = headers['content-length'];
|
|
661
|
+
value.status = await response.status();
|
|
662
|
+
value.ok = await response.ok();
|
|
663
|
+
value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
|
|
664
|
+
// value.headers = headers;
|
|
665
|
+
if (headers['server']) value.server = headers['server'];
|
|
666
|
+
if (headers['set-cookie']) value.cookies = await this.parseCookies(headers['set-cookie']);
|
|
667
|
+
value.remote = await response.remoteAddress(); //目标服务器
|
|
668
|
+
if (value.status === 301 || value.status === 302) return value;
|
|
669
|
+
if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
|
|
670
|
+
if (value.content) {
|
|
671
|
+
if (value.content.startsWith('application/vnd')) return value;
|
|
672
|
+
if (value.content.startsWith('application/xml')) return value;
|
|
673
|
+
if (value.content.startsWith('text/css')) return value;
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
value.post = await request.postData();
|
|
677
|
+
if (value.post) value.post = value.post.toString();
|
|
678
|
+
|
|
679
|
+
try {
|
|
680
|
+
value.buffer = await response.buffer();
|
|
681
|
+
value.response = value.buffer.toString();
|
|
682
|
+
value.json = JSON.parse(value.response);
|
|
683
|
+
} catch (e) {
|
|
684
|
+
value.json = e.parse();
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
return value;
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
doListening(params) {
|
|
692
|
+
|
|
693
|
+
this.page.on('request', async (request) => {
|
|
694
|
+
if (this.requestCall) {
|
|
695
|
+
const run = await this.requestCall(request);
|
|
696
|
+
if (run === false) {
|
|
697
|
+
request.abort();
|
|
698
|
+
return;
|
|
699
|
+
}
|
|
700
|
+
}
|
|
701
|
+
|
|
702
|
+
const {abort_img = false, no_cache = true, append_headers = {}} = params;
|
|
703
|
+
if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
|
|
704
|
+
request.abort();
|
|
705
|
+
return;
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
const headers = request.headers();
|
|
709
|
+
if (append_headers !== {}) Object.assign(headers, append_headers);
|
|
710
|
+
|
|
711
|
+
headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
|
|
712
|
+
headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
|
|
713
|
+
// headers['Access-Control-Allow-Headers'] = 'Content-Type';
|
|
714
|
+
|
|
715
|
+
if (no_cache) {
|
|
716
|
+
const rType = request.resourceType();
|
|
717
|
+
if (rType === 'script' || rType === 'stylesheet') {
|
|
718
|
+
headers['Cache-Control'] = 'no-store'; // 禁用缓存
|
|
719
|
+
}
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
request.continue({headers});
|
|
723
|
+
});
|
|
724
|
+
|
|
725
|
+
this.page.on('response', async res => {
|
|
726
|
+
if (!this.responseCall) return;
|
|
727
|
+
let json = await this.parseResponse(res);
|
|
728
|
+
this.responseCall(json);
|
|
729
|
+
});
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
|
|
250
733
|
}
|