nodejs_chromium 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/index.js +24 -9
  2. package/package.json +1 -1
  3. package/src/chrome.js +619 -94
package/index.js CHANGED
@@ -1,23 +1,34 @@
1
1
  require('nodejs_patch');
2
2
  const puppeteer = require("puppeteer");
3
3
  const chrome = require("./src/chrome.js");
4
+ global.__UA__ = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36';
4
5
 
5
6
  async function newChrome(params) {
6
- const ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36';
7
- let { id, view, width, height, scale, mobile, dumpio, debug, incognito, path } = params;
8
- if (!width) width = 1024;
9
- if (!height) height = 768;
10
- if (!scale) scale = 1;
11
- if (!id) id = (Date.now() / 1000) + '';
7
+ let {
8
+ id = 'myChrome',
9
+ view = false,
10
+ width = 1024,
11
+ height = 768,
12
+ scale = 1,
13
+ mobile = false,
14
+ dumpio = false,
15
+ debug = false,
16
+ incognito = true,
17
+ path = void 0,
18
+ slowMo = 1,
19
+ ua = void 0
20
+ } = params;
12
21
 
13
22
  let option = {
14
23
  userDataDir: `runtime/template/${id}`,
15
24
  // timeout: 500,//最大允许超时间,默认为3000,最小215,一般不要设置
16
- slowMo: 2, //将 Puppeteer 操作减少指定的毫秒数,每一步停留时间,不能太大,否则会太慢。这样你就可以看清发生了什么,这很有用。
25
+ slowMo, //每一步停留时间,不能太大,否则会太慢,特别是在类似写入很多Cookies时,每写入一个都要等一下。
17
26
  headless: view ? false : 'new', //'new',设置是否在无头模式下运行浏览器,false=会启动浏览器,true=无界面
18
27
  devtools: !!debug, //打开调试
19
28
  ignoreHTTPSErrors: true, //忽略 HTTPS 错误。屏蔽跳转不同域名的报错
20
- ignoreDefaultArgs: ["--enable-automation"], //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
29
+ ignoreDefaultArgs: [
30
+ "--enable-automation", //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
31
+ ],
21
32
  dumpio: !!dumpio, //是否将浏览器的标准输入/输出流(stdio)的内容输出到 Node.js 的 stdout 和 stderr。
22
33
  defaultViewport: {
23
34
  width,
@@ -26,6 +37,7 @@ async function newChrome(params) {
26
37
  isMobile: !!mobile,
27
38
  },
28
39
  args: [
40
+ '--enable-chrome-browser-cloud-management', //Cloud Browser Client Management (CBCM)
29
41
  '--disable-web-security', //禁用浏览器的同源策略(Same-Origin Policy)和跨站请求伪造(CSRF)保护
30
42
  `--window-size=${width},${height}`,
31
43
  '--no-sandbox', //禁用沙箱模式
@@ -35,6 +47,8 @@ async function newChrome(params) {
35
47
  '--disable-blink-features=AutomationControlled', //禁用 blink 引擎的自动化控制特性,防止网站通过检测 blink 引擎的特性来判断浏览器是否处于自动化控制状态。
36
48
  // 在自动化测试或爬虫等场景中,这个参数可以帮助隐藏浏览器的自动化痕迹,使得浏览器行为更接近于真实用户操作
37
49
  '--lang=zh-CN', //设置中文环境
50
+ '--disable-extensions', //禁止启动扩展
51
+ '--disable-dev-shm-usage', //Linux系统中使用普通的文件系统缓存避免因为/dev/shm大小不足而导致的问题
38
52
  ]
39
53
  }
40
54
 
@@ -46,8 +60,9 @@ async function newChrome(params) {
46
60
 
47
61
  const browser = await puppeteer.launch(option);
48
62
  const page = (await browser.pages())[0];
63
+ // const page = await browser.newPage();
49
64
  await page.setRequestInterception(true); //允许拦截
50
- await page.setUserAgent(params.ua || ua);
65
+ if (ua) await page.setUserAgent(ua);
51
66
  await page.evaluateOnNewDocument(() => {
52
67
  const newProto = navigator.__proto__;
53
68
  delete newProto.webdriver; //删除 navigator.webdriver字段
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodejs_chromium",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/src/chrome.js CHANGED
@@ -1,89 +1,342 @@
1
- const { writeFileSync: saveFile } = require("fs");
1
+ const fs = require("fs");
2
2
  const { parse: parseUrl } = require("url");
3
3
 
4
+
5
+ /**
6
+ *
7
+ * class CdpFrame extends _classSuper
8
+ * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js
9
+ * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Page.js
10
+ * \node_modules\puppeteer-core\src\api\Page.ts
11
+ *
12
+ * @type {exports}
13
+ */
4
14
  module.exports = class {
5
15
  browser = null;
6
16
  page = null;
7
17
  responseCall = null;
18
+ requestCall = null;
19
+ isFrame = false;
20
+ visible = false;
21
+ params = {};
8
22
 
9
- constructor(browser, page, params) {
23
+ constructor(browser, page, params, isFrame = false) {
10
24
  this.browser = browser;
11
25
  this.page = page;
26
+ this.params = params;
27
+ this.visible = !!params.view;
28
+ this.isFrame = !!isFrame;
29
+ this.doListening(params);
30
+ }
12
31
 
13
- page.on('request', (request) => {
14
- const { abort_img, no_cache, append_headers } = params;
15
- if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
16
- request.abort();
17
- return;
18
- }
32
+ request(call) {
33
+ this.requestCall = call;
34
+ return this;
35
+ }
19
36
 
20
- const headers = request.headers();
21
- if (typeof append_headers === 'object') Object.assign(headers, append_headers);
37
+ response(call) {
38
+ this.responseCall = call;
39
+ return this;
40
+ }
22
41
 
23
- headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
24
- headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
25
- // headers['Access-Control-Allow-Headers'] = 'Content-Type';
42
+ /**
43
+ * 重新在当前浏览器创建新窗口
44
+ */
45
+ async clone() {
46
+ try {
47
+ const page = await this.browser.newPage();
48
+ await page.setRequestInterception(true); //允许拦截
49
+ // await page.setUserAgent(this.params.ua);
50
+ await page.evaluateOnNewDocument(() => {
51
+ const newProto = navigator.__proto__;
52
+ delete newProto.webdriver; //删除 navigator.webdriver字段
53
+ navigator.__proto__ = newProto; //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
54
+ });
55
+ return new module.exports(this.browser, page, this.params, false); //new 自身
56
+ }
57
+ catch (e) {
58
+ console.log('[chrome.iframe.Error]', e.message);
59
+ }
60
+ }
26
61
 
27
- if (no_cache) {
28
- const rType = request.resourceType();
29
- if (rType === 'script' || rType === 'stylesheet') {
30
- headers['Cache-Control'] = 'no-store'; // 禁用缓存
31
- }
32
- }
62
+ /**
63
+ * page.on方法
64
+ */
65
+ on(key, call) {
66
+ try {
67
+ this.page.on(key, call);
68
+ return this;
69
+ }
70
+ catch (e) {
71
+ console.log('[chrome.on.Error]', e.message);
72
+ }
73
+ }
33
74
 
34
- request.continue({ headers });
35
- });
75
+ once(key, call) {
76
+ try {
77
+ this.page.once(key, call);
78
+ return this;
79
+ }
80
+ catch (e) {
81
+ console.log('[chrome.on.Error]', e.message);
82
+ }
83
+ }
36
84
 
37
- page.on('response', async res => {
38
- if (!this.responseCall) return;
39
- let json = await this.parseResponse(res);
40
- this.responseCall(json);
41
- });
85
+ off(key, call) {
86
+ try {
87
+ this.page.off(key, call);
88
+ return this;
89
+ }
90
+ catch (e) {
91
+ console.log('[chrome.on.Error]', e.message);
92
+ }
42
93
  }
43
94
 
44
- response(call) {
45
- this.responseCall = call;
95
+ emit(key, value) {
96
+ try {
97
+ this.page.emit(key, value);
98
+ return this;
99
+ }
100
+ catch (e) {
101
+ console.log('[chrome.on.Error]', e.message);
102
+ }
46
103
  }
47
104
 
48
- async waiting(time) {
49
- if (time < 100) time = time * 1000;
50
- return await new Promise(res => setTimeout(res, time));
105
+ /**
106
+ * 这必须放在已经打开过这个URL之后
107
+ */
108
+ waitForFrame() {
109
+ try {
110
+ this.page.waitForFrame(async frame => {
111
+ return frame.name() === 'Test';
112
+ });
113
+ return this;
114
+ }
115
+ catch (e) {
116
+ console.log('[chrome.waitForFrame.Error]', e.message);
117
+ }
118
+ }
119
+
120
+
121
+ /**
122
+ * 等待框架URL包含指定字串
123
+ */
124
+ async waitFrame(urlKey, timeout = 5000) {
125
+ try {
126
+ return await this.page.waitForFrame(async frame => {
127
+ return frame.url().indexOf(urlKey) > 0;
128
+ }, { timeout });
129
+ }
130
+ catch (e) {
131
+ console.log('[chrome.waitFrame.Error]', e.message);
132
+ }
51
133
  }
52
134
 
53
- async open(url) {
54
- await this.page.goto(url, { waitUntil: 'load' });
135
+ async iframe(tag) {
136
+ try {
137
+ const frame = await (await this.page.$(tag)).contentFrame();
138
+ // return new iframe(this.browser, frame, this.params);
139
+ return new module.exports(this.browser, frame, this.params, true); //new 自身
140
+ }
141
+ catch (e) {
142
+ console.log('[chrome.iframe.Error]', e.message);
143
+ }
55
144
  }
56
145
 
57
146
  /**
58
- * 根据目标要求格式提交
59
- *
60
- * @param url
61
- * @param data
147
+ * 关闭
62
148
  */
63
- async post(url, data) {
64
- //JSON.stringify(data)
65
- await this.page.goto(url, { method: 'POST', body: data, waitUntil: 'load' });
149
+ async close(act = 3) {
150
+
151
+ try {
152
+ if (act & 1) await this.page.close();
153
+ if (act & 2) await this.browser.close();
154
+ return this;
155
+ }
156
+ catch (e) {
157
+ console.log('[chrome.close.Error]', e.message);
158
+ }
66
159
  }
67
160
 
68
- async input(el, value) {
69
- await this.page.type(el, value);
161
+ /**
162
+ * 断开进程与浏览器
163
+ */
164
+ async disconnect() {
165
+ try {
166
+ await this.browser.disconnect();
167
+ return this;
168
+ }
169
+ catch (e) {
170
+ console.log('[chrome.disconnect.Error]', e.message);
171
+ }
172
+ }
173
+
174
+ url() {
175
+ return this.page.url();
70
176
  }
71
177
 
72
- async click(el) {
73
- await this.page.click(el);
178
+
179
+ async size(width = 1024, height = 768) {
180
+ await this.page.setViewport({ width, height });
181
+ return this;
74
182
  }
75
183
 
76
184
  /**
77
- * page.on方法
185
+ * @param {Object} url
186
+ * @param {Object} option
187
+ * timeout:
188
+ * referer:
189
+ * referrerPolicy:
190
+ * waitUntil:默认
191
+ * load=*等待“加载”事件。,默认
192
+ * domcontentloaded *等待“DOMContentLoaded”事件。
193
+ * networkidle0:Waits till there are no more than 0 network connections for at least `500`ms
194
+ * networkidle2:Waits till there are no more than 2 network connections for at least `500`ms
78
195
  */
79
- async on(key, call) {
80
- await this.page.on(key, call);
196
+ async goto(url, option = {}) {
197
+ try {
198
+ let { timeout = 0, referer, waitUntil = 'load' } = option;
199
+ if (typeof url === 'number') {
200
+ if (url < 0) {
201
+ await this.page.goBack({ timeout, referer, waitUntil });
202
+ }
203
+ else {
204
+ await this.page.goForward({ timeout, referer, waitUntil });
205
+ }
206
+ }
207
+ else {
208
+ await this.page.goto(url, { timeout, referer, waitUntil });
209
+ }
210
+ await this.page.goto(url, { timeout, referer, waitUntil });
211
+ return this;
212
+ }
213
+ catch (e) {
214
+ console.log('[chrome.open.Error]', e.message);
215
+ }
81
216
  }
82
217
 
83
- async disconnect() {
84
- await this.browser.disconnect();
218
+ async open(url, option = {}) {
219
+ return await this.goto(url, option);
220
+ }
221
+
222
+ /**
223
+ * 直接获取网页全部信息,或设置
224
+ */
225
+ async content(html, option = {}) {
226
+ try {
227
+ if (html === undefined) return await this.page.content();
228
+ let { timeout = 0, waitUntil = 'load' } = option;
229
+ if (this.isFrame) {
230
+ await this.page.setFrameContent(html);
231
+ }
232
+ else {
233
+ await this.page.setContent(html, { timeout, waitUntil });
234
+ }
235
+ return this;
236
+ }
237
+ catch (e) {
238
+ console.log('[chrome.content.Error]', e.message);
239
+ }
85
240
  }
86
241
 
242
+ /**
243
+ * 获取部分或全部HTML
244
+ * @param {Object} obj
245
+ */
246
+ async html(obj) {
247
+ try {
248
+ if (obj) {
249
+ return await this.page.evaluate(ele => ele.innerHTML, obj);
250
+ }
251
+ return await this.page.evaluate(() => {
252
+ return document.documentElement.innerHTML;
253
+ });
254
+ }
255
+ catch (e) {
256
+ console.log('[chrome.html.Error]', e.message);
257
+ }
258
+ }
259
+
260
+ async text(obj) {
261
+ try {
262
+ if (obj) {
263
+ return await this.page.evaluate(ele => ele.textContent, obj);
264
+ }
265
+ return await this.page.evaluate(() => {
266
+ return document.documentElement.textContent;
267
+ });
268
+ }
269
+ catch (e) {
270
+ console.log('[chrome.text.Error]', e.message);
271
+ }
272
+ }
273
+
274
+ /**
275
+ * 显示或隐藏某个标签
276
+ *
277
+ * @param {Object} tag
278
+ * @param {Object} show 默认显示
279
+ */
280
+ async display(tag, show) {
281
+ try {
282
+ // const element = await this.page.querySelector(tag);
283
+ // element.style.display = (!!show) ? '' : 'none';
284
+ (await this.page.querySelector(tag)).style.display = (!!show) ? '' : 'none';
285
+ return this;
286
+ }
287
+ catch (e) {
288
+ console.log('[chrome.text.Error]', e.message);
289
+ }
290
+ }
291
+
292
+
293
+ /**
294
+ * 遍历tag1里的tag2
295
+ *
296
+ * @param {Object} tag1
297
+ * @param {Object} tag2
298
+ * @param {Object} call
299
+ */
300
+ async elements(tag1, tag2, call) {
301
+ try {
302
+ const div = await this.page.$(tag1);
303
+ if (!div) {
304
+ throw new Error(`${tag1} not exists`);
305
+ }
306
+ for (const elm of (await div.$$(tag2))) {
307
+ call(elm, (await elm.evaluate(node => node.innerHTML)));
308
+ }
309
+ }
310
+ catch (e) {
311
+ console.log('[chrome.elements.Error]', e.message)
312
+ }
313
+ }
314
+
315
+
316
+ /**
317
+ * 支持css普通选择器方式和伪类方式
318
+ * div.body
319
+ * div#body
320
+ * div[name=abc]
321
+ * iframe:first-child
322
+ * div>ul>li:nth-child(4)>a
323
+ *
324
+ * @param {Object} tag
325
+ */
326
+ async element(tag) {
327
+ try {
328
+ return await this.page.$(tag);
329
+ }
330
+ catch (e) {
331
+ console.log('[chrome.element.Error]', e.message);
332
+ return null;
333
+ }
334
+ }
335
+
336
+
337
+ /**
338
+ * 等待浏览器跳转
339
+ */
87
340
  async navigation(timeout = 0, tryCount = 0) {
88
341
  if (timeout < 200) timeout = timeout * 1000;
89
342
  try {
@@ -92,12 +345,15 @@ module.exports = class {
92
345
  }
93
346
  catch (e) {
94
347
  if (tryCount > 0) {
95
- return await this.navigation(ele, timeout, --tryCount)
348
+ return await this.navigation(timeout, --tryCount)
96
349
  }
97
350
  return false;
98
351
  }
99
352
  }
100
353
 
354
+ /**
355
+ * 等待某个元素出现
356
+ */
101
357
  async wait(ele, timeout = 0, tryCount = 0) {
102
358
  if (timeout < 200) timeout = timeout * 1000;
103
359
  try {
@@ -112,53 +368,239 @@ module.exports = class {
112
368
  }
113
369
  }
114
370
 
371
+
372
+ /**
373
+ * 等待x秒
374
+ * @param {Object} time
375
+ */
376
+ async waiting(time) {
377
+ return await this.sleep(time);
378
+ }
379
+
380
+ async sleep(time) {
381
+ if (time < 100) time = time * 1000;
382
+ try {
383
+ await new Promise(res => setTimeout(res, time));
384
+ return this;
385
+ }
386
+ catch (e) {
387
+ console.log('[chrome.sleep.Error]', e.message);
388
+ }
389
+ }
390
+
391
+ /**
392
+ * delay=每键入一个字符延迟毫秒
393
+ */
394
+ async input(el, value, delay = 1) {
395
+ try {
396
+ await this.page.type(el, value, { delay });
397
+ return this;
398
+ }
399
+ catch (e) {
400
+ console.log('[chrome.input.Error]', e.message);
401
+ }
402
+ }
403
+
404
+ /**
405
+ * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js
406
+ * @param {Object} el
407
+ *
408
+ * option:
409
+ * delay =鼠标按下后延迟释放鼠标的时间(以毫秒为单位)。
410
+ * count =次数,默认1
411
+ * offset ={x,y}可单击点相对于边框左上角的偏移。
412
+ *
413
+ */
414
+ async click(el, option = {}) {
415
+ try {
416
+ let { delay = 100, count = 1, x = 6, y = 3 } = option;
417
+ await this.page.click(el, { delay, count, offset: { x, y } });
418
+ return this;
419
+ }
420
+ catch (e) {
421
+ console.log('[chrome.click.Error]', e.message);
422
+ }
423
+ }
424
+
425
+ async tap(el) {
426
+ try {
427
+ await this.page.tap(el);
428
+ return this;
429
+ }
430
+ catch (e) {
431
+ console.log('[chrome.hover.Error]', e.message);
432
+ }
433
+ }
434
+
435
+ async focus(el) {
436
+ try {
437
+ await this.page.focus(el);
438
+ return this;
439
+ }
440
+ catch (e) {
441
+ console.log('[chrome.focus.Error]', e.message);
442
+ }
443
+ }
444
+
445
+ async hover(el) {
446
+ try {
447
+ await this.page.hover(el);
448
+ return this;
449
+ }
450
+ catch (e) {
451
+ console.log('[chrome.hover.Error]', e.message);
452
+ }
453
+ }
454
+
455
+
456
+ /**
457
+ * 根据目标要求格式提交
458
+ *
459
+ * @param url
460
+ * @param data
461
+ */
462
+ async post(url, data) {
463
+ //JSON.stringify(data)
464
+ try {
465
+ await this.page.goto(url, { method: 'POST', body: data, waitUntil: 'load' });
466
+ return this;
467
+ }
468
+ catch (e) {
469
+ console.log('[chrome.post.Error]', e.message);
470
+ }
471
+ }
472
+
473
+
474
+ /**
475
+ * 保存HTML
476
+ * @param {Object} file
477
+ */
115
478
  async saveHtml(file) {
116
479
  try {
117
480
  await this.improveUrls(); //修正js/css的域名
118
481
  const body = await this.page.evaluate(() => {
119
482
  return document.documentElement.innerHTML;
120
483
  });
121
- await saveFile(file, body);
484
+ await fs.writeFileSync(file, body);
485
+ return this;
122
486
  }
123
487
  catch (e) {
124
- console.log('saveHtml Error:', e.parse());
488
+ console.log('[chrome.saveHtml.Error]', e.parse());
125
489
  }
126
490
  }
127
491
 
128
- async setCookie(cookies) {
129
- await this.page.setCookie(...cookies);
492
+
493
+ jsonArray(cookiesVal, host) {
494
+ if (!cookiesVal) return [];
495
+ return JSON.parse(cookiesVal).map(cook => {
496
+ let { name, value, domain } = cook;
497
+ if (!domain) domain = host;
498
+ return { name, value, domain };
499
+ });
500
+ }
501
+
502
+ /**
503
+
504
+ // const url = parseUrl(this.page.url());
505
+ // const host = '.' + url.host.split('.').slice(-2).join('.');
506
+ // let cookiesVal = read(file, 'utf8');
507
+ // cookiesVal = this.jsonArray(cookiesVal, host);
508
+
509
+ */
510
+
511
+ /**
512
+ * 合并两个Cookies,并以后面的值为准
513
+ *
514
+ * @param {Object} oldCookies
515
+ * @param {Object} newCookies
516
+ */
517
+ async mergeNewCookies(oldCookies, newCookies) {
518
+ if (newCookies.length === 0) return oldCookies;
519
+ // let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));//浅拷贝
520
+ let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
521
+ newCookies.map(obj => {
522
+ aMap.set(obj.name, obj)
523
+ });
524
+ return await Array.from(aMap.values());
130
525
  }
131
526
 
132
- async getCookie() {
133
- return await this.page.cookies();
527
+
528
+ /**
529
+ * 获取当前页面的Cookies
530
+ */
531
+ async getCookies() {
532
+ try {
533
+ return await this.page.cookies();
534
+ }
535
+ catch (e) {
536
+ console.log('[chrome.getCookies.Error]', e.message);
537
+ return [];
538
+ }
134
539
  }
135
540
 
136
- async saveCookies(file) {
541
+ /**
542
+ * 设置Cookies
543
+ * @param {Object} cookies
544
+ */
545
+ async setCookies(cookies) {
137
546
  try {
138
- const cookies = await this.page.cookies();
139
- await saveFile(file, JSON.stringify(cookies, null, 2));
547
+ await this.page.setCookie(...cookies);
548
+ return this;
140
549
  }
141
550
  catch (e) {
142
- console.log('saveHtml Error:', e.parse());
551
+ console.log('[chrome.setCookies.Error]', e.message);
143
552
  }
144
553
  }
145
554
 
555
+ /**
556
+ * 保存当前页面中的Cookies
557
+ * @param {Object} file
558
+ */
559
+ async saveCookies(file, append = true) {
560
+ try {
561
+ let cookies = await this.page.cookies();
562
+
563
+ if (append && fs.existsSync(file)) {
564
+ let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
565
+ cookies = this.mergeNewCookies(dbCookies, cookies);
566
+ }
567
+
568
+ await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
569
+ return this;
570
+ }
571
+ catch (e) {
572
+ console.log('[chrome.saveCookies.Error]', e.parse());
573
+ }
574
+ }
575
+
576
+ /**
577
+ * 加水印,这里实际上是在网页加个DIV并显示时间
578
+ * @param {Object} conf
579
+ */
146
580
  async watermark(conf) {
147
- await this.page.evaluate((conf) => {
148
- const wmDiv = document.createElement('div#watermark');
149
- wmDiv.style.position = 'fixed';
150
- wmDiv.style.top = `50%`;
151
- wmDiv.style.left = `50%`;
152
- wmDiv.style.transform = 'translate(-50%, -50%)';
153
- wmDiv.style.fontSize = `36px`;
154
- wmDiv.style.zIndex = '10000';
155
- // wmDiv.style.color = color;
156
- wmDiv.style.color = 'rgba(0, 0, 0, 0.9)';
157
- wmDiv.style.pointerEvents = 'none';
158
- wmDiv.innerText = conf.text;
159
- document.body.appendChild(wmDiv);
160
- }, conf);
161
- await this.page.waitForSelector('div#watermark', { timeout: 1000 });
581
+ try {
582
+ const tmpID = 'watermark' + Date.now();
583
+ await this.page.evaluate((conf, tmpID) => {
584
+ const wmDiv = document.createElement('div');
585
+ wmDiv.id = tmpID;
586
+ wmDiv.style.position = 'fixed';
587
+ wmDiv.style.top = `50%`;
588
+ wmDiv.style.left = `50%`;
589
+ wmDiv.style.transform = 'translate(-50%, -50%)';
590
+ wmDiv.style.fontSize = `36px`;
591
+ wmDiv.style.zIndex = '10000';
592
+ // wmDiv.style.color = color;
593
+ wmDiv.style.color = 'rgba(0, 0, 0, 0.9)';
594
+ wmDiv.style.pointerEvents = 'none';
595
+ wmDiv.innerText = conf.text;
596
+ document.body.appendChild(wmDiv);
597
+ }, conf, tmpID);
598
+ await this.page.waitForSelector(`div#${tmpID}`, { timeout: 1000 });
599
+ return this;
600
+ }
601
+ catch (e) {
602
+ console.log('[chrome.watermark.Error]', e.message);
603
+ }
162
604
  }
163
605
 
164
606
 
@@ -168,14 +610,20 @@ module.exports = class {
168
610
  * @param quality
169
611
  */
170
612
  async photograph(file, quality = 50) {
171
- await this.page.screenshot({
172
- path: file,
173
- fullPage: true, //全屏
174
- type: 'jpeg',
175
- quality: quality,
176
- omitBackground: true, //显示背景
177
- });
178
- // console.log('photograph=', file);
613
+ try {
614
+ await this.page.screenshot({
615
+ path: file,
616
+ fullPage: true, //全屏
617
+ type: 'jpeg',
618
+ quality: quality,
619
+ omitBackground: true, //显示背景
620
+ });
621
+ // console.log('photograph=', file);
622
+ return this;
623
+ }
624
+ catch (e) {
625
+ console.log('[chrome.photograph.Error]', e.message);
626
+ }
179
627
  }
180
628
 
181
629
  /**
@@ -185,17 +633,56 @@ module.exports = class {
185
633
  const url = parseUrl(this.page.url());
186
634
  const domain = url.protocol + '//' + url.host;
187
635
  await this.page.evaluate((domain) => {
188
- const elements = document.querySelectorAll('script[src], link[href], iframe[src]');
189
- elements.forEach((element) => {
190
- const src = element.getAttribute('src');
191
- const href = element.getAttribute('href');
192
- if (src && src.startsWith('/')) element.src = domain + src;
193
- if (href && href.startsWith('/')) element.href = domain + href;
194
- });
636
+ try {
637
+ const tags = document.querySelectorAll('script[src], link[href], iframe[src]');
638
+ tags.forEach((ele) => {
639
+ const src = ele.getAttribute('src');
640
+ const href = ele.getAttribute('href');
641
+ if (src && src.startsWith('/')) ele.src = domain + src;
642
+ if (href && href.startsWith('/')) ele.href = domain + href;
643
+ });
644
+ }
645
+ catch (e) {
646
+ console.log('[chrome.improveUrls.Error]', e.message);
647
+ }
195
648
  }, domain);
196
649
  }
197
650
 
198
651
 
652
+ /**
653
+ * 解析网页set-cookies的值
654
+ *
655
+ * @param {Object} strCookies
656
+ */
657
+ async parseCookies(strCookies) {
658
+ return await strCookies.split("\n").map((ls, i) => {
659
+ let value = {};
660
+ ls.split(';').map((ln, j) => {
661
+ // console.log(ln);
662
+ const arr = ln.split('=');
663
+ const Key = (arr[0]).trim();
664
+ if (!Key) return;
665
+
666
+ if (j === 0) {
667
+ value.name = Key;
668
+ value.value = arr[1];
669
+ }
670
+ else if (Key === 'Max-Age') {
671
+ value.expire = parseInt(arr[1]) + (Date.now() / 1000);
672
+ }
673
+ else if (Key === 'Secure') {
674
+ value.source = true;
675
+ value.sourceScheme = 'Secure';
676
+ }
677
+ else {
678
+ value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
679
+ }
680
+ })
681
+ return value;
682
+ })
683
+ }
684
+
685
+
199
686
  async parseResponse(response) {
200
687
  // const response = await this.page.waitForResponse(res => res);
201
688
 
@@ -217,7 +704,7 @@ module.exports = class {
217
704
  value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
218
705
  // value.headers = headers;
219
706
  if (headers['server']) value.server = headers['server'];
220
- if (headers['set-cookie']) value.cookies = headers['set-cookie'];
707
+ if (headers['set-cookie']) value.cookies = await this.parseCookies(headers['set-cookie']);
221
708
  value.remote = await response.remoteAddress(); //目标服务器
222
709
  if (value.status === 301 || value.status === 302) return value;
223
710
  if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
@@ -231,20 +718,58 @@ module.exports = class {
231
718
  if (value.post) value.post = value.post.toString();
232
719
 
233
720
  try {
234
- value.response = await response.buffer();
235
- value.response = value.response.toString();
721
+ value.buffer = await response.buffer();
722
+ value.response = value.buffer.toString();
723
+ value.json = JSON.parse(value.response);
236
724
  }
237
725
  catch (e) {
238
- value.response = e.parse();
726
+ value.json = e.parse();
239
727
  }
240
728
 
241
729
  return value;
242
730
  }
243
731
 
244
732
 
733
+ doListening(params) {
734
+
735
+ this.page.on('request', async (request) => {
736
+ if (this.requestCall) {
737
+ const run = await this.requestCall(request);
738
+ if (run === false) {
739
+ request.abort();
740
+ return;
741
+ }
742
+ }
743
+
744
+ const { abort_img = false, no_cache = true, append_headers = {} } = params;
745
+ if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
746
+ request.abort();
747
+ return;
748
+ }
749
+
750
+ const headers = request.headers();
751
+ if (append_headers !== {}) Object.assign(headers, append_headers);
752
+
753
+ headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
754
+ headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
755
+ // headers['Access-Control-Allow-Headers'] = 'Content-Type';
245
756
 
757
+ if (no_cache) {
758
+ const rType = request.resourceType();
759
+ if (rType === 'script' || rType === 'stylesheet') {
760
+ headers['Cache-Control'] = 'no-store'; // 禁用缓存
761
+ }
762
+ }
246
763
 
764
+ request.continue({ headers });
765
+ });
247
766
 
767
+ this.page.on('response', async res => {
768
+ if (!this.responseCall) return;
769
+ let json = await this.parseResponse(res);
770
+ await this.responseCall(json);
771
+ });
772
+ }
248
773
 
249
774
 
250
775
  }