nodejs_chromium 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/index.js +2 -2
  2. package/package.json +1 -1
  3. package/src/chrome.js +780 -732
package/src/chrome.js CHANGED
@@ -1,733 +1,781 @@
1
- const fs = require("fs");
2
- const {parse: parseUrl} = require("url");
3
-
4
-
5
- /**
6
- *
7
- * class CdpFrame extends _classSuper
8
- * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js
9
- * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Page.js
10
- * \node_modules\puppeteer-core\src\api\Page.ts
11
- *
12
- * @type {exports}
13
- */
14
- module.exports = class {
15
- browser = null;
16
- page = null;
17
- responseCall = null;
18
- requestCall = null;
19
- isFrame = false;
20
- params = {};
21
-
22
- constructor(browser, page, params, isFrame = false) {
23
- this.browser = browser;
24
- this.page = page;
25
- this.params = params;
26
- this.isFrame = !!isFrame;
27
- this.doListening(params);
28
- }
29
-
30
- request(call) {
31
- this.requestCall = call;
32
- return this;
33
- }
34
-
35
- response(call) {
36
- this.responseCall = call;
37
- return this;
38
- }
39
-
40
- /**
41
- * 重新在当前浏览器创建新窗口
42
- */
43
- async clone() {
44
- try {
45
- const page = await this.browser.newPage();
46
- await page.setRequestInterception(true); //允许拦截
47
- // await page.setUserAgent(this.params.ua);
48
- await page.evaluateOnNewDocument(() => {
49
- const newProto = navigator.__proto__;
50
- delete newProto.webdriver; //删除 navigator.webdriver字段
51
- navigator.__proto__ = newProto; //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
52
- });
53
- return new module.exports(this.browser, page, this.params, false); //new 自身
54
- } catch (e) {
55
- console.log('[chrome.iframe.Error]', e.message);
56
- }
57
- }
58
-
59
- /**
60
- * page.on方法
61
- */
62
- on(key, call) {
63
- try {
64
- this.page.on(key, call);
65
- return this;
66
- } catch (e) {
67
- console.log('[chrome.on.Error]', e.message);
68
- }
69
- }
70
-
71
- once(key, call) {
72
- try {
73
- this.page.once(key, call);
74
- return this;
75
- } catch (e) {
76
- console.log('[chrome.on.Error]', e.message);
77
- }
78
- }
79
-
80
- off(key, call) {
81
- try {
82
- this.page.off(key, call);
83
- return this;
84
- } catch (e) {
85
- console.log('[chrome.on.Error]', e.message);
86
- }
87
- }
88
-
89
- emit(key, value) {
90
- try {
91
- this.page.emit(key, value);
92
- return this;
93
- } catch (e) {
94
- console.log('[chrome.on.Error]', e.message);
95
- }
96
- }
97
-
98
- /**
99
- * 这必须放在已经打开过这个URL之后
100
- */
101
- waitForFrame() {
102
- try {
103
- this.page.waitForFrame(async frame => {
104
- return frame.name() === 'Test';
105
- });
106
- return this;
107
- } catch (e) {
108
- console.log('[chrome.waitForFrame.Error]', e.message);
109
- }
110
- }
111
-
112
-
113
- /**
114
- * 等待框架URL包含指定字串
115
- */
116
- async waitFrame(urlKey, timeout = 5000) {
117
- try {
118
- return await this.page.waitForFrame(async frame => {
119
- return frame.url().indexOf(urlKey) > 0;
120
- }, {timeout});
121
- } catch (e) {
122
- console.log('[chrome.waitFrame.Error]', e.message);
123
- }
124
- }
125
-
126
- async iframe(tag) {
127
- try {
128
- const frame = await (await this.page.$(tag)).contentFrame();
129
- // return new iframe(this.browser, frame, this.params);
130
- return new module.exports(this.browser, frame, this.params, true); //new 自身
131
- } catch (e) {
132
- console.log('[chrome.iframe.Error]', e.message);
133
- }
134
- }
135
-
136
- /**
137
- * 关闭
138
- */
139
- async close(act = 3) {
140
-
141
- try {
142
- if (act & 1) await this.page.close();
143
- if (act & 2) await this.browser.close();
144
- return this;
145
- } catch (e) {
146
- console.log('[chrome.close.Error]', e.message);
147
- }
148
- }
149
-
150
- /**
151
- * 断开进程与浏览器
152
- */
153
- async disconnect() {
154
- try {
155
- await this.browser.disconnect();
156
- return this;
157
- } catch (e) {
158
- console.log('[chrome.disconnect.Error]', e.message);
159
- }
160
- }
161
-
162
- url() {
163
- return this.page.url();
164
- }
165
-
166
-
167
- async size(width = 1024, height = 768) {
168
- await this.page.setViewport({width, height});
169
- return this;
170
- }
171
-
172
- /**
173
- * @param {Object} url
174
- * @param {Object} option
175
- * timeout:
176
- * referer:
177
- * referrerPolicy:
178
- * waitUntil:默认
179
- * load=*等待“加载”事件。,默认
180
- * domcontentloaded *等待“DOMContentLoaded”事件。
181
- * networkidle0:Waits till there are no more than 0 network connections for at least `500`ms
182
- * networkidle2:Waits till there are no more than 2 network connections for at least `500`ms
183
- */
184
- async goto(url, option = {}) {
185
- try {
186
- let {timeout = 0, referer, waitUntil = 'load'} = option;
187
- if (typeof url === 'number') {
188
- if (url < 0) {
189
- await this.page.goBack({timeout, referer, waitUntil});
190
- } else {
191
- await this.page.goForward({timeout, referer, waitUntil});
192
- }
193
- } else {
194
- await this.page.goto(url, {timeout, referer, waitUntil});
195
- }
196
- await this.page.goto(url, {timeout, referer, waitUntil});
197
- return this;
198
- } catch (e) {
199
- console.log('[chrome.open.Error]', e.message);
200
- }
201
- }
202
-
203
- async open(url, option = {}) {
204
- return await this.goto(url, option);
205
- }
206
-
207
- /**
208
- * 直接获取网页全部信息,或设置
209
- */
210
- async content(html, option = {}) {
211
- try {
212
- if (html === undefined) return await this.page.content();
213
- let {timeout = 0, waitUntil = 'load'} = option;
214
- if (this.isFrame) {
215
- await this.page.setFrameContent(html);
216
- } else {
217
- await this.page.setContent(html, {timeout, waitUntil});
218
- }
219
- return this;
220
- } catch (e) {
221
- console.log('[chrome.content.Error]', e.message);
222
- }
223
- }
224
-
225
- /**
226
- * 获取部分或全部HTML
227
- * @param {Object} obj
228
- */
229
- async html(obj) {
230
- try {
231
- if (obj) {
232
- return await this.page.evaluate(ele => ele.innerHTML, obj);
233
- }
234
- return await this.page.evaluate(() => {
235
- return document.documentElement.innerHTML;
236
- });
237
- } catch (e) {
238
- console.log('[chrome.html.Error]', e.message);
239
- }
240
- }
241
-
242
- async text(obj) {
243
- try {
244
- if (obj) {
245
- return await this.page.evaluate(ele => ele.textContent, obj);
246
- }
247
- return await this.page.evaluate(() => {
248
- return document.documentElement.textContent;
249
- });
250
- } catch (e) {
251
- console.log('[chrome.text.Error]', e.message);
252
- }
253
- }
254
-
255
- /**
256
- * 显示或隐藏某个标签
257
- *
258
- * @param {Object} tag
259
- * @param {Object} show 默认显示
260
- */
261
- async display(tag, show) {
262
- try {
263
- // const element = await this.page.querySelector(tag);
264
- // element.style.display = (!!show) ? '' : 'none';
265
- (await this.page.querySelector(tag)).style.display = (!!show) ? '' : 'none';
266
- return this;
267
- } catch (e) {
268
- console.log('[chrome.text.Error]', e.message);
269
- }
270
- }
271
-
272
-
273
- /**
274
- * 遍历tag1里的tag2
275
- *
276
- * @param {Object} tag1
277
- * @param {Object} tag2
278
- * @param {Object} call
279
- */
280
- async elements(tag1, tag2, call) {
281
- try {
282
- const div = await this.page.$(tag1);
283
- if (!div) {
284
- throw new Error(`${tag1} not exists`);
285
- }
286
- for (const elm of (await div.$$(tag2))) {
287
- call(elm, (await elm.evaluate(node => node.innerHTML)));
288
- }
289
- } catch (e) {
290
- console.log('[chrome.elements.Error]', e.message)
291
- }
292
- }
293
-
294
-
295
- /**
296
- * 支持css普通选择器方式和伪类方式
297
- * div.body
298
- * div#body
299
- * div[name=abc]
300
- * iframe:first-child
301
- * div>ul>li:nth-child(4)>a
302
- *
303
- * @param {Object} tag
304
- */
305
- async element(tag) {
306
- try {
307
- return await this.page.$(tag);
308
- } catch (e) {
309
- console.log('[chrome.element.Error]', e.message);
310
- return null;
311
- }
312
- }
313
-
314
-
315
- /**
316
- * 等待浏览器跳转
317
- */
318
- async navigation(timeout = 0, tryCount = 0) {
319
- if (timeout < 200) timeout = timeout * 1000;
320
- try {
321
- await this.page.waitForNavigation({timeout});
322
- return true;
323
- } catch (e) {
324
- if (tryCount > 0) {
325
- return await this.navigation(timeout, --tryCount)
326
- }
327
- return false;
328
- }
329
- }
330
-
331
- /**
332
- * 等待某个元素出现
333
- */
334
- async wait(ele, timeout = 0, tryCount = 0) {
335
- if (timeout < 200) timeout = timeout * 1000;
336
- try {
337
- await this.page.waitForSelector(ele, {timeout});
338
- return true;
339
- } catch (e) {
340
- if (tryCount > 0) {
341
- return await this.wait(ele, timeout, --tryCount)
342
- }
343
- return false;
344
- }
345
- }
346
-
347
-
348
- /**
349
- * 等待x秒
350
- * @param {Object} time
351
- */
352
- async waiting(time) {
353
- return await this.sleep(time);
354
- }
355
-
356
- async sleep(time) {
357
- if (time < 100) time = time * 1000;
358
- try {
359
- await new Promise(res => setTimeout(res, time));
360
- return this;
361
- } catch (e) {
362
- console.log('[chrome.sleep.Error]', e.message);
363
- }
364
- }
365
-
366
- /**
367
- * delay=每键入一个字符延迟毫秒
368
- */
369
- async input(el, value, delay = 1) {
370
- try {
371
- await this.page.type(el, value, {delay});
372
- return this;
373
- } catch (e) {
374
- console.log('[chrome.input.Error]', e.message);
375
- }
376
- }
377
-
378
- /**
379
- * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js
380
- * @param {Object} el
381
- *
382
- * option:
383
- * delay =鼠标按下后延迟释放鼠标的时间(以毫秒为单位)。
384
- * count =次数,默认1
385
- * offset ={x,y}可单击点相对于边框左上角的偏移。
386
- *
387
- */
388
- async click(el, option = {}) {
389
- try {
390
- let {delay = 100, count = 1, x = 6, y = 3} = option;
391
- await this.page.click(el, {delay, count, offset: {x, y}});
392
- return this;
393
- } catch (e) {
394
- console.log('[chrome.click.Error]', e.message);
395
- }
396
- }
397
-
398
- async tap(el) {
399
- try {
400
- await this.page.tap(el);
401
- return this;
402
- } catch (e) {
403
- console.log('[chrome.hover.Error]', e.message);
404
- }
405
- }
406
-
407
- async focus(el) {
408
- try {
409
- await this.page.focus(el);
410
- return this;
411
- } catch (e) {
412
- console.log('[chrome.focus.Error]', e.message);
413
- }
414
- }
415
-
416
- async hover(el) {
417
- try {
418
- await this.page.hover(el);
419
- return this;
420
- } catch (e) {
421
- console.log('[chrome.hover.Error]', e.message);
422
- }
423
- }
424
-
425
-
426
- /**
427
- * 根据目标要求格式提交
428
- *
429
- * @param url
430
- * @param data
431
- */
432
- async post(url, data) {
433
- //JSON.stringify(data)
434
- try {
435
- await this.page.goto(url, {method: 'POST', body: data, waitUntil: 'load'});
436
- return this;
437
- } catch (e) {
438
- console.log('[chrome.post.Error]', e.message);
439
- }
440
- }
441
-
442
-
443
- /**
444
- * 保存HTML
445
- * @param {Object} file
446
- */
447
- async saveHtml(file) {
448
- try {
449
- await this.improveUrls(); //修正js/css的域名
450
- const body = await this.page.evaluate(() => {
451
- return document.documentElement.innerHTML;
452
- });
453
- await fs.writeFileSync(file, body);
454
- return this;
455
- } catch (e) {
456
- console.log('[chrome.saveHtml.Error]', e.parse());
457
- }
458
- }
459
-
460
-
461
- jsonArray(cookiesVal, host) {
462
- if (!cookiesVal) return [];
463
- return JSON.parse(cookiesVal).map(cook => {
464
- let {name, value, domain} = cook;
465
- if (!domain) domain = host;
466
- return {name, value, domain};
467
- });
468
- }
469
-
470
- /**
471
-
472
- // const url = parseUrl(this.page.url());
473
- // const host = '.' + url.host.split('.').slice(-2).join('.');
474
- // let cookiesVal = read(file, 'utf8');
475
- // cookiesVal = this.jsonArray(cookiesVal, host);
476
-
477
- */
478
-
479
- /**
480
- * 合并两个Cookies,并以后面的值为准
481
- *
482
- * @param {Object} oldCookies
483
- * @param {Object} newCookies
484
- */
485
- async mergeNewCookies(oldCookies, newCookies) {
486
- if (newCookies.length === 0) return oldCookies;
487
- // let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));//浅拷贝
488
- let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
489
- newCookies.map(obj => {
490
- aMap.set(obj.name, obj)
491
- });
492
- return await Array.from(aMap.values());
493
- }
494
-
495
-
496
- /**
497
- * 获取当前页面的Cookies
498
- */
499
- async getCookies() {
500
- try {
501
- return await this.page.cookies();
502
- } catch (e) {
503
- console.log('[chrome.getCookies.Error]', e.message);
504
- return [];
505
- }
506
- }
507
-
508
- /**
509
- * 设置Cookies
510
- * @param {Object} cookies
511
- */
512
- async setCookies(cookies) {
513
- try {
514
- await this.page.setCookie(...cookies);
515
- return this;
516
- } catch (e) {
517
- console.log('[chrome.setCookies.Error]', e.message);
518
- }
519
- }
520
-
521
- /**
522
- * 保存当前页面中的Cookies
523
- * @param {Object} file
524
- */
525
- async saveCookies(file, append = true) {
526
- try {
527
- let cookies = await this.page.cookies();
528
-
529
- if (append && fs.existsSync(file)) {
530
- let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
531
- cookies = this.mergeNewCookies(dbCookies, cookies);
532
- }
533
-
534
- await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
535
- return this;
536
- } catch (e) {
537
- console.log('[chrome.saveCookies.Error]', e.parse());
538
- }
539
- }
540
-
541
- /**
542
- * 加水印,这里实际上是在网页加个DIV并显示时间
543
- * @param {Object} conf
544
- */
545
- async watermark(conf) {
546
- try {
547
- const tmpID = 'watermark' + Date.now();
548
- await this.page.evaluate((conf, tmpID) => {
549
- const wmDiv = document.createElement('div');
550
- wmDiv.id = tmpID;
551
- wmDiv.style.position = 'fixed';
552
- wmDiv.style.top = `50%`;
553
- wmDiv.style.left = `50%`;
554
- wmDiv.style.transform = 'translate(-50%, -50%)';
555
- wmDiv.style.fontSize = `36px`;
556
- wmDiv.style.zIndex = '10000';
557
- // wmDiv.style.color = color;
558
- wmDiv.style.color = 'rgba(0, 0, 0, 0.9)';
559
- wmDiv.style.pointerEvents = 'none';
560
- wmDiv.innerText = conf.text;
561
- document.body.appendChild(wmDiv);
562
- }, conf, tmpID);
563
- await this.page.waitForSelector(`div#${tmpID}`, {timeout: 1000});
564
- return this;
565
- } catch (e) {
566
- console.log('[chrome.watermark.Error]', e.message);
567
- }
568
- }
569
-
570
-
571
- /**
572
- * 网页拍照
573
- * @param file
574
- * @param quality
575
- */
576
- async photograph(file, quality = 50) {
577
- try {
578
- await this.page.screenshot({
579
- path: file,
580
- fullPage: true, //全屏
581
- type: 'jpeg',
582
- quality: quality,
583
- omitBackground: true, //显示背景
584
- });
585
- // console.log('photograph=', file);
586
- return this;
587
- } catch (e) {
588
- console.log('[chrome.photograph.Error]', e.message);
589
- }
590
- }
591
-
592
- /**
593
- * 补全所有本地js/css
594
- */
595
- async improveUrls() {
596
- const url = parseUrl(this.page.url());
597
- const domain = url.protocol + '//' + url.host;
598
- await this.page.evaluate((domain) => {
599
- try {
600
- const tags = document.querySelectorAll('script[src], link[href], iframe[src]');
601
- tags.forEach((ele) => {
602
- const src = ele.getAttribute('src');
603
- const href = ele.getAttribute('href');
604
- if (src && src.startsWith('/')) ele.src = domain + src;
605
- if (href && href.startsWith('/')) ele.href = domain + href;
606
- });
607
- } catch (e) {
608
- console.log('[chrome.improveUrls.Error]', e.message);
609
- }
610
- }, domain);
611
- }
612
-
613
-
614
- /**
615
- * 解析网页set-cookies的值
616
- *
617
- * @param {Object} strCookies
618
- */
619
- async parseCookies(strCookies) {
620
- return await strCookies.split("\n").map((ls, i) => {
621
- let value = {};
622
- ls.split(';').map((ln, j) => {
623
- // console.log(ln);
624
- const arr = ln.split('=');
625
- const Key = (arr[0]).trim();
626
- if (!Key) return;
627
-
628
- if (j === 0) {
629
- value.name = Key;
630
- value.value = arr[1];
631
- } else if (Key === 'Max-Age') {
632
- value.expire = parseInt(arr[1]) + (Date.now() / 1000);
633
- } else if (Key === 'Secure') {
634
- value.source = true;
635
- value.sourceScheme = 'Secure';
636
- } else {
637
- value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
638
- }
639
- })
640
- return value;
641
- })
642
- }
643
-
644
-
645
- async parseResponse(response) {
646
- // const response = await this.page.waitForResponse(res => res);
647
-
648
- const value = {};
649
- const request = await response.request();
650
- const headers = await response.headers();
651
- value.method = await request.method();
652
- if (value.method === 'OPTIONS') return;
653
-
654
- value.type = await request.resourceType();
655
- // value.redirect = await response.redirectURL();
656
- if (value.type === 'xhr') value.type = 'AJAX';
657
- value.url = await response.url();
658
- value.domain = parseUrl(value.url)['host'];
659
- value.content = headers['content-type'];
660
- value.length = headers['content-length'];
661
- value.status = await response.status();
662
- value.ok = await response.ok();
663
- value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
664
- // value.headers = headers;
665
- if (headers['server']) value.server = headers['server'];
666
- if (headers['set-cookie']) value.cookies = await this.parseCookies(headers['set-cookie']);
667
- value.remote = await response.remoteAddress(); //目标服务器
668
- if (value.status === 301 || value.status === 302) return value;
669
- if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
670
- if (value.content) {
671
- if (value.content.startsWith('application/vnd')) return value;
672
- if (value.content.startsWith('application/xml')) return value;
673
- if (value.content.startsWith('text/css')) return value;
674
- }
675
-
676
- value.post = await request.postData();
677
- if (value.post) value.post = value.post.toString();
678
-
679
- try {
680
- value.buffer = await response.buffer();
681
- value.response = value.buffer.toString();
682
- value.json = JSON.parse(value.response);
683
- } catch (e) {
684
- value.json = e.parse();
685
- }
686
-
687
- return value;
688
- }
689
-
690
-
691
- doListening(params) {
692
-
693
- this.page.on('request', async (request) => {
694
- if (this.requestCall) {
695
- const run = await this.requestCall(request);
696
- if (run === false) {
697
- request.abort();
698
- return;
699
- }
700
- }
701
-
702
- const {abort_img = false, no_cache = true, append_headers = {}} = params;
703
- if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
704
- request.abort();
705
- return;
706
- }
707
-
708
- const headers = request.headers();
709
- if (append_headers !== {}) Object.assign(headers, append_headers);
710
-
711
- headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
712
- headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
713
- // headers['Access-Control-Allow-Headers'] = 'Content-Type';
714
-
715
- if (no_cache) {
716
- const rType = request.resourceType();
717
- if (rType === 'script' || rType === 'stylesheet') {
718
- headers['Cache-Control'] = 'no-store'; // 禁用缓存
719
- }
720
- }
721
-
722
- request.continue({headers});
723
- });
724
-
725
- this.page.on('response', async res => {
726
- if (!this.responseCall) return;
727
- let json = await this.parseResponse(res);
728
- this.responseCall(json);
729
- });
730
- }
731
-
732
-
1
+ const fs = require("fs");
2
+ const { parse: parseUrl } = require("url");
3
+
4
+
5
+ /**
6
+ *
7
+ * class CdpFrame extends _classSuper
8
+ * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Frame.js
9
+ * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\cdp\Page.js
10
+ * \node_modules\puppeteer-core\src\api\Page.ts
11
+ *
12
+ * @type {exports}
13
+ */
14
+ module.exports = class {
15
+ browser = null;
16
+ page = null;
17
+ responseCall = null;
18
+ requestCall = null;
19
+ isFrame = false; //是不是在iFrame中
20
+ visible = false; //是否可见,也就是有没有启动窗口
21
+ params = {};
22
+
23
+ constructor(browser, page, params, isFrame = false) {
24
+ this.browser = browser;
25
+ this.page = page;
26
+ this.params = params;
27
+ this.visible = !!params.visible;
28
+ this.isFrame = !!isFrame;
29
+ this.doListening(params);
30
+ }
31
+
32
+ request(call) {
33
+ this.requestCall = call;
34
+ return this;
35
+ }
36
+
37
+ response(call) {
38
+ this.responseCall = call;
39
+ return this;
40
+ }
41
+
42
+ /**
43
+ * 重新在当前浏览器创建新窗口
44
+ */
45
+ async clone() {
46
+ try {
47
+ const page = await this.browser.newPage();
48
+ await page.setRequestInterception(true); //允许拦截
49
+ // await page.setUserAgent(this.params.ua);
50
+ await page.evaluateOnNewDocument(() => {
51
+ const newProto = navigator.__proto__;
52
+ delete newProto.webdriver; //删除 navigator.webdriver字段
53
+ navigator.__proto__ = newProto; //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
54
+ });
55
+ return new module.exports(this.browser, page, this.params, false); //new 自身
56
+ }
57
+ catch (e) {
58
+ console.log('[chrome.iframe.Error]', e.message);
59
+ }
60
+ }
61
+
62
+ /**
63
+ * page.on方法
64
+ */
65
+ on(key, call) {
66
+ try {
67
+ this.page.on(key, call);
68
+ return this;
69
+ }
70
+ catch (e) {
71
+ console.log('[chrome.on.Error]', e.message);
72
+ }
73
+ }
74
+
75
+ once(key, call) {
76
+ try {
77
+ this.page.once(key, call);
78
+ return this;
79
+ }
80
+ catch (e) {
81
+ console.log('[chrome.on.Error]', e.message);
82
+ }
83
+ }
84
+
85
+ off(key, call) {
86
+ try {
87
+ this.page.off(key, call);
88
+ return this;
89
+ }
90
+ catch (e) {
91
+ console.log('[chrome.on.Error]', e.message);
92
+ }
93
+ }
94
+
95
+ emit(key, value) {
96
+ try {
97
+ this.page.emit(key, value);
98
+ return this;
99
+ }
100
+ catch (e) {
101
+ console.log('[chrome.on.Error]', e.message);
102
+ }
103
+ }
104
+
105
+ /**
106
+ * 这必须放在已经打开过这个URL之后
107
+ */
108
+ waitForFrame() {
109
+ try {
110
+ this.page.waitForFrame(async frame => {
111
+ return frame.name() === 'Test';
112
+ });
113
+ return this;
114
+ }
115
+ catch (e) {
116
+ console.log('[chrome.waitForFrame.Error]', e.message);
117
+ }
118
+ }
119
+
120
+
121
+ /**
122
+ * 等待框架URL包含指定字串
123
+ */
124
+ async waitFrame(urlKey, timeout = 5000) {
125
+ try {
126
+ return await this.page.waitForFrame(async frame => {
127
+ return frame.url().indexOf(urlKey) > 0;
128
+ }, { timeout });
129
+ }
130
+ catch (e) {
131
+ console.log('[chrome.waitFrame.Error]', e.message);
132
+ }
133
+ }
134
+
135
+ /**
136
+ * 按tag创建当前窗口中的某个iFrame
137
+ * @param {Object} tag
138
+ */
139
+ async iframe(tag) {
140
+ try {
141
+ const frame = await (await this.page.$(tag)).contentFrame();
142
+ // return new iframe(this.browser, frame, this.params);
143
+ return new module.exports(this.browser, frame, this.params, true); //new 自身
144
+ }
145
+ catch (e) {
146
+ console.log('[chrome.iframe.Error]', e.message);
147
+ }
148
+ }
149
+
150
+ /**
151
+ * 关闭
152
+ */
153
+ async close(act = 3) {
154
+
155
+ try {
156
+ if (act & 1) await this.page.close();
157
+ if (act & 2) await this.browser.close();
158
+ return this;
159
+ }
160
+ catch (e) {
161
+ console.log('[chrome.close.Error]', e.message);
162
+ }
163
+ }
164
+
165
+ /**
166
+ * 断开进程与浏览器
167
+ */
168
+ async disconnect() {
169
+ try {
170
+ await this.browser.disconnect();
171
+ return this;
172
+ }
173
+ catch (e) {
174
+ console.log('[chrome.disconnect.Error]', e.message);
175
+ }
176
+ }
177
+
178
+ url() {
179
+ return this.page.url();
180
+ }
181
+
182
+ /**
183
+ * 重新设置尺寸
184
+ */
185
+ async size(width = 1024, height = 768) {
186
+ await this.page.setViewport({ width, height });
187
+ return this;
188
+ }
189
+
190
+ /**
191
+ * @param {Object} url
192
+ * @param {Object} option
193
+ * timeout:
194
+ * referer:
195
+ * referrerPolicy:
196
+ * waitUntil:默认
197
+ * load=*等待“加载”事件。,默认
198
+ * domcontentloaded *等待“DOMContentLoaded”事件。
199
+ * networkidle0:Waits till there are no more than 0 network connections for at least `500`ms
200
+ * networkidle2:Waits till there are no more than 2 network connections for at least `500`ms
201
+ */
202
+ async goto(url, option = {}) {
203
+ try {
204
+ let { timeout = 0, referer, waitUntil = 'load' } = option;
205
+ if (typeof url === 'number') {
206
+ if (url < 0) {
207
+ await this.page.goBack({ timeout, referer, waitUntil });
208
+ }
209
+ else {
210
+ await this.page.goForward({ timeout, referer, waitUntil });
211
+ }
212
+ }
213
+ else {
214
+ await this.page.goto(url, { timeout, referer, waitUntil });
215
+ }
216
+ await this.page.goto(url, { timeout, referer, waitUntil });
217
+ return this;
218
+ }
219
+ catch (e) {
220
+ console.log('[chrome.open.Error]', e.message);
221
+ }
222
+ }
223
+
224
+ async open(url, option = {}) {
225
+ return await this.goto(url, option);
226
+ }
227
+
228
+ /**
229
+ * 直接获取网页全部信息,或设置
230
+ */
231
+ async content(html, option = {}) {
232
+ try {
233
+ if (html === undefined) return await this.page.content();
234
+ let { timeout = 0, waitUntil = 'load' } = option;
235
+ if (this.isFrame) {
236
+ await this.page.setFrameContent(html);
237
+ }
238
+ else {
239
+ await this.page.setContent(html, { timeout, waitUntil });
240
+ }
241
+ return this;
242
+ }
243
+ catch (e) {
244
+ console.log('[chrome.content.Error]', e.message);
245
+ }
246
+ }
247
+
248
+ /**
249
+ * 获取部分或全部HTML
250
+ * @param {Object} obj
251
+ */
252
+ async html(obj) {
253
+ try {
254
+ if (obj) {
255
+ return await this.page.evaluate(ele => ele.innerHTML, obj);
256
+ }
257
+ return await this.page.evaluate(() => {
258
+ return document.documentElement.innerHTML;
259
+ });
260
+ }
261
+ catch (e) {
262
+ console.log('[chrome.html.Error]', e.message);
263
+ }
264
+ }
265
+
266
+ async text(obj) {
267
+ try {
268
+ if (obj) {
269
+ return await this.page.evaluate(ele => ele.textContent, obj);
270
+ }
271
+ return await this.page.evaluate(() => {
272
+ return document.documentElement.textContent;
273
+ });
274
+ }
275
+ catch (e) {
276
+ console.log('[chrome.text.Error]', e.message);
277
+ }
278
+ }
279
+
280
+ /**
281
+ * 显示或隐藏某个标签
282
+ *
283
+ * @param {Object} tag
284
+ * @param {Object} show 默认显示
285
+ */
286
+ async display(tag, show) {
287
+ try {
288
+ const element = await this.page.querySelector(tag);
289
+ if (!element) throw new Error(`${tag} not exists`);
290
+ element.style.display = (!!show) ? '' : 'none';
291
+ return this;
292
+ }
293
+ catch (e) {
294
+ console.log('[chrome.text.Error]', e.message);
295
+ }
296
+ }
297
+
298
+
299
+ /**
300
+ * 遍历tag1里的tag2
301
+ *
302
+ * @param {Object} tag1
303
+ * @param {Object} tag2
304
+ * @param {Object} call
305
+ */
306
+ async elements(tag1, tag2, call) {
307
+ try {
308
+ const div = await this.page.$(tag1);
309
+ if (!div) {
310
+ throw new Error(`${tag1} not exists`);
311
+ }
312
+ for (const elm of (await div.$$(tag2))) {
313
+ call(elm, (await elm.evaluate(node => node.innerHTML)));
314
+ }
315
+ }
316
+ catch (e) {
317
+ console.log('[chrome.elements.Error]', e.message)
318
+ }
319
+ }
320
+
321
+
322
+ /**
323
+ * 支持css普通选择器方式和伪类方式
324
+ * div.body
325
+ * div#body
326
+ * div[name=abc]
327
+ * iframe:first-child
328
+ * div>ul>li:nth-child(4)>a
329
+ *
330
+ * @param {Object} tag
331
+ */
332
+ async element(tag) {
333
+ try {
334
+ return await this.page.$(tag);
335
+ }
336
+ catch (e) {
337
+ console.log('[chrome.element.Error]', e.message);
338
+ return null;
339
+ }
340
+ }
341
+
342
+
343
+ /**
344
+ * 等待浏览器跳转
345
+ */
346
+ async navigation(timeout = 0, tryCount = 0) {
347
+ if (timeout < 200) timeout = timeout * 1000;
348
+ try {
349
+ await this.page.waitForNavigation({ timeout });
350
+ return true;
351
+ }
352
+ catch (e) {
353
+ if (tryCount > 0) {
354
+ return await this.navigation(timeout, --tryCount)
355
+ }
356
+ return false;
357
+ }
358
+ }
359
+
360
+ /**
361
+ * 等待某个元素出现
362
+ */
363
+ async wait(ele, timeout = 0, tryCount = 0) {
364
+ if (timeout < 200) timeout = timeout * 1000;
365
+ try {
366
+ await this.page.waitForSelector(ele, { timeout });
367
+ return true;
368
+ }
369
+ catch (e) {
370
+ if (tryCount > 0) {
371
+ return await this.wait(ele, timeout, --tryCount)
372
+ }
373
+ return false;
374
+ }
375
+ }
376
+
377
+
378
+ /**
379
+ * 等待x秒
380
+ * @param {Object} time
381
+ */
382
+ async waiting(time) {
383
+ return await this.sleep(time);
384
+ }
385
+ async sleep(time) {
386
+ if (time < 100) time = time * 1000;
387
+ try {
388
+ await new Promise(res => setTimeout(res, time));
389
+ return this;
390
+ }
391
+ catch (e) {
392
+ console.log('[chrome.sleep.Error]', e.message);
393
+ }
394
+ }
395
+
396
+ /**
397
+ * delay=每键入一个字符延迟毫秒
398
+ */
399
+ async input(el, value, delay = 1) {
400
+ try {
401
+ await this.page.type(el, value, { delay });
402
+ return this;
403
+ }
404
+ catch (e) {
405
+ console.log('[chrome.input.Error]', e.message);
406
+ }
407
+ }
408
+
409
+ /**
410
+ * \node_modules\nodejs_chromium\node_modules\puppeteer-core\lib\cjs\puppeteer\api\Page.js
411
+ * @param {Object} el
412
+ *
413
+ * option:
414
+ * delay =鼠标按下后延迟释放鼠标的时间(以毫秒为单位)。
415
+ * count =次数,默认1
416
+ * offset ={x,y}可单击点相对于边框左上角的偏移。
417
+ *
418
+ */
419
+ async click(el, option = {}) {
420
+ try {
421
+ let { delay = 100, count = 1, x = 6, y = 3 } = option;
422
+ await this.page.click(el, { delay, count, offset: { x, y } });
423
+ return this;
424
+ }
425
+ catch (e) {
426
+ console.log('[chrome.click.Error]', e.message);
427
+ }
428
+ }
429
+
430
+ async tap(el) {
431
+ try {
432
+ await this.page.tap(el);
433
+ return this;
434
+ }
435
+ catch (e) {
436
+ console.log('[chrome.hover.Error]', e.message);
437
+ }
438
+ }
439
+
440
+ async focus(el) {
441
+ try {
442
+ await this.page.focus(el);
443
+ return this;
444
+ }
445
+ catch (e) {
446
+ console.log('[chrome.focus.Error]', e.message);
447
+ }
448
+ }
449
+
450
+ async hover(el) {
451
+ try {
452
+ await this.page.hover(el);
453
+ return this;
454
+ }
455
+ catch (e) {
456
+ console.log('[chrome.hover.Error]', e.message);
457
+ }
458
+ }
459
+
460
+
461
+ /**
462
+ * 根据目标要求格式提交
463
+ *
464
+ * @param url
465
+ * @param data
466
+ */
467
+ async post(url, data) {
468
+ //JSON.stringify(data)
469
+ try {
470
+ await this.page.goto(url, { method: 'POST', body: data, waitUntil: 'load' });
471
+ return this;
472
+ }
473
+ catch (e) {
474
+ console.log('[chrome.post.Error]', e.message);
475
+ }
476
+ }
477
+
478
+
479
+ /**
480
+ * 保存HTML
481
+ * @param {Object} file
482
+ */
483
+ async saveHtml(file) {
484
+ try {
485
+ await this.improveUrls(); //修正js/css的域名
486
+ const body = await this.page.evaluate(() => {
487
+ return document.documentElement.innerHTML;
488
+ });
489
+ await fs.writeFileSync(file, body);
490
+ return this;
491
+ }
492
+ catch (e) {
493
+ console.log('[chrome.saveHtml.Error]', e.message);
494
+ }
495
+ }
496
+
497
+
498
+ jsonArray(cookiesVal, host) {
499
+ if (!cookiesVal) return [];
500
+ return JSON.parse(cookiesVal).map(cook => {
501
+ let { name, value, domain } = cook;
502
+ if (!domain) domain = host;
503
+ return { name, value, domain };
504
+ });
505
+ }
506
+
507
+ /**
508
+
509
+ // const url = parseUrl(this.page.url());
510
+ // const host = '.' + url.host.split('.').slice(-2).join('.');
511
+ // let cookiesVal = read(file, 'utf8');
512
+ // cookiesVal = this.jsonArray(cookiesVal, host);
513
+
514
+ */
515
+
516
+ /**
517
+ * 合并两个Cookies,并以后面的值为准
518
+ *
519
+ * @param {Object} oldCookies
520
+ * @param {Object} newCookies
521
+ */
522
+ async mergeNewCookies(oldCookies, newCookies) {
523
+ if (newCookies.length === 0) return oldCookies;
524
+ // let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));//浅拷贝
525
+ let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
526
+ newCookies.map(obj => {
527
+ aMap.set(obj.name, obj)
528
+ });
529
+ return await Array.from(aMap.values());
530
+ }
531
+
532
+
533
+ /**
534
+ * 获取当前页面的Cookies
535
+ */
536
+ async getCookies() {
537
+ try {
538
+ return await this.page.cookies();
539
+ }
540
+ catch (e) {
541
+ console.log('[chrome.getCookies.Error]', e.message);
542
+ return [];
543
+ }
544
+ }
545
+
546
+ /**
547
+ * 设置Cookies
548
+ * @param {Object} cookies
549
+ */
550
+ async setCookies(cookies) {
551
+ try {
552
+ await this.page.setCookie(...cookies);
553
+ return this;
554
+ }
555
+ catch (e) {
556
+ console.log('[chrome.setCookies.Error]', e.message);
557
+ console.log(cookies);
558
+ }
559
+ }
560
+
561
+ /**
562
+ * 保存当前页面中的Cookies
563
+ * @param {Object} file
564
+ */
565
+ async saveCookies(file, append = true) {
566
+ try {
567
+ let cookies = await this.page.cookies();
568
+
569
+ if (append && fs.existsSync(file)) {
570
+ let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
571
+ cookies = this.mergeNewCookies(dbCookies, cookies);
572
+ }
573
+
574
+ await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
575
+ return this;
576
+ }
577
+ catch (e) {
578
+ console.log('[chrome.saveCookies.Error]', e.message);
579
+ }
580
+ }
581
+
582
+ /**
583
+ * 加水印,这里实际上是在网页加个DIV并显示时间
584
+ * @param {Object} conf
585
+ */
586
+ async watermark(conf) {
587
+ try {
588
+ const tmpID = 'watermark' + Date.now();
589
+ await this.page.evaluate((conf, tmpID) => {
590
+ const wmDiv = document.createElement('div');
591
+ wmDiv.id = tmpID;
592
+ wmDiv.style.position = 'fixed';
593
+ wmDiv.style.top = `50%`;
594
+ wmDiv.style.left = `50%`;
595
+ wmDiv.style.transform = 'translate(-50%, -50%)';
596
+ wmDiv.style.fontSize = `36px`;
597
+ wmDiv.style.zIndex = '10000';
598
+ // wmDiv.style.color = color;
599
+ wmDiv.style.color = 'rgba(0, 0, 0, 0.9)';
600
+ wmDiv.style.pointerEvents = 'none';
601
+ wmDiv.innerText = conf.text;
602
+ document.body.appendChild(wmDiv);
603
+ }, conf, tmpID);
604
+ await this.page.waitForSelector(`div#${tmpID}`, { timeout: 1000 });
605
+ return this;
606
+ }
607
+ catch (e) {
608
+ console.log('[chrome.watermark.Error]', e.message);
609
+ }
610
+ }
611
+
612
+
613
+ /**
614
+ * 网页拍照
615
+ * @param file
616
+ * @param quality
617
+ */
618
+ async photograph(file, quality = 50) {
619
+ try {
620
+ await this.page.screenshot({
621
+ path: file,
622
+ fullPage: true, //全屏
623
+ type: 'jpeg',
624
+ quality: quality,
625
+ omitBackground: true, //显示背景
626
+ });
627
+ // console.log('photograph=', file);
628
+ return this;
629
+ }
630
+ catch (e) {
631
+ console.log('[chrome.photograph.Error]', e.message);
632
+ }
633
+ }
634
+
635
+ /**
636
+ * 补全所有本地js/css
637
+ */
638
+ async improveUrls() {
639
+ const url = parseUrl(this.page.url());
640
+ const domain = url.protocol + '//' + url.host;
641
+ await this.page.evaluate((domain) => {
642
+ try {
643
+ const tags = document.querySelectorAll('script[src], link[href], iframe[src]');
644
+ tags.forEach((ele) => {
645
+ const src = ele.getAttribute('src');
646
+ const href = ele.getAttribute('href');
647
+ if (src && src.startsWith('/')) ele.src = domain + src;
648
+ if (href && href.startsWith('/')) ele.href = domain + href;
649
+ });
650
+ }
651
+ catch (e) {
652
+ console.log('[chrome.improveUrls.Error]', e.message);
653
+ }
654
+ }, domain);
655
+ }
656
+
657
+
658
+ /**
659
+ * 解析网页set-cookies的值
660
+ *
661
+ * @param {Object} strCookies
662
+ */
663
+ async parseCookies(strCookies) {
664
+ return await strCookies.split("\n").map((ls) => {
665
+ let value = {};
666
+ ls.split(';').map((ln, j) => {
667
+ // console.log(ln);
668
+ const arr = ln.split('=');
669
+ const Key = (arr[0]).trim();
670
+ if (!Key) return;
671
+
672
+ if (j === 0) {
673
+ value.name = Key;
674
+ value.value = arr[1];
675
+ }
676
+ else if (Key === 'Max-Age') {
677
+ value.expire = parseInt(arr[1]) + (Date.now() / 1000);
678
+ }
679
+ else if (Key === 'Secure') {
680
+ value.source = true;
681
+ value.sourceScheme = 'Secure';
682
+ }
683
+ else {
684
+ value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
685
+ }
686
+ })
687
+ return value;
688
+ })
689
+ }
690
+
691
+
692
+ async parseResponse(response) {
693
+ // const response = await this.page.waitForResponse(res => res);
694
+
695
+ const value = {};
696
+ const request = await response.request();
697
+ const headers = await response.headers();
698
+ value.method = await request.method();
699
+ if (value.method === 'OPTIONS') return;
700
+
701
+ value.type = await request.resourceType();
702
+ // value.redirect = await response.redirectURL();
703
+ if (value.type === 'xhr') value.type = 'AJAX';
704
+ value.url = await response.url();
705
+ value.domain = parseUrl(value.url)['host'];
706
+ value.content = headers['content-type'];
707
+ value.length = headers['content-length'];
708
+ value.status = await response.status();
709
+ value.ok = await response.ok();
710
+ value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
711
+ // value.headers = headers;
712
+ if (headers['server']) value.server = headers['server'];
713
+ if (headers['set-cookie']) value.cookies = await this.parseCookies(headers['set-cookie']);
714
+ value.remote = await response.remoteAddress(); //目标服务器
715
+ if (value.status === 301 || value.status === 302) return value;
716
+ if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
717
+ if (value.content) {
718
+ if (value.content.startsWith('application/vnd')) return value;
719
+ if (value.content.startsWith('application/xml')) return value;
720
+ if (value.content.startsWith('text/css')) return value;
721
+ }
722
+
723
+ value.post = await request.postData();
724
+ if (value.post) value.post = value.post.toString();
725
+
726
+ try {
727
+ value.buffer = await response.buffer();
728
+ value.response = value.buffer.toString();
729
+ value.json = JSON.parse(value.response);
730
+ }
731
+ catch (e) {
732
+ value.json = e.parse();
733
+ }
734
+
735
+ return value;
736
+ }
737
+
738
+
739
+ doListening(params) {
740
+
741
+ this.page.on('request', async (request) => {
742
+ if (this.requestCall) {
743
+ const run = await this.requestCall(request);
744
+ if (run === false) {
745
+ request.abort();
746
+ return;
747
+ }
748
+ }
749
+
750
+ const { abort_img = false, no_cache = true, append_headers = {} } = params;
751
+ if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
752
+ request.abort();
753
+ return;
754
+ }
755
+
756
+ const headers = request.headers();
757
+ if (append_headers !== {}) Object.assign(headers, append_headers);
758
+
759
+ headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
760
+ headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
761
+ // headers['Access-Control-Allow-Headers'] = 'Content-Type';
762
+
763
+ if (no_cache) {
764
+ const rType = request.resourceType();
765
+ if (rType === 'script' || rType === 'stylesheet') {
766
+ headers['Cache-Control'] = 'no-store'; // 禁用缓存
767
+ }
768
+ }
769
+
770
+ request.continue({ headers });
771
+ });
772
+
773
+ this.page.on('response', async res => {
774
+ if (!this.responseCall) return;
775
+ let json = await this.parseResponse(res);
776
+ await this.responseCall(json);
777
+ });
778
+ }
779
+
780
+
733
781
  }