nodejs_chromium 1.0.7 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -16,7 +16,12 @@ async function newChrome(params) {
16
16
  incognito = true,
17
17
  path = void 0,
18
18
  slowMo = 1,
19
- ua = void 0
19
+ ua = void 0,
20
+ proxy = null,
21
+ cookies = null, //若cookies=false,则不处理cookies,不指定则由chrome处理,若=文件路径
22
+ abort_img = false,
23
+ no_cache = true,
24
+ headers = {},
20
25
  } = params;
21
26
 
22
27
  let option = {
@@ -26,9 +31,6 @@ async function newChrome(params) {
26
31
  headless: visible ? false : 'new', //'new',设置是否在无头模式下运行浏览器,false=会启动浏览器,true=无界面
27
32
  devtools: !!debug, //打开调试
28
33
  ignoreHTTPSErrors: true, //忽略 HTTPS 错误。屏蔽跳转不同域名的报错
29
- ignoreDefaultArgs: [
30
- "--enable-automation", //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
31
- ],
32
34
  dumpio: !!dumpio, //是否将浏览器的标准输入/输出流(stdio)的内容输出到 Node.js 的 stdout 和 stderr。
33
35
  defaultViewport: {
34
36
  width,
@@ -36,26 +38,32 @@ async function newChrome(params) {
36
38
  deviceScaleFactor: scale, //缩放比例
37
39
  isMobile: !!mobile,
38
40
  },
41
+ defaultArgs: [
42
+ '--disable-extensions', //禁止启动扩展
43
+ '--no-sandbox', //禁用沙箱模式
44
+ '--disable-setuid-sandbox', //禁用设置用户身份沙盒
45
+ '--disable-web-security', //禁用同源策略
46
+ ],
47
+ ignoreDefaultArgs: [
48
+ "--enable-automation", //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
49
+ ],
39
50
  args: [
40
51
  '--enable-chrome-browser-cloud-management', //Cloud Browser Client Management (CBCM)
41
52
  '--disable-web-security', //禁用浏览器的同源策略(Same-Origin Policy)和跨站请求伪造(CSRF)保护
42
53
  `--window-size=${width},${height}`,
43
54
  '--no-sandbox', //禁用沙箱模式
44
55
  '--disable-setuid-sandbox', //禁用 setuid 沙箱。这是另一种沙箱模式,通常用于 Linux 系统上。
45
- '--disable-infobars', //禁用 Chrome 在自动化控制时显示的信息栏。这个信息栏通常会告诉用户浏览器正在被自动化工具控制。
56
+ // '--disable-infobars', //禁用 Chrome 在自动化控制时显示的信息栏。这个信息栏通常会告诉用户浏览器正在被自动化工具控制。
46
57
  '--disable-gpu', //禁用 GPU 加速。这通常用于在服务器环境或某些不支持 GPU 加速的平台上运行 Chrome。
47
- '--disable-blink-features=AutomationControlled', //禁用 blink 引擎的自动化控制特性,防止网站通过检测 blink 引擎的特性来判断浏览器是否处于自动化控制状态。
48
- // 在自动化测试或爬虫等场景中,这个参数可以帮助隐藏浏览器的自动化痕迹,使得浏览器行为更接近于真实用户操作
58
+ '--disable-blink-features=AutomationControlled', //防止检测 blink 引擎特性判断是否处于自动化控制状态。
49
59
  '--lang=zh-CN', //设置中文环境
50
60
  '--disable-extensions', //禁止启动扩展
51
61
  '--disable-dev-shm-usage', //Linux系统中使用普通的文件系统缓存避免因为/dev/shm大小不足而导致的问题
52
62
  ]
53
63
  }
54
64
 
55
- if (incognito) { //使用无痕模式启动
56
- option.args.push('--incognito', '--disable-infobars')
57
- }
58
-
65
+ if (proxy) option.defaultArgs.push(`--proxy-server=${proxy}`);
66
+ if (incognito) option.args.push('--incognito', '--disable-infobars'); //使用无痕模式启动
59
67
  if (path) option.executablePath = path; //指定chrome安装路径
60
68
 
61
69
  const browser = await puppeteer.launch(option);
@@ -65,11 +73,13 @@ async function newChrome(params) {
65
73
  if (ua) await page.setUserAgent(ua);
66
74
  await page.evaluateOnNewDocument(() => {
67
75
  const newProto = navigator.__proto__;
68
- delete newProto.webdriver; //删除 navigator.webdriver字段
76
+ delete newProto.webdriver; //删除 navigator.webdriver字段,防止检测到自动化行为
69
77
  navigator.__proto__ = newProto;
70
- }); //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
78
+ });
71
79
 
72
- return new chrome(browser, page, params);
80
+ if (cookies !== false) cookies = `runtime/cookies/${id}`;
81
+ const pageOption = { cookies, visible, abort_img, no_cache, headers };
82
+ return new chrome(browser, page, pageOption);
73
83
  }
74
84
 
75
85
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodejs_chromium",
3
- "version": "1.0.7",
3
+ "version": "1.0.8",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/src/chrome.js CHANGED
@@ -1,4 +1,5 @@
1
1
  const fs = require("fs");
2
+ const Cookies = require("./cookies");
2
3
  const { parse: parseUrl } = require("url");
3
4
 
4
5
 
@@ -12,21 +13,23 @@ const { parse: parseUrl } = require("url");
12
13
  * @type {exports}
13
14
  */
14
15
  module.exports = class {
15
- browser = null;
16
- page = null;
17
- responseCall = null;
18
- requestCall = null;
16
+ browser = void 0;
17
+ page = void 0;
18
+ responseCall = void 0;
19
+ requestCall = void 0;
20
+ cookies = void 0;
21
+ options = void 0;
19
22
  isFrame = false; //是不是在iFrame中
20
23
  visible = false; //是否可见,也就是有没有启动窗口
21
- params = {};
22
24
 
23
- constructor(browser, page, params, isFrame = false) {
25
+ constructor(browser, page, options, isFrame = false) {
24
26
  this.browser = browser;
25
27
  this.page = page;
26
- this.params = params;
27
- this.visible = !!params.visible;
28
+ this.options = options;
29
+ this.cookies = new Cookies(page, options.cookies);
30
+ this.visible = !!options.visible;
28
31
  this.isFrame = !!isFrame;
29
- this.doListening(params);
32
+ this.doListening(options);
30
33
  }
31
34
 
32
35
  request(call) {
@@ -46,13 +49,12 @@ module.exports = class {
46
49
  try {
47
50
  const page = await this.browser.newPage();
48
51
  await page.setRequestInterception(true); //允许拦截
49
- // await page.setUserAgent(this.params.ua);
50
52
  await page.evaluateOnNewDocument(() => {
51
53
  const newProto = navigator.__proto__;
52
54
  delete newProto.webdriver; //删除 navigator.webdriver字段
53
55
  navigator.__proto__ = newProto; //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
54
56
  });
55
- return new module.exports(this.browser, page, this.params, false); //new 自身
57
+ return new module.exports(this.browser, page, this.options, false); //new 自身
56
58
  }
57
59
  catch (e) {
58
60
  console.log('[chrome.iframe.Error]', e.message);
@@ -139,8 +141,7 @@ module.exports = class {
139
141
  async iframe(tag) {
140
142
  try {
141
143
  const frame = await (await this.page.$(tag)).contentFrame();
142
- // return new iframe(this.browser, frame, this.params);
143
- return new module.exports(this.browser, frame, this.params, true); //new 自身
144
+ return new module.exports(this.browser, frame, this.options, true); //new 自身
144
145
  }
145
146
  catch (e) {
146
147
  console.log('[chrome.iframe.Error]', e.message);
@@ -150,7 +151,7 @@ module.exports = class {
150
151
  /**
151
152
  * 关闭
152
153
  */
153
- async close(act = 3) {
154
+ async close(act = 1) {
154
155
 
155
156
  try {
156
157
  if (act & 1) await this.page.close();
@@ -326,11 +327,15 @@ module.exports = class {
326
327
  * div[name=abc]
327
328
  * iframe:first-child
328
329
  * div>ul>li:nth-child(4)>a
329
- *
330
+ *
331
+ * querySelector,若匹配不到返回null
332
+ * page.$(),若匹配不上则会抛出错误,这里最后也会在catch中返回null
333
+ *
330
334
  * @param {Object} tag
331
335
  */
332
- async element(tag) {
336
+ async element(tag, selector = false) {
333
337
  try {
338
+ if (selector) return await this.page.querySelector(tag);
334
339
  return await this.page.$(tag);
335
340
  }
336
341
  catch (e) {
@@ -495,90 +500,6 @@ module.exports = class {
495
500
  }
496
501
 
497
502
 
498
- jsonArray(cookiesVal, host) {
499
- if (!cookiesVal) return [];
500
- return JSON.parse(cookiesVal).map(cook => {
501
- let { name, value, domain } = cook;
502
- if (!domain) domain = host;
503
- return { name, value, domain };
504
- });
505
- }
506
-
507
- /**
508
-
509
- // const url = parseUrl(this.page.url());
510
- // const host = '.' + url.host.split('.').slice(-2).join('.');
511
- // let cookiesVal = read(file, 'utf8');
512
- // cookiesVal = this.jsonArray(cookiesVal, host);
513
-
514
- */
515
-
516
- /**
517
- * 合并两个Cookies,并以后面的值为准
518
- *
519
- * @param {Object} oldCookies
520
- * @param {Object} newCookies
521
- */
522
- async mergeNewCookies(oldCookies, newCookies) {
523
- if (newCookies.length === 0) return oldCookies;
524
- // let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));//浅拷贝
525
- let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
526
- newCookies.map(obj => {
527
- aMap.set(obj.name, obj)
528
- });
529
- return await Array.from(aMap.values());
530
- }
531
-
532
-
533
- /**
534
- * 获取当前页面的Cookies
535
- */
536
- async getCookies() {
537
- try {
538
- return await this.page.cookies();
539
- }
540
- catch (e) {
541
- console.log('[chrome.getCookies.Error]', e.message);
542
- return [];
543
- }
544
- }
545
-
546
- /**
547
- * 设置Cookies
548
- * @param {Object} cookies
549
- */
550
- async setCookies(cookies) {
551
- try {
552
- await this.page.setCookie(...cookies);
553
- return this;
554
- }
555
- catch (e) {
556
- console.log('[chrome.setCookies.Error]', e.message);
557
- console.log(cookies);
558
- }
559
- }
560
-
561
- /**
562
- * 保存当前页面中的Cookies
563
- * @param {Object} file
564
- */
565
- async saveCookies(file, append = true) {
566
- try {
567
- let cookies = await this.page.cookies();
568
-
569
- if (append && fs.existsSync(file)) {
570
- let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
571
- cookies = this.mergeNewCookies(dbCookies, cookies);
572
- }
573
-
574
- await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
575
- return this;
576
- }
577
- catch (e) {
578
- console.log('[chrome.saveCookies.Error]', e.message);
579
- }
580
- }
581
-
582
503
  /**
583
504
  * 加水印,这里实际上是在网页加个DIV并显示时间
584
505
  * @param {Object} conf
@@ -633,20 +554,20 @@ module.exports = class {
633
554
  }
634
555
 
635
556
  /**
636
- * 补全所有本地js/css
557
+ * 补全所有本地js/css,一般用于保存html之前
637
558
  */
638
559
  async improveUrls() {
639
560
  const url = parseUrl(this.page.url());
640
561
  const domain = url.protocol + '//' + url.host;
641
562
  await this.page.evaluate((domain) => {
642
563
  try {
643
- const tags = document.querySelectorAll('script[src], link[href], iframe[src]');
644
- tags.forEach((ele) => {
645
- const src = ele.getAttribute('src');
646
- const href = ele.getAttribute('href');
647
- if (src && src.startsWith('/')) ele.src = domain + src;
648
- if (href && href.startsWith('/')) ele.href = domain + href;
649
- });
564
+ document.querySelectorAll('script[src], link[href], iframe[src]')
565
+ .forEach((ele) => {
566
+ const src = ele.getAttribute('src');
567
+ const href = ele.getAttribute('href');
568
+ if (src && src.startsWith('/')) ele.src = domain + src;
569
+ if (href && href.startsWith('/')) ele.href = domain + href;
570
+ });
650
571
  }
651
572
  catch (e) {
652
573
  console.log('[chrome.improveUrls.Error]', e.message);
@@ -655,39 +576,6 @@ module.exports = class {
655
576
  }
656
577
 
657
578
 
658
- /**
659
- * 解析网页set-cookies的值
660
- *
661
- * @param {Object} strCookies
662
- */
663
- async parseCookies(strCookies) {
664
- return await strCookies.split("\n").map((ls) => {
665
- let value = {};
666
- ls.split(';').map((ln, j) => {
667
- // console.log(ln);
668
- const arr = ln.split('=');
669
- const Key = (arr[0]).trim();
670
- if (!Key) return;
671
-
672
- if (j === 0) {
673
- value.name = Key;
674
- value.value = arr[1];
675
- }
676
- else if (Key === 'Max-Age') {
677
- value.expire = parseInt(arr[1]) + (Date.now() / 1000);
678
- }
679
- else if (Key === 'Secure') {
680
- value.source = true;
681
- value.sourceScheme = 'Secure';
682
- }
683
- else {
684
- value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
685
- }
686
- })
687
- return value;
688
- })
689
- }
690
-
691
579
 
692
580
  async parseResponse(response) {
693
581
  // const response = await this.page.waitForResponse(res => res);
@@ -710,7 +598,7 @@ module.exports = class {
710
598
  value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
711
599
  // value.headers = headers;
712
600
  if (headers['server']) value.server = headers['server'];
713
- if (headers['set-cookie']) value.cookies = await this.parseCookies(headers['set-cookie']);
601
+ if (headers['set-cookie']) value.cookies = await this.cookies.parseCookies(headers['set-cookie']);
714
602
  value.remote = await response.remoteAddress(); //目标服务器
715
603
  if (value.status === 301 || value.status === 302) return value;
716
604
  if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
@@ -736,7 +624,7 @@ module.exports = class {
736
624
  }
737
625
 
738
626
 
739
- doListening(params) {
627
+ doListening(options) {
740
628
 
741
629
  this.page.on('request', async (request) => {
742
630
  if (this.requestCall) {
@@ -747,27 +635,27 @@ module.exports = class {
747
635
  }
748
636
  }
749
637
 
750
- const { abort_img = false, no_cache = true, append_headers = {} } = params;
751
- if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
638
+ const { abort_img = false, no_cache = true, headers = {} } = options;
639
+ if (abort_img && /\.(?:png|jpg|jpeg|svg|gif|bmp)$/i.test(request.url())) {
752
640
  request.abort();
753
641
  return;
754
642
  }
755
643
 
756
- const headers = request.headers();
757
- if (append_headers !== {}) Object.assign(headers, append_headers);
644
+ const headersAll = request.headers();
645
+ if (headers !== {}) Object.assign(headersAll, headers);
758
646
 
759
- headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
760
- headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
761
- // headers['Access-Control-Allow-Headers'] = 'Content-Type';
647
+ headersAll['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
648
+ headersAll['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
649
+ // headersAll['Access-Control-Allow-Headers'] = 'Content-Type';
762
650
 
763
651
  if (no_cache) {
764
652
  const rType = request.resourceType();
765
653
  if (rType === 'script' || rType === 'stylesheet') {
766
- headers['Cache-Control'] = 'no-store'; // 禁用缓存
654
+ headersAll['Cache-Control'] = 'no-store'; // 禁用缓存
767
655
  }
768
656
  }
769
657
 
770
- request.continue({ headers });
658
+ request.continue({ headers: headersAll });
771
659
  });
772
660
 
773
661
  this.page.on('response', async res => {
@@ -775,7 +663,8 @@ module.exports = class {
775
663
  let json = await this.parseResponse(res);
776
664
  await this.responseCall(json);
777
665
  });
778
- }
779
666
 
780
667
 
668
+ }
669
+
781
670
  }
package/src/cookies.js ADDED
@@ -0,0 +1,138 @@
1
+ const fs = require("fs");
2
+
3
+
4
+ module.exports = class {
5
+ page = void 0;
6
+ file = void 0;
7
+ cookies = void 0;
8
+
9
+ constructor(page, cookies) {
10
+ this.page = page;
11
+ if (cookies === false) return;
12
+
13
+ this.file = cookies;
14
+ if (!fs.existsSync(cookies)) return;
15
+ console.log('this.cookie_file', cookies);
16
+ let cookiesVal = fs.readFileSync(cookies, 'utf8');
17
+ this.cookies = JSON.parse(cookiesVal);
18
+ }
19
+
20
+ jsonArray(cookiesVal, host) {
21
+ if (!cookiesVal) return [];
22
+ return JSON.parse(cookiesVal).map(cook => {
23
+ let { name, value, domain } = cook;
24
+ if (!domain) domain = host;
25
+ return { name, value, domain };
26
+ });
27
+ }
28
+
29
+ /**
30
+
31
+ // const url = parseUrl(this.page.url());
32
+ // const host = '.' + url.host.split('.').slice(-2).join('.');
33
+ // let cookiesVal = read(file, 'utf8');
34
+ // cookiesVal = this.jsonArray(cookiesVal, host);
35
+
36
+ */
37
+ /**
38
+ * 解析网页set-cookies的值
39
+ *
40
+ * @param {Object} strCookies
41
+ */
42
+ async parseCookies(strCookies) {
43
+ return await strCookies.split("\n").map((ls) => {
44
+ let value = {};
45
+ ls.split(';').map((ln, j) => {
46
+ // console.log(ln);
47
+ const arr = ln.split('=');
48
+ const Key = (arr[0]).trim();
49
+ if (!Key) return;
50
+
51
+ if (j === 0) {
52
+ value.name = Key;
53
+ value.value = arr[1];
54
+ }
55
+ else if (Key === 'Max-Age') {
56
+ value.expire = parseInt(arr[1]) + (Date.now() / 1000);
57
+ }
58
+ else if (Key === 'Secure') {
59
+ value.source = true;
60
+ value.sourceScheme = 'Secure';
61
+ }
62
+ else {
63
+ value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
64
+ }
65
+ })
66
+ return value;
67
+ })
68
+ }
69
+
70
+ /**
71
+ * 合并两个Cookies,并以后面的值为准
72
+ *
73
+ * @param {Object} oldCookies
74
+ * @param {Object} newCookies
75
+ */
76
+ async mergeNewCookies(oldCookies, newCookies) {
77
+ if (newCookies.length === 0) return oldCookies;
78
+ let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));
79
+ // let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
80
+ newCookies.map(obj => {
81
+ aMap.set(obj.name, obj)
82
+ });
83
+ return await Array.from(aMap.values());
84
+ }
85
+
86
+
87
+ /**
88
+ * 获取当前页面的Cookies
89
+ */
90
+ async getCookies() {
91
+ try {
92
+ return await this.page.cookies();
93
+ }
94
+ catch (e) {
95
+ console.log('[chrome.getCookies.Error]', e.message);
96
+ return [];
97
+ }
98
+ }
99
+
100
+ /**
101
+ * 设置Cookies
102
+ * @param {Object} cookies
103
+ */
104
+ async setCookies(cookies) {
105
+ try {
106
+ await this.page.setCookie(...cookies.map(ck => {
107
+ if (typeof ck.expires === 'string') ck.expires = new Date(ck.expires).getTime();
108
+ return ck;
109
+ }).filter(ck => (!!ck.domain || !!ck.url)));
110
+ }
111
+ catch (e) {
112
+ console.log('[chrome.setCookies.Error]', e.message);
113
+ console.log(JSON.stringify(cookies));
114
+ }
115
+ }
116
+
117
+ /**
118
+ * 保存当前页面中的Cookies
119
+ * @param {Object} file
120
+ */
121
+ async saveCookies(file, append = true) {
122
+ try {
123
+ let cookies = await this.page.cookies();
124
+
125
+ if (append && fs.existsSync(file)) {
126
+ let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
127
+ cookies = this.mergeNewCookies(dbCookies, cookies);
128
+ }
129
+ await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
130
+ }
131
+ catch (e) {
132
+ console.log('[chrome.saveCookies.Error]', e.message);
133
+ }
134
+ }
135
+
136
+
137
+
138
+ }