nodejs_chromium 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6,7 +6,7 @@ global.__UA__ = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (K
6
6
  async function newChrome(params) {
7
7
  let {
8
8
  id = 'myChrome',
9
- view = false,
9
+ visible = false,
10
10
  width = 1024,
11
11
  height = 768,
12
12
  scale = 1,
@@ -16,19 +16,21 @@ async function newChrome(params) {
16
16
  incognito = true,
17
17
  path = void 0,
18
18
  slowMo = 1,
19
- ua = void 0
19
+ ua = void 0,
20
+ proxy = null,
21
+ cookies = null, //若cookies=false,则不处理cookies,不指定则由chrome处理,若=文件路径
22
+ abort_img = false,
23
+ no_cache = true,
24
+ headers = {},
20
25
  } = params;
21
26
 
22
27
  let option = {
23
28
  userDataDir: `runtime/template/${id}`,
24
29
  // timeout: 500,//最大允许超时间,默认为3000,最小215,一般不要设置
25
30
  slowMo, //每一步停留时间,不能太大,否则会太慢,特别是在类似写入很多Cookies时,每写入一个都要等一下。
26
- headless: view ? false : 'new', //'new',设置是否在无头模式下运行浏览器,false=会启动浏览器,true=无界面
31
+ headless: visible ? false : 'new', //'new',设置是否在无头模式下运行浏览器,false=会启动浏览器,true=无界面
27
32
  devtools: !!debug, //打开调试
28
33
  ignoreHTTPSErrors: true, //忽略 HTTPS 错误。屏蔽跳转不同域名的报错
29
- ignoreDefaultArgs: [
30
- "--enable-automation", //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
31
- ],
32
34
  dumpio: !!dumpio, //是否将浏览器的标准输入/输出流(stdio)的内容输出到 Node.js 的 stdout 和 stderr。
33
35
  defaultViewport: {
34
36
  width,
@@ -36,26 +38,32 @@ async function newChrome(params) {
36
38
  deviceScaleFactor: scale, //缩放比例
37
39
  isMobile: !!mobile,
38
40
  },
41
+ defaultArgs: [
42
+ '--disable-extensions', //禁止启动扩展
43
+ '--no-sandbox', //禁用沙箱模式
44
+ '--disable-setuid-sandbox', //禁用设置用户身份沙盒
45
+ '--disable-web-security', //禁用同源策略
46
+ ],
47
+ ignoreDefaultArgs: [
48
+ "--enable-automation", //忽略默认的 --enable-automation 参数,这有助于防止某些网站检测到自动化行为。
49
+ ],
39
50
  args: [
40
51
  '--enable-chrome-browser-cloud-management', //Cloud Browser Client Management (CBCM)
41
52
  '--disable-web-security', //禁用浏览器的同源策略(Same-Origin Policy)和跨站请求伪造(CSRF)保护
42
53
  `--window-size=${width},${height}`,
43
54
  '--no-sandbox', //禁用沙箱模式
44
55
  '--disable-setuid-sandbox', //禁用 setuid 沙箱。这是另一种沙箱模式,通常用于 Linux 系统上。
45
- '--disable-infobars', //禁用 Chrome 在自动化控制时显示的信息栏。这个信息栏通常会告诉用户浏览器正在被自动化工具控制。
56
+ // '--disable-infobars', //禁用 Chrome 在自动化控制时显示的信息栏。这个信息栏通常会告诉用户浏览器正在被自动化工具控制。
46
57
  '--disable-gpu', //禁用 GPU 加速。这通常用于在服务器环境或某些不支持 GPU 加速的平台上运行 Chrome。
47
- '--disable-blink-features=AutomationControlled', //禁用 blink 引擎的自动化控制特性,防止网站通过检测 blink 引擎的特性来判断浏览器是否处于自动化控制状态。
48
- // 在自动化测试或爬虫等场景中,这个参数可以帮助隐藏浏览器的自动化痕迹,使得浏览器行为更接近于真实用户操作
58
+ '--disable-blink-features=AutomationControlled', //防止检测 blink 引擎特性判断是否处于自动化控制状态。
49
59
  '--lang=zh-CN', //设置中文环境
50
60
  '--disable-extensions', //禁止启动扩展
51
61
  '--disable-dev-shm-usage', //Linux系统中使用普通的文件系统缓存避免因为/dev/shm大小不足而导致的问题
52
62
  ]
53
63
  }
54
64
 
55
- if (incognito) { //使用无痕模式启动
56
- option.args.push('--incognito', '--disable-infobars')
57
- }
58
-
65
+ if (proxy) option.defaultArgs.push(`--proxy-server=${proxy}`);
66
+ if (incognito) option.args.push('--incognito', '--disable-infobars'); //使用无痕模式启动
59
67
  if (path) option.executablePath = path; //指定chrome安装路径
60
68
 
61
69
  const browser = await puppeteer.launch(option);
@@ -65,11 +73,13 @@ async function newChrome(params) {
65
73
  if (ua) await page.setUserAgent(ua);
66
74
  await page.evaluateOnNewDocument(() => {
67
75
  const newProto = navigator.__proto__;
68
- delete newProto.webdriver; //删除 navigator.webdriver字段
76
+ delete newProto.webdriver; //删除 navigator.webdriver字段,防止检测到自动化行为
69
77
  navigator.__proto__ = newProto;
70
- }); //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
78
+ });
71
79
 
72
- return new chrome(browser, page, params);
80
+ if (cookies !== false) cookies = `runtime/cookies/${id}`;
81
+ const pageOption = { cookies, visible, abort_img, no_cache, headers };
82
+ return new chrome(browser, page, pageOption);
73
83
  }
74
84
 
75
85
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodejs_chromium",
3
- "version": "1.0.6",
3
+ "version": "1.0.8",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/src/chrome.js CHANGED
@@ -1,4 +1,5 @@
1
1
  const fs = require("fs");
2
+ const Cookies = require("./cookies");
2
3
  const { parse: parseUrl } = require("url");
3
4
 
4
5
 
@@ -12,21 +13,23 @@ const { parse: parseUrl } = require("url");
12
13
  * @type {exports}
13
14
  */
14
15
  module.exports = class {
15
- browser = null;
16
- page = null;
17
- responseCall = null;
18
- requestCall = null;
19
- isFrame = false;
20
- visible = false;
21
- params = {};
22
-
23
- constructor(browser, page, params, isFrame = false) {
16
+ browser = void 0;
17
+ page = void 0;
18
+ responseCall = void 0;
19
+ requestCall = void 0;
20
+ cookies = void 0;
21
+ options = void 0;
22
+ isFrame = false; //是不是在iFrame中
23
+ visible = false; //是否可见,也就是有没有启动窗口
24
+
25
+ constructor(browser, page, options, isFrame = false) {
24
26
  this.browser = browser;
25
27
  this.page = page;
26
- this.params = params;
27
- this.visible = !!params.view;
28
+ this.options = options;
29
+ this.cookies = new Cookies(page, options.cookies);
30
+ this.visible = !!options.visible;
28
31
  this.isFrame = !!isFrame;
29
- this.doListening(params);
32
+ this.doListening(options);
30
33
  }
31
34
 
32
35
  request(call) {
@@ -46,13 +49,12 @@ module.exports = class {
46
49
  try {
47
50
  const page = await this.browser.newPage();
48
51
  await page.setRequestInterception(true); //允许拦截
49
- // await page.setUserAgent(this.params.ua);
50
52
  await page.evaluateOnNewDocument(() => {
51
53
  const newProto = navigator.__proto__;
52
54
  delete newProto.webdriver; //删除 navigator.webdriver字段
53
55
  navigator.__proto__ = newProto; //在每次新文档加载时,删除 navigator.webdriver 字段,这有助于防止某些网站检测到自动化行为。
54
56
  });
55
- return new module.exports(this.browser, page, this.params, false); //new 自身
57
+ return new module.exports(this.browser, page, this.options, false); //new 自身
56
58
  }
57
59
  catch (e) {
58
60
  console.log('[chrome.iframe.Error]', e.message);
@@ -132,11 +134,14 @@ module.exports = class {
132
134
  }
133
135
  }
134
136
 
137
+ /**
138
+ * 按tag创建当前窗口中的某个iFrame
139
+ * @param {Object} tag
140
+ */
135
141
  async iframe(tag) {
136
142
  try {
137
143
  const frame = await (await this.page.$(tag)).contentFrame();
138
- // return new iframe(this.browser, frame, this.params);
139
- return new module.exports(this.browser, frame, this.params, true); //new 自身
144
+ return new module.exports(this.browser, frame, this.options, true); //new 自身
140
145
  }
141
146
  catch (e) {
142
147
  console.log('[chrome.iframe.Error]', e.message);
@@ -146,7 +151,7 @@ module.exports = class {
146
151
  /**
147
152
  * 关闭
148
153
  */
149
- async close(act = 3) {
154
+ async close(act = 1) {
150
155
 
151
156
  try {
152
157
  if (act & 1) await this.page.close();
@@ -175,7 +180,9 @@ module.exports = class {
175
180
  return this.page.url();
176
181
  }
177
182
 
178
-
183
+ /**
184
+ * 重新设置尺寸
185
+ */
179
186
  async size(width = 1024, height = 768) {
180
187
  await this.page.setViewport({ width, height });
181
188
  return this;
@@ -279,9 +286,9 @@ module.exports = class {
279
286
  */
280
287
  async display(tag, show) {
281
288
  try {
282
- // const element = await this.page.querySelector(tag);
283
- // element.style.display = (!!show) ? '' : 'none';
284
- (await this.page.querySelector(tag)).style.display = (!!show) ? '' : 'none';
289
+ const element = await this.page.querySelector(tag);
290
+ if (!element) throw new Error(`${tag} not exists`);
291
+ element.style.display = (!!show) ? '' : 'none';
285
292
  return this;
286
293
  }
287
294
  catch (e) {
@@ -320,11 +327,15 @@ module.exports = class {
320
327
  * div[name=abc]
321
328
  * iframe:first-child
322
329
  * div>ul>li:nth-child(4)>a
323
- *
330
+ *
331
+ * querySelector,若匹配不到返回null
332
+ * page.$(),若匹配不上则会抛出错误,这里最后也会在cath中返回null
333
+ *
324
334
  * @param {Object} tag
325
335
  */
326
- async element(tag) {
336
+ async element(tag, selector = false) {
327
337
  try {
338
+ if (selector) return await this.page.querySelector(tag);
328
339
  return await this.page.$(tag);
329
340
  }
330
341
  catch (e) {
@@ -376,7 +387,6 @@ module.exports = class {
376
387
  async waiting(time) {
377
388
  return await this.sleep(time);
378
389
  }
379
-
380
390
  async sleep(time) {
381
391
  if (time < 100) time = time * 1000;
382
392
  try {
@@ -485,93 +495,10 @@ module.exports = class {
485
495
  return this;
486
496
  }
487
497
  catch (e) {
488
- console.log('[chrome.saveHtml.Error]', e.parse());
489
- }
490
- }
491
-
492
-
493
- jsonArray(cookiesVal, host) {
494
- if (!cookiesVal) return [];
495
- return JSON.parse(cookiesVal).map(cook => {
496
- let { name, value, domain } = cook;
497
- if (!domain) domain = host;
498
- return { name, value, domain };
499
- });
500
- }
501
-
502
- /**
503
-
504
- // const url = parseUrl(this.page.url());
505
- // const host = '.' + url.host.split('.').slice(-2).join('.');
506
- // let cookiesVal = read(file, 'utf8');
507
- // cookiesVal = this.jsonArray(cookiesVal, host);
508
-
509
- */
510
-
511
- /**
512
- * 合并两个Cookies,并以后面的值为准
513
- *
514
- * @param {Object} oldCookies
515
- * @param {Object} newCookies
516
- */
517
- async mergeNewCookies(oldCookies, newCookies) {
518
- if (newCookies.length === 0) return oldCookies;
519
- // let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));//浅拷贝
520
- let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
521
- newCookies.map(obj => {
522
- aMap.set(obj.name, obj)
523
- });
524
- return await Array.from(aMap.values());
525
- }
526
-
527
-
528
- /**
529
- * 获取当前页面的Cookies
530
- */
531
- async getCookies() {
532
- try {
533
- return await this.page.cookies();
534
- }
535
- catch (e) {
536
- console.log('[chrome.getCookies.Error]', e.message);
537
- return [];
538
- }
539
- }
540
-
541
- /**
542
- * 设置Cookies
543
- * @param {Object} cookies
544
- */
545
- async setCookies(cookies) {
546
- try {
547
- await this.page.setCookie(...cookies);
548
- return this;
549
- }
550
- catch (e) {
551
- console.log('[chrome.setCookies.Error]', e.message);
498
+ console.log('[chrome.saveHtml.Error]', e.message);
552
499
  }
553
500
  }
554
501
 
555
- /**
556
- * 保存当前页面中的Cookies
557
- * @param {Object} file
558
- */
559
- async saveCookies(file, append = true) {
560
- try {
561
- let cookies = await this.page.cookies();
562
-
563
- if (append && fs.existsSync(file)) {
564
- let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
565
- cookies = this.mergeNewCookies(dbCookies, cookies);
566
- }
567
-
568
- await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
569
- return this;
570
- }
571
- catch (e) {
572
- console.log('[chrome.saveCookies.Error]', e.parse());
573
- }
574
- }
575
502
 
576
503
  /**
577
504
  * 加水印,这里实际上是在网页加个DIV并显示时间
@@ -627,20 +554,20 @@ module.exports = class {
627
554
  }
628
555
 
629
556
  /**
630
- * 补全所有本地js/css
557
+ * 补全所有本地js/css,一般用于保存html之前
631
558
  */
632
559
  async improveUrls() {
633
560
  const url = parseUrl(this.page.url());
634
561
  const domain = url.protocol + '//' + url.host;
635
562
  await this.page.evaluate((domain) => {
636
563
  try {
637
- const tags = document.querySelectorAll('script[src], link[href], iframe[src]');
638
- tags.forEach((ele) => {
639
- const src = ele.getAttribute('src');
640
- const href = ele.getAttribute('href');
641
- if (src && src.startsWith('/')) ele.src = domain + src;
642
- if (href && href.startsWith('/')) ele.href = domain + href;
643
- });
564
+ document.querySelectorAll('script[src], link[href], iframe[src]')
565
+ .forEach((ele) => {
566
+ const src = ele.getAttribute('src');
567
+ const href = ele.getAttribute('href');
568
+ if (src && src.startsWith('/')) ele.src = domain + src;
569
+ if (href && href.startsWith('/')) ele.href = domain + href;
570
+ });
644
571
  }
645
572
  catch (e) {
646
573
  console.log('[chrome.improveUrls.Error]', e.message);
@@ -649,39 +576,6 @@ module.exports = class {
649
576
  }
650
577
 
651
578
 
652
- /**
653
- * 解析网页set-cookies的值
654
- *
655
- * @param {Object} strCookies
656
- */
657
- async parseCookies(strCookies) {
658
- return await strCookies.split("\n").map((ls, i) => {
659
- let value = {};
660
- ls.split(';').map((ln, j) => {
661
- // console.log(ln);
662
- const arr = ln.split('=');
663
- const Key = (arr[0]).trim();
664
- if (!Key) return;
665
-
666
- if (j === 0) {
667
- value.name = Key;
668
- value.value = arr[1];
669
- }
670
- else if (Key === 'Max-Age') {
671
- value.expire = parseInt(arr[1]) + (Date.now() / 1000);
672
- }
673
- else if (Key === 'Secure') {
674
- value.source = true;
675
- value.sourceScheme = 'Secure';
676
- }
677
- else {
678
- value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
679
- }
680
- })
681
- return value;
682
- })
683
- }
684
-
685
579
 
686
580
  async parseResponse(response) {
687
581
  // const response = await this.page.waitForResponse(res => res);
@@ -704,7 +598,7 @@ module.exports = class {
704
598
  value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
705
599
  // value.headers = headers;
706
600
  if (headers['server']) value.server = headers['server'];
707
- if (headers['set-cookie']) value.cookies = await this.parseCookies(headers['set-cookie']);
601
+ if (headers['set-cookie']) value.cookies = await this.cookies.parseCookies(headers['set-cookie']);
708
602
  value.remote = await response.remoteAddress(); //目标服务器
709
603
  if (value.status === 301 || value.status === 302) return value;
710
604
  if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
@@ -730,7 +624,7 @@ module.exports = class {
730
624
  }
731
625
 
732
626
 
733
- doListening(params) {
627
+ doListening(options) {
734
628
 
735
629
  this.page.on('request', async (request) => {
736
630
  if (this.requestCall) {
@@ -741,27 +635,27 @@ module.exports = class {
741
635
  }
742
636
  }
743
637
 
744
- const { abort_img = false, no_cache = true, append_headers = {} } = params;
745
- if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
638
+ const { abort_img = false, no_cache = true, headers = {} } = options;
639
+ if (abort_img && /\.(?:png|jpg|jpeg|svg|gif|bmp)$/i.test(request.url())) {
746
640
  request.abort();
747
641
  return;
748
642
  }
749
643
 
750
- const headers = request.headers();
751
- if (append_headers !== {}) Object.assign(headers, append_headers);
644
+ const headersAll = request.headers();
645
+ if (headers !== {}) Object.assign(headersAll, headers);
752
646
 
753
- headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
754
- headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
755
- // headers['Access-Control-Allow-Headers'] = 'Content-Type';
647
+ headersAll['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
648
+ headersAll['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
649
+ // headersAll['Access-Control-Allow-Headers'] = 'Content-Type';
756
650
 
757
651
  if (no_cache) {
758
652
  const rType = request.resourceType();
759
653
  if (rType === 'script' || rType === 'stylesheet') {
760
- headers['Cache-Control'] = 'no-store'; // 禁用缓存
654
+ headersAll['Cache-Control'] = 'no-store'; // 禁用缓存
761
655
  }
762
656
  }
763
657
 
764
- request.continue({ headers });
658
+ request.continue({ headers: headersAll });
765
659
  });
766
660
 
767
661
  this.page.on('response', async res => {
@@ -769,7 +663,8 @@ module.exports = class {
769
663
  let json = await this.parseResponse(res);
770
664
  await this.responseCall(json);
771
665
  });
772
- }
773
666
 
774
667
 
668
+ }
669
+
775
670
  }
package/src/cookies.js ADDED
@@ -0,0 +1,138 @@
1
+ const fs = require("fs");
2
+
3
+
4
+ module.exports = class {
5
+ page = void 0;
6
+ file = void 0;
7
+ cookies = void 0;
8
+
9
+ constructor(page, cookies) {
10
+ this.page = page;
11
+ if (cookies === false) return;
12
+
13
+ this.file = cookies;
14
+ if (!fs.existsSync(cookies)) return;
15
+ console.log('this.cookie_file', cookies);
16
+ let cookiesVal = fs.readFileSync(cookies, 'utf8');
17
+ this.cookies = JSON.parse(cookiesVal);
18
+ }
19
+
20
+ jsonArray(cookiesVal, host) {
21
+ if (!cookiesVal) return [];
22
+ return JSON.parse(cookiesVal).map(cook => {
23
+ let { name, value, domain } = cook;
24
+ if (!domain) domain = host;
25
+ return { name, value, domain };
26
+ });
27
+ }
28
+
29
+ /**
30
+
31
+ // const url = parseUrl(this.page.url());
32
+ // const host = '.' + url.host.split('.').slice(-2).join('.');
33
+ // let cookiesVal = read(file, 'utf8');
34
+ // cookiesVal = this.jsonArray(cookiesVal, host);
35
+
36
+ */
37
+ /**
38
+ * 解析网页set-cookies的值
39
+ *
40
+ * @param {Object} strCookies
41
+ */
42
+ async parseCookies(strCookies) {
43
+ return await strCookies.split("\n").map((ls) => {
44
+ let value = {};
45
+ ls.split(';').map((ln, j) => {
46
+ // console.log(ln);
47
+ const arr = ln.split('=');
48
+ const Key = (arr[0]).trim();
49
+ if (!Key) return;
50
+
51
+ if (j === 0) {
52
+ value.name = Key;
53
+ value.value = arr[1];
54
+ }
55
+ else if (Key === 'Max-Age') {
56
+ value.expire = parseInt(arr[1]) + (Date.now() / 1000);
57
+ }
58
+ else if (Key === 'Secure') {
59
+ value.source = true;
60
+ value.sourceScheme = 'Secure';
61
+ }
62
+ else {
63
+ value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
64
+ }
65
+ })
66
+ return value;
67
+ })
68
+ }
69
+
70
+ /**
71
+ * 合并两个Cookies,并以后面的值为准
72
+ *
73
+ * @param {Object} oldCookies
74
+ * @param {Object} newCookies
75
+ */
76
+ async mergeNewCookies(oldCookies, newCookies) {
77
+ if (newCookies.length === 0) return oldCookies;
78
+ let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));
79
+ // let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
80
+ newCookies.map(obj => {
81
+ aMap.set(obj.name, obj)
82
+ });
83
+ return await Array.from(aMap.values());
84
+ }
85
+
86
+
87
+ /**
88
+ * 获取当前页面的Cookies
89
+ */
90
+ async getCookies() {
91
+ try {
92
+ return await this.page.cookies();
93
+ }
94
+ catch (e) {
95
+ console.log('[chrome.getCookies.Error]', e.message);
96
+ return [];
97
+ }
98
+ }
99
+
100
+ /**
101
+ * 设置Cookies
102
+ * @param {Object} cookies
103
+ */
104
+ async setCookies(cookies) {
105
+ try {
106
+ await this.page.setCookie(...cookies.map(ck => {
107
+ if (typeof ck.expires === 'string') ck.expires = new Date(ck.expires).getTime();
108
+ return ck;
109
+ }).filter(ck => (!!ck.domain || !!ck.url)));
110
+ }
111
+ catch (e) {
112
+ console.log('[chrome.setCookies.Error]', e.message);
113
+ console.log(JSON.stringify(cookies));
114
+ }
115
+ }
116
+
117
+ /**
118
+ * 保存当前页面中的Cookies
119
+ * @param {Object} file
120
+ */
121
+ async saveCookies(file, append = true) {
122
+ try {
123
+ let cookies = await this.page.cookies();
124
+
125
+ if (append && fs.existsSync(file)) {
126
+ let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
127
+ cookies = this.mergeNewCookies(dbCookies, cookies);
128
+ }
129
+ await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
130
+ }
131
+ catch (e) {
132
+ console.log('[chrome.saveCookies.Error]', e.message);
133
+ }
134
+ }
135
+
136
+
137
+
138
+ }