nodejs_chromium 1.0.8 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -6,32 +6,32 @@ global.__UA__ = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (K
6
6
  async function newChrome(params) {
7
7
  let {
8
8
  id = 'myChrome',
9
- visible = false,
10
- width = 1024,
11
- height = 768,
12
- scale = 1,
13
- mobile = false,
14
- dumpio = false,
15
- debug = false,
16
- incognito = true,
17
- path = void 0,
18
- slowMo = 1,
19
- ua = void 0,
20
- proxy = null,
9
+ visible = false, //是否打开浏览器
10
+ width = 1024, //宽
11
+ height = 768, //高
12
+ scale = 1, //缩放比例
13
+ mobile = false, //手机版
14
+ dumpio = false, //转发标准输入/输出流
15
+ devtools = false, //打开devtools
16
+ cache = false, //禁用缓存
17
+ incognito = true, //使用无痕模式启动
18
+ path = void 0, //Chrome路径,linux下必填
19
+ slowMo = 1, //每一步停留时间
20
+ ua = void 0, //若不指定ua则用__UA__
21
+ proxy = null, //代理,如:https://proxy.com:789/
21
22
  cookies = null, //若cookies=false,则不处理cookies,不指定则由chrome处理,若=文件路径
22
- abort_img = false,
23
- no_cache = true,
23
+ abort = [], //要禁止的类型,常见["font","image","ping","stylesheet","document","fetch","script","xhr"]
24
24
  headers = {},
25
25
  } = params;
26
26
 
27
27
  let option = {
28
- userDataDir: `runtime/template/${id}`,
28
+ userDataDir: `runtime/.cache/${id}`,
29
29
  // timeout: 500,//最大允许超时间,默认为3000,最小215,一般不要设置
30
30
  slowMo, //每一步停留时间,不能太大,否则会太慢,特别是在类似写入很多Cookies时,每写入一个都要等一下。
31
31
  headless: visible ? false : 'new', //'new',设置是否在无头模式下运行浏览器,false=会启动浏览器,true=无界面
32
- devtools: !!debug, //打开调试
32
+ devtools: !!devtools, //打开调试
33
33
  ignoreHTTPSErrors: true, //忽略 HTTPS 错误。屏蔽跳转不同域名的报错
34
- dumpio: !!dumpio, //是否将浏览器的标准输入/输出流(stdio)的内容输出到 Node.js 的 stdout 和 stderr。
34
+ dumpio, //是否将浏览器的标准输入/输出流(stdio)的内容输出到 Node.js 的 stdout 和 stderr。
35
35
  defaultViewport: {
36
36
  width,
37
37
  height,
@@ -52,8 +52,9 @@ async function newChrome(params) {
52
52
  '--disable-web-security', //禁用浏览器的同源策略(Same-Origin Policy)和跨站请求伪造(CSRF)保护
53
53
  `--window-size=${width},${height}`,
54
54
  '--no-sandbox', //禁用沙箱模式
55
+ '--disable-autofill-backend', //禁止自动填充
55
56
  '--disable-setuid-sandbox', //禁用 setuid 沙箱。这是另一种沙箱模式,通常用于 Linux 系统上。
56
- // '--disable-infobars', //禁用 Chrome 在自动化控制时显示的信息栏。这个信息栏通常会告诉用户浏览器正在被自动化工具控制。
57
+ // '--disable-infobars', //禁用自动化控制时显示的信息栏,告诉用户浏览器正在被自动化工具控制。
57
58
  '--disable-gpu', //禁用 GPU 加速。这通常用于在服务器环境或某些不支持 GPU 加速的平台上运行 Chrome。
58
59
  '--disable-blink-features=AutomationControlled', //防止检测 blink 引擎特性判断是否处于自动化控制状态。
59
60
  '--lang=zh-CN', //设置中文环境
@@ -64,6 +65,7 @@ async function newChrome(params) {
64
65
 
65
66
  if (proxy) option.defaultArgs.push(`--proxy-server=${proxy}`);
66
67
  if (incognito) option.args.push('--incognito', '--disable-infobars'); //使用无痕模式启动
68
+ if (!cache) option.args.push('--disable-cache'); //禁缓存
67
69
  if (path) option.executablePath = path; //指定chrome安装路径
68
70
 
69
71
  const browser = await puppeteer.launch(option);
@@ -77,8 +79,8 @@ async function newChrome(params) {
77
79
  navigator.__proto__ = newProto;
78
80
  });
79
81
 
80
- if (cookies !== false) cookies = `runtime/cookies/${id}`;
81
- const pageOption = { cookies, visible, abort_img, no_cache, headers };
82
+ if (cookies !== false) cookies = `runtime/.cache/${id}/cookies.json`;
83
+ const pageOption = { cookies, visible, abort, headers };
82
84
  return new chrome(browser, page, pageOption);
83
85
  }
84
86
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nodejs_chromium",
3
- "version": "1.0.8",
3
+ "version": "1.1.1",
4
4
  "description": "",
5
5
  "main": "index.js",
6
6
  "scripts": {
package/src/chrome.js CHANGED
@@ -48,6 +48,7 @@ module.exports = class {
48
48
  async clone() {
49
49
  try {
50
50
  const page = await this.browser.newPage();
51
+ await page.setBypassCSP(true); //绕过页面的内容安全策略
51
52
  await page.setRequestInterception(true); //允许拦截
52
53
  await page.evaluateOnNewDocument(() => {
53
54
  const newProto = navigator.__proto__;
@@ -151,8 +152,7 @@ module.exports = class {
151
152
  /**
152
153
  * 关闭
153
154
  */
154
- async close(act = 1) {
155
-
155
+ async close(act = 0) {
156
156
  try {
157
157
  if (act & 1) await this.page.close();
158
158
  if (act & 2) await this.browser.close();
@@ -327,10 +327,10 @@ module.exports = class {
327
327
  * div[name=abc]
328
328
  * iframe:first-child
329
329
  * div>ul>li:nth-child(4)>a
330
- *
330
+ *
331
331
  * querySelector,若匹配不到返回null
332
332
  * page.$(),若匹配不上则会抛出错误,这里最后也会在catch中返回null
333
- *
333
+ *
334
334
  * @param {Object} tag
335
335
  */
336
336
  async element(tag, selector = false) {
@@ -387,6 +387,7 @@ module.exports = class {
387
387
  async waiting(time) {
388
388
  return await this.sleep(time);
389
389
  }
390
+
390
391
  async sleep(time) {
391
392
  if (time < 100) time = time * 1000;
392
393
  try {
@@ -576,7 +577,6 @@ module.exports = class {
576
577
  }
577
578
 
578
579
 
579
-
580
580
  async parseResponse(response) {
581
581
  // const response = await this.page.waitForResponse(res => res);
582
582
 
@@ -598,7 +598,7 @@ module.exports = class {
598
598
  value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
599
599
  // value.headers = headers;
600
600
  if (headers['server']) value.server = headers['server'];
601
- if (headers['set-cookie']) value.cookies = await this.cookies.parseCookies(headers['set-cookie']);
601
+ if (headers['set-cookie']) value.cookies = await this.cookies.parse(headers['set-cookie']);
602
602
  value.remote = await response.remoteAddress(); //目标服务器
603
603
  if (value.status === 301 || value.status === 302) return value;
604
604
  if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
@@ -612,50 +612,48 @@ module.exports = class {
612
612
  if (value.post) value.post = value.post.toString();
613
613
 
614
614
  try {
615
- value.buffer = await response.buffer();
616
- value.response = value.buffer.toString();
615
+ const buffer = await response.buffer();
616
+ value.response = buffer.toString();
617
617
  value.json = JSON.parse(value.response);
618
618
  }
619
619
  catch (e) {
620
- value.json = e.parse();
620
+ const err = e.parse();
621
+ value.json = err.message;
621
622
  }
622
623
 
623
624
  return value;
624
625
  }
625
626
 
627
+ host(url) {
628
+ // const domain = url.protocol + '//' + url.host;
629
+ const urls = parseUrl(url);
630
+ return '.' + urls.host.split('.').slice(-2).join('.');
631
+ }
632
+
626
633
 
627
- doListening(options) {
634
+ async doListening(options) {
628
635
 
629
636
  this.page.on('request', async (request) => {
637
+ const host = this.host(request.url());
638
+ const headers = request.headers();
639
+ if (headers.cookie) this.cookies.request(headers.cookie, host);
640
+
630
641
  if (this.requestCall) {
631
642
  const run = await this.requestCall(request);
632
- if (run === false) {
633
- request.abort();
634
- return;
635
- }
643
+ if (run === false) return request.abort();
636
644
  }
637
645
 
638
- const { abort_img = false, no_cache = true, headers = {} } = options;
639
- if (abort_img && /\.(?:png|jpg|jpeg|svg|gif|bmp)$/i.test(request.url())) {
640
- request.abort();
641
- return;
642
- }
646
+ const rType = request.resourceType();
647
+ const { abort = [], headers: optHead = {} } = options;
643
648
 
644
- const headersAll = request.headers();
645
- if (headers !== {}) Object.assign(headersAll, headers);
649
+ if (abort.length > 0 && abort.some(t => t === rType)) return request.abort();
646
650
 
647
- headersAll['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
648
- headersAll['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
649
- // headersAll['Access-Control-Allow-Headers'] = 'Content-Type';
651
+ if (optHead !== {}) Object.assign(headers, optHead);
650
652
 
651
- if (no_cache) {
652
- const rType = request.resourceType();
653
- if (rType === 'script' || rType === 'stylesheet') {
654
- headersAll['Cache-Control'] = 'no-store'; // 禁用缓存
655
- }
656
- }
657
-
658
- request.continue({ headers: headersAll });
653
+ headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
654
+ headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
655
+ // headers['Access-Control-Allow-Headers'] = 'Content-Type';
656
+ request.continue({ headers });
659
657
  });
660
658
 
661
659
  this.page.on('response', async res => {
@@ -664,7 +662,6 @@ module.exports = class {
664
662
  await this.responseCall(json);
665
663
  });
666
664
 
667
-
668
665
  }
669
666
 
670
667
  }
package/src/cookies.js CHANGED
@@ -1,10 +1,11 @@
1
1
  const fs = require("fs");
2
+ const { parse: parseUrl } = require("url");
2
3
 
3
4
 
4
5
  module.exports = class {
5
6
  page = void 0;
6
7
  file = void 0;
7
- cookies = void 0;
8
+ cookies = [];
8
9
 
9
10
  constructor(page, cookies) {
10
11
  this.page = page;
@@ -12,14 +13,16 @@ module.exports = class {
12
13
 
13
14
  this.file = cookies;
14
15
  if (!fs.existsSync(cookies)) return;
15
- console.log('this.cookie_file', cookies);
16
+ // console.log('cookies file:', cookies);
16
17
  let cookiesVal = fs.readFileSync(cookies, 'utf8');
18
+ if (cookiesVal === '{}') cookiesVal = '[]';
17
19
  this.cookies = JSON.parse(cookiesVal);
20
+ // this.set(this.cookies);
18
21
  }
19
22
 
20
- jsonArray(cookiesVal, host) {
21
- if (!cookiesVal) return [];
22
- return JSON.parse(cookiesVal).map(cook => {
23
+ json(str, host) {
24
+ if (!str) return [];
25
+ return JSON.parse(str).map(cook => {
23
26
  let { name, value, domain } = cook;
24
27
  if (!domain) domain = host;
25
28
  return { name, value, domain };
@@ -27,19 +30,19 @@ module.exports = class {
27
30
  }
28
31
 
29
32
  /**
30
-
33
+
31
34
  // const url = parseUrl(this.page.url());
32
35
  // const host = '.' + url.host.split('.').slice(-2).join('.');
33
36
  // let cookiesVal = read(file, 'utf8');
34
37
  // cookiesVal = this.jsonArray(cookiesVal, host);
35
-
38
+
36
39
  */
37
40
  /**
38
41
  * 解析网页set-cookies的值
39
42
  *
40
43
  * @param {Object} strCookies
41
44
  */
42
- async parseCookies(strCookies) {
45
+ async parse(strCookies) {
43
46
  return await strCookies.split("\n").map((ls) => {
44
47
  let value = {};
45
48
  ls.split(';').map((ln, j) => {
@@ -54,6 +57,8 @@ module.exports = class {
54
57
  }
55
58
  else if (Key === 'Max-Age') {
56
59
  value.expire = parseInt(arr[1]) + (Date.now() / 1000);
60
+ if (value.expire > 0) value.expire_date = (value.expire * 1000).date('Y-m-d H:i:s.SSS')
61
+ //有可能存在expires
57
62
  }
58
63
  else if (Key === 'Secure') {
59
64
  value.source = true;
@@ -62,37 +67,29 @@ module.exports = class {
62
67
  else {
63
68
  value[Key[0].toLowerCase() + Key.substring(1)] = arr[1] || true;
64
69
  }
70
+
71
+ if (value.expires && value.expires > 0) {
72
+ value.expires_date = (value.expires * 1000).date('YYYY-mm-dd HH:ii:ss.SSS')
73
+ }
74
+
65
75
  })
66
76
  return value;
67
77
  })
68
78
  }
69
79
 
70
- /**
71
- * 合并两个Cookies,并以后面的值为准
72
- *
73
- * @param {Object} oldCookies
74
- * @param {Object} newCookies
75
- */
76
- async mergeNewCookies(oldCookies, newCookies) {
77
- if (newCookies.length === 0) return oldCookies;
78
- let aMap = new Map(oldCookies.map(obj => [obj.name, obj]));
79
- // let aMap = JSON.parse(JSON.stringify(this.cookies)); //深拷贝
80
- newCookies.map(obj => {
81
- aMap.set(obj.name, obj)
82
- });
83
- return await Array.from(aMap.values());
84
- }
85
-
86
80
 
87
81
  /**
88
82
  * 获取当前页面的Cookies
89
83
  */
90
- async getCookies() {
84
+ async get(key) {
91
85
  try {
92
- return await this.page.cookies();
86
+ await this.merge();
87
+ // const cookies = await this.page.cookies();
88
+ if (key === undefined) return this.cookies;
89
+ return (this.cookies.filter(c => c.name === key) || [{}])[0];
93
90
  }
94
91
  catch (e) {
95
- console.log('[chrome.getCookies.Error]', e.message);
92
+ console.log('[chrome.cookies.get.Error]', e.message);
96
93
  return [];
97
94
  }
98
95
  }
@@ -101,38 +98,91 @@ module.exports = class {
101
98
  * 设置Cookies
102
99
  * @param {Object} cookies
103
100
  */
104
- async setCookies(cookies) {
101
+ async set(cookies) {
105
102
  try {
103
+ if (cookies === undefined) cookies = this.cookies;
106
104
  await this.page.setCookie(...cookies.map(ck => {
107
105
  if (typeof ck.expires === 'string') ck.expires = new Date(ck.expires).getTime();
108
106
  return ck;
109
107
  }).filter(ck => (!!ck.domain || !!ck.url)));
110
108
  }
111
109
  catch (e) {
112
- console.log('[chrome.setCookies.Error]', e.message);
110
+ console.log('[chrome.cookies.set.Error]', e.message);
113
111
  console.log(JSON.stringify(cookies));
114
112
  }
115
113
  }
116
114
 
117
115
  /**
118
116
  * 保存当前页面中的Cookies
119
- * @param {Object} file
120
117
  */
121
- async saveCookies(file, append = true) {
118
+ async save() {
119
+ try {
120
+ if (!this.file) return [];
121
+ await this.merge();
122
+ await fs.writeFileSync(this.file, JSON.stringify(this.cookies, null, 2));
123
+ return this.cookies;
124
+ }
125
+ catch (e) {
126
+ console.log('[chrome.cookies.save.Error]', e.message);
127
+ return [];
128
+ }
129
+ }
130
+
131
+ async merge() {
122
132
  try {
123
133
  let cookies = await this.page.cookies();
134
+ // console.log('merge>>>>', cookies, '<<<<<merge');
135
+ if (cookies.length === 0) return;
136
+
137
+ const newMap = new Map(this.cookies.map(obj => [obj.name, obj]));
138
+ cookies.map(obj => {
139
+ if (obj.expires && obj.expires > 0) obj.expires_date = (obj.expires * 1000).date('yyyy-mm-dd HH:ii:ss.SSS')
140
+ newMap.set(obj.name, obj)
141
+ });
142
+ this.cookies = await Array.from(newMap.values());
124
143
 
125
- if (append && fs.existsSync(file)) {
126
- let dbCookies = JSON.parse(fs.readFileSync(file, 'utf8'));
127
- cookies = this.mergeNewCookies(dbCookies, cookies);
128
- }
129
- await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
130
144
  }
131
145
  catch (e) {
132
- console.log('[chrome.saveCookies.Error]', e.message);
146
+ console.log('[chrome.cookies.merge.Error]', e.message);
133
147
  }
134
148
  }
135
149
 
150
+ /**
151
+ * 合并request时自动带上的cookies
152
+ *
153
+ * @param cookie
154
+ * @param domain
155
+ * @returns {*[]|*}
156
+ */
157
+ async request(cookie, domain) {
158
+ if (!cookie) return [];
159
+ const cookies = cookie.split(';').map(ck => {
160
+ const [name, value] = ck.trim().split('=');
161
+ return { name, value };
162
+ });
163
+ const newMap = new Map(this.cookies.map(obj => [obj.name, obj]));
164
+ cookies.map(obj => {
165
+ const { name, value } = obj;
166
+ if (!newMap.has(obj.name)) {
167
+ newMap.set(obj.name, { name, value, domain })
168
+ return;
169
+ }
170
+ const cookies = newMap.get(obj.name);
171
+ if (!cookies.domain) {
172
+ newMap.set(obj.name, { name, value, domain })
173
+ return;
174
+ }
175
+ if (cookies.value === value) return;
176
+
177
+ const host = '.' + cookies.domain.split('.').slice(-2).join('.');
178
+ if (host === domain) {
179
+ cookies.value = value;
180
+ newMap.set(obj.name, cookies)
181
+ }
182
+ });
183
+
184
+ this.cookies = await Array.from(newMap.values());
185
+ }
136
186
 
137
187
 
138
188
  }