nodejs_chromium 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/demo.js +58 -0
- package/index.js +61 -0
- package/package.json +15 -0
- package/src/chrome.js +227 -0
package/demo.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
const { newChrome } = require('nodejs_chromium');

/**
 * Demo: log in to a site in a visible Chromium window, then persist the
 * cookies, two HTML snapshots, a watermarked screenshot and the captured
 * responses.
 *
 * Must be invoked with a `this` that exposes `ctx.client.unid` (for example
 * via `debug_view.call(controller, account)`); `unid` is used to name the
 * output files under `runtime/`.
 *
 * NOTE(review): `patch` is not defined in this file — presumably a global
 * installed by the `nodejs_patch` dependency; confirm before relying on it.
 *
 * @param {Object} account
 * @param {string} account.id - forwarded to newChrome as `id`
 * @param {string} account.ua - user-agent string forwarded to newChrome
 * @param {string} account.user - typed into `input[name=user]`
 * @param {string} account.password - typed into `input[name=password]`
 * @param {string} account.cookie_file - path the cookies are written to
 * @param {string} account.time - text rendered as the screenshot watermark
 * @returns {Promise<Object>} resolves with an empty object on success
 * @throws rethrows any error raised while driving the page, after logging it
 */
async function debug_view(account) {
    const { unid } = this.ctx.client;

    const params = {
        uid: unid,
        incognito: true,
        debug: false,
        view: true,
        width: 1680,
        height: 960,
        id: account.id,
        ua: account.ua,
    };
    const chrome = await newChrome(params);
    console.log(Date.now() / 1000, '初始化');

    try {
        await chrome.open('https://domain.com/');
        // wait() takes a plain number; values below 200 are treated as seconds.
        await chrome.wait('div#login', 5);
        await chrome.saveHtml(`runtime/pages/${unid}1.html`);
        await chrome.input('input[name=password]', account.password);
        await chrome.input('input[name=user]', account.user);

        // Pause for two seconds before submitting. Awaiting the delay (instead
        // of running the rest inside a setTimeout callback) keeps every later
        // failure inside this try block.
        await new Promise((resolve) => setTimeout(resolve, 2000));

        // Start waiting for the navigation before clicking, otherwise a fast
        // navigation can finish before the wait is registered.
        await Promise.all([
            chrome.wait(),
            chrome.click('div.submit'),
        ]);
        await chrome.saveCookies(account.cookie_file);
        await chrome.saveHtml(`runtime/pages/${unid}2.html`);
        await chrome.wait('div#body');
        await chrome.watermark({ text: account.time });
        await chrome.photograph(`runtime/screen/${unid}.jpeg`, 50);

        await patch.saveJson('runtime/response.json', chrome.response);

        return {};
    } catch (error) {
        console.log('Error:', error.message, "\n\n");
        throw error;
    } finally {
        // Detach from the browser on both the success and the failure path.
        await chrome.disconnect();
    }
}
|
package/index.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
require('nodejs_patch');
|
|
2
|
+
const puppeteer = require("puppeteer");
|
|
3
|
+
const chrome = require("./src/chrome.js");
|
|
4
|
+
|
|
5
|
+
/**
 * Launch a Chromium instance through Puppeteer, prepare its first tab
 * (request interception, user agent, `navigator.webdriver` removal) and wrap
 * both in the `chrome` helper class.
 *
 * @param {Object} [params] - launch options; also forwarded unchanged to the wrapper
 * @param {string} [params.id] - profile directory name under `runtime/template/`; defaults to the current Unix time
 * @param {boolean} [params.view] - truthy opens a visible window, otherwise the new headless mode is used
 * @param {number} [params.width=1024] - viewport and window width
 * @param {number} [params.height=768] - viewport and window height
 * @param {number} [params.scale=1] - device scale factor
 * @param {boolean} [params.mobile] - emulate a mobile viewport
 * @param {boolean} [params.dumpio] - pipe the browser's stdio into the Node.js process
 * @param {boolean} [params.debug] - open DevTools for each tab
 * @param {boolean} [params.incognito] - add the `--incognito` switch
 * @param {string} [params.path] - explicit Chrome executable path
 * @param {string} [params.ua] - user-agent override; a Chrome 122 / Windows 10 string is used when absent
 * @returns {Promise<Object>} instance of the wrapper class exported by ./src/chrome.js
 * @throws rethrows launch or setup errors; the browser is closed first if setup fails
 */
async function newChrome(params = {}) {
    const ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36';
    let { id, view, width, height, scale, mobile, dumpio, debug, incognito, path } = params;
    if (!width) width = 1024;
    if (!height) height = 768;
    if (!scale) scale = 1;
    if (!id) id = (Date.now() / 1000) + '';

    const option = {
        userDataDir: `runtime/template/${id}`,
        slowMo: 2, // slow every Puppeteer operation down by this many milliseconds; keep it small
        headless: view ? false : 'new', // false opens a visible browser window, 'new' runs without UI
        devtools: !!debug, // open DevTools
        ignoreHTTPSErrors: true, // ignore HTTPS errors
        ignoreDefaultArgs: ["--enable-automation"], // drop the default switch that lets sites detect automation
        dumpio: !!dumpio, // forward the browser's stdout/stderr to the Node.js process
        defaultViewport: {
            width,
            height,
            deviceScaleFactor: scale, // zoom ratio
            isMobile: !!mobile,
        },
        args: [
            // NOTE(review): the next three switches weaken browser security
            // (same-origin policy and sandboxing); only browse trusted targets.
            '--disable-web-security', // disable the same-origin policy
            `--window-size=${width},${height}`,
            '--no-sandbox', // disable the sandbox
            '--disable-setuid-sandbox', // disable the setuid sandbox (Linux)
            '--disable-infobars', // hide the "controlled by automated software" info bar
            '--disable-gpu', // disable GPU acceleration (servers / platforms without GPU support)
            '--disable-blink-features=AutomationControlled', // hide Blink's automation marker from detection scripts
            '--lang=zh-CN', // Chinese locale
        ],
    };

    if (incognito) {
        // '--disable-infobars' is already part of the base list, so only the
        // incognito switch is appended here.
        option.args.push('--incognito');
    }

    if (path) option.executablePath = path; // use a specific Chrome installation

    const browser = await puppeteer.launch(option);
    try {
        const [firstPage] = await browser.pages();
        // Fall back to a fresh tab if the browser reports no open page.
        const page = firstPage || await browser.newPage();

        await page.setRequestInterception(true); // the wrapper's request handler relies on interception
        await page.setUserAgent(params.ua || ua);
        // On every new document, remove navigator.webdriver so pages cannot
        // use it to detect automation.
        await page.evaluateOnNewDocument(() => {
            const newProto = navigator.__proto__;
            delete newProto.webdriver;
            navigator.__proto__ = newProto;
        });

        return new chrome(browser, page, params);
    } catch (error) {
        // Setup failed after launch: close the browser so its process does not
        // leak. A failure while closing must not mask the original error.
        await browser.close().catch(() => {});
        throw error;
    }
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
module.exports = { newChrome }
|
package/package.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "nodejs_chromium",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
8
|
+
},
|
|
9
|
+
"dependencies": {
|
|
10
|
+
"nodejs_patch": ">1.0.5",
|
|
11
|
+
"puppeteer": "^21.10.0"
|
|
12
|
+
},
|
|
13
|
+
"author": "fazo@qq.com",
|
|
14
|
+
"license": "ISC"
|
|
15
|
+
}
|
package/src/chrome.js
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
const fs = require("fs");
|
|
2
|
+
const { parse: parseUrl } = require("url");
|
|
3
|
+
|
|
4
|
+
module.exports = class {
|
|
5
|
+
browser = null;
|
|
6
|
+
page = null;
|
|
7
|
+
response = [];
|
|
8
|
+
|
|
9
|
+
constructor(browser, page, params) {
|
|
10
|
+
this.browser = browser;
|
|
11
|
+
this.page = page;
|
|
12
|
+
|
|
13
|
+
page.on('request', (request) => {
|
|
14
|
+
const { abort_img, no_cache, append_headers } = params;
|
|
15
|
+
if (abort_img && /\.(?:png|jpg|jpeg|svg|gif)$/i.test(request.url())) {
|
|
16
|
+
request.abort();
|
|
17
|
+
return;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
const headers = request.headers();
|
|
21
|
+
if (typeof append_headers === 'object') Object.assign(headers, append_headers);
|
|
22
|
+
|
|
23
|
+
headers['Access-Control-Allow-Origin'] = '*'; // 设置允许跨源访问的域名,可以根据需求修改
|
|
24
|
+
headers['Access-Control-Allow-Methods'] = '*'; //'GET, POST, PUT, OPTIONS';
|
|
25
|
+
// headers['Access-Control-Allow-Headers'] = 'Content-Type';
|
|
26
|
+
|
|
27
|
+
if (no_cache) {
|
|
28
|
+
const rType = request.resourceType();
|
|
29
|
+
if (rType === 'script' || rType === 'stylesheet') {
|
|
30
|
+
headers['Cache-Control'] = 'no-store'; // 禁用缓存
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
request.continue({ headers });
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
page.on('response', async res => {
|
|
38
|
+
let json = await this.parseResponse(res);
|
|
39
|
+
if (json.method === 'POST') console.log(json);
|
|
40
|
+
if (json.body) this.response.push(json);
|
|
41
|
+
});
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
async open(url) {
|
|
45
|
+
return await this.page.goto(url, { waitUntil: 'load' });
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* 根据目标要求格式提交
|
|
50
|
+
*
|
|
51
|
+
* @param url
|
|
52
|
+
* @param data
|
|
53
|
+
* @returns {Promise<*>}
|
|
54
|
+
*/
|
|
55
|
+
async post(url, data) {
|
|
56
|
+
//JSON.stringify(data)
|
|
57
|
+
return await this.page.goto(url, { method: 'POST', body: data, waitUntil: 'load' });
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
async input(el, value) {
|
|
61
|
+
return await this.page.type(el, value);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
async click(el) {
|
|
65
|
+
await this.page.click(el);
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* page.on方法
|
|
70
|
+
*
|
|
71
|
+
* @param {Object} key
|
|
72
|
+
* @param {Object} call
|
|
73
|
+
*/
|
|
74
|
+
async on(key, call) {
|
|
75
|
+
await this.page.on(key, call);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
async disconnect() {
|
|
79
|
+
await this.browser.disconnect();
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
async wait(ele, timeout = 0, tryCount = 0) {
|
|
83
|
+
if (ele) {
|
|
84
|
+
return await this.waitSelector(ele, timeout, tryCount);
|
|
85
|
+
}
|
|
86
|
+
return await this.page.waitForNavigation();
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
async waitSelector(ele, timeout = 0, tryCount = 0) {
|
|
90
|
+
if (timeout < 200) timeout = timeout * 1000;
|
|
91
|
+
try {
|
|
92
|
+
await this.page.waitForSelector(ele, { timeout });
|
|
93
|
+
return true;
|
|
94
|
+
}
|
|
95
|
+
catch (e) {
|
|
96
|
+
if (tryCount > 0) {
|
|
97
|
+
return await this.waitSelector(ele, timeout, --tryCount)
|
|
98
|
+
}
|
|
99
|
+
return false;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
async parseResponse(response) {
|
|
104
|
+
// const response = await this.page.waitForResponse(res => res);
|
|
105
|
+
|
|
106
|
+
const value = {};
|
|
107
|
+
const request = await response.request();
|
|
108
|
+
const headers = await response.headers();
|
|
109
|
+
value.method = await request.method();
|
|
110
|
+
if (value.method === 'OPTIONS') return;
|
|
111
|
+
|
|
112
|
+
value.type = await request.resourceType();
|
|
113
|
+
// value.redirect = await response.redirectURL();
|
|
114
|
+
if (value.type === 'xhr') value.type = 'AJAX';
|
|
115
|
+
value.url = await response.url();
|
|
116
|
+
value.domain = parseUrl(value.url)['host'];
|
|
117
|
+
value.content = headers['content-type'];
|
|
118
|
+
value.length = headers['content-length'];
|
|
119
|
+
value.status = await response.status();
|
|
120
|
+
value.ok = await response.ok();
|
|
121
|
+
value.datetime = (new Date(headers['date'])).date('yyyy-mm-dd hh:ii:ss');
|
|
122
|
+
// value.headers = headers;
|
|
123
|
+
if (headers['server']) value.server = headers['server'];
|
|
124
|
+
if (headers['set-cookie']) value.cookies = headers['set-cookie'];
|
|
125
|
+
value.remote = await response.remoteAddress(); //目标服务器
|
|
126
|
+
if (value.status === 301 || value.status === 302) return value;
|
|
127
|
+
if (['image', 'font', 'other', 'script', 'stylesheet', 'document', 'ping', 'fetch'].has(value.type)) return value;
|
|
128
|
+
if (value.content) {
|
|
129
|
+
if (value.content.startsWith('application/vnd')) return value;
|
|
130
|
+
if (value.content.startsWith('application/xml')) return value;
|
|
131
|
+
if (value.content.startsWith('text/css')) return value;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
value.post = await request.postData();
|
|
135
|
+
if (value.post) value.post = value.post.toString();
|
|
136
|
+
|
|
137
|
+
try {
|
|
138
|
+
value.response = await response.buffer();
|
|
139
|
+
value.response = value.response.toString();
|
|
140
|
+
}
|
|
141
|
+
catch (e) {
|
|
142
|
+
value.response = e.parse();
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return value;
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
async saveHtml(file) {
|
|
150
|
+
try {
|
|
151
|
+
await this.improveUrls(); //修正js/css的域名
|
|
152
|
+
const body = await this.page.evaluate(() => {
|
|
153
|
+
return document.documentElement.innerHTML;
|
|
154
|
+
});
|
|
155
|
+
await fs.writeFileSync(file, body);
|
|
156
|
+
}
|
|
157
|
+
catch (e) {
|
|
158
|
+
console.log('saveHtml Error:', e.parse());
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
async saveCookies(file) {
|
|
163
|
+
try {
|
|
164
|
+
const cookies = await this.page.cookies();
|
|
165
|
+
await fs.writeFileSync(file, JSON.stringify(cookies, null, 2));
|
|
166
|
+
}
|
|
167
|
+
catch (e) {
|
|
168
|
+
console.log('saveHtml Error:', e.parse());
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
async watermark(conf) {
|
|
173
|
+
await this.page.evaluate((conf) => {
|
|
174
|
+
const wmDiv = document.createElement('div');
|
|
175
|
+
wmDiv.style.position = 'fixed';
|
|
176
|
+
wmDiv.style.top = `50%`;
|
|
177
|
+
wmDiv.style.left = `50%`;
|
|
178
|
+
wmDiv.style.transform = 'translate(-50%, -50%)';
|
|
179
|
+
wmDiv.style.fontSize = `36px`;
|
|
180
|
+
wmDiv.style.zIndex = '10000';
|
|
181
|
+
// wmDiv.style.color = color;
|
|
182
|
+
wmDiv.style.color = 'rgba(0, 0, 0, 0.9)';
|
|
183
|
+
wmDiv.style.pointerEvents = 'none';
|
|
184
|
+
wmDiv.innerText = conf.text;
|
|
185
|
+
document.body.appendChild(wmDiv);
|
|
186
|
+
}, conf);
|
|
187
|
+
await this.page.waitForSelector('div');
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
/**
|
|
192
|
+
* 网页拍照
|
|
193
|
+
* @param file
|
|
194
|
+
* @param quality
|
|
195
|
+
* @returns {Promise<void>}
|
|
196
|
+
*/
|
|
197
|
+
async photograph(file, quality = 50) {
|
|
198
|
+
await this.page.screenshot({
|
|
199
|
+
path: file,
|
|
200
|
+
fullPage: true, //全屏
|
|
201
|
+
type: 'jpeg',
|
|
202
|
+
quality: quality,
|
|
203
|
+
omitBackground: true, //显示背景
|
|
204
|
+
});
|
|
205
|
+
console.log('photograph=', file);
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
/**
|
|
209
|
+
* 补全所有本地js/css
|
|
210
|
+
* @returns {Promise<void>}
|
|
211
|
+
* @constructor
|
|
212
|
+
*/
|
|
213
|
+
async improveUrls() {
|
|
214
|
+
const url = parseUrl(this.page.url());
|
|
215
|
+
const domain = url.protocol + '//' + url.host;
|
|
216
|
+
await this.page.evaluate((domain) => {
|
|
217
|
+
const elements = document.querySelectorAll('script[src], link[href], iframe[src]');
|
|
218
|
+
elements.forEach((element) => {
|
|
219
|
+
const src = element.getAttribute('src');
|
|
220
|
+
const href = element.getAttribute('href');
|
|
221
|
+
if (src && src.startsWith('/')) element.src = domain + src;
|
|
222
|
+
if (href && href.startsWith('/')) element.href = domain + href;
|
|
223
|
+
});
|
|
224
|
+
}, domain);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
}
|