unprint 0.19.4 → 0.19.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -6
- package/package.json +6 -2
- package/src/app.js +41 -80
- package/src/server.js +164 -95
- package/tests/remote.js +34 -6
package/README.md
CHANGED
|
@@ -294,22 +294,27 @@ unprint.get({
|
|
|
294
294
|
});
|
|
295
295
|
```
|
|
296
296
|
|
|
297
|
-
###
|
|
298
|
-
You can run unprint as a
|
|
299
|
-
If you don't need unprint-specific features, you should probably set up a dedicated proxy server instead.
|
|
297
|
+
### Browser server
|
|
298
|
+
You can run unprint as a Playwright browser server. This is particularly useful if the client does not have sufficient resources to run browsers locally. The browser server is not meant to replace a dedicated proxy.
|
|
300
299
|
|
|
301
300
|
#### Server
|
|
302
301
|
* Ensure optional dependencies are installed
|
|
303
|
-
* `
|
|
302
|
+
* `node src/app.js --server [port|address:port]`
|
|
303
|
+
|
|
304
|
+
Environment variables (optional, .env is supported):
|
|
305
|
+
* `UNPRINT_HOST`: Server address as `host[:port]`, default `127.0.0.1:3333`
|
|
306
|
+
* `UNPRINT_KEY`: Key required for authentication
|
|
307
|
+
* `UNPRINT_MEMORY_LIMIT`: Memory threshold in MB at which the browser is cycled, default 30% of total system memory (minimum 1024 MB)
|
|
308
|
+
* `UNPRINT_LOG_LEVEL`: Verbosity of the console output, `info` (default), `debug`, `silly`, `warn`, `error`
|
|
304
309
|
|
|
305
310
|
#### Client
|
|
306
311
|
```
|
|
307
312
|
unprint.options({
|
|
308
313
|
remote: {
|
|
309
314
|
enable: true,
|
|
310
|
-
|
|
315
|
+
use: true, // use by default
|
|
316
|
+
address: 'ws://10.0.0.1:3333/browser',
|
|
311
317
|
key: 'YOUR_UNPRINT_KEY',
|
|
312
|
-
methods: ['browser'], // browser, get, post
|
|
313
318
|
},
|
|
314
319
|
});
|
|
315
320
|
```
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "unprint",
|
|
3
|
-
"version": "0.19.4",
|
|
3
|
+
"version": "0.19.5",
|
|
4
4
|
"description": "Simplify common web scraping tasks while staying in control of the data.",
|
|
5
5
|
"main": "src/app.js",
|
|
6
6
|
"repository": {
|
|
@@ -26,15 +26,19 @@
|
|
|
26
26
|
"eslint": "^8.17.0",
|
|
27
27
|
"eslint-config-airbnb": "^19.0.4",
|
|
28
28
|
"eslint-config-airbnb-base": "^15.0.0",
|
|
29
|
+
"express-ws": "^5.0.2",
|
|
29
30
|
"jsdom": "^17.0.0",
|
|
30
31
|
"minimist": "^1.2.8",
|
|
31
32
|
"moment-timezone": "^0.5.34",
|
|
32
33
|
"object-hash": "^3.0.0",
|
|
33
34
|
"patchright": "^1.56.1",
|
|
35
|
+
"pidtree": "^0.6.0",
|
|
36
|
+
"pidusage": "^4.0.1",
|
|
34
37
|
"set-cookie-parser": "^3.0.1",
|
|
35
38
|
"srcset": "^4.0.0",
|
|
36
39
|
"tunnel": "^0.0.6",
|
|
37
|
-
"undici": "^7.18.2"
|
|
40
|
+
"undici": "^7.18.2",
|
|
41
|
+
"ws": "^8.19.0"
|
|
38
42
|
},
|
|
39
43
|
"devDependencies": {
|
|
40
44
|
"@playwright/test": "^1.56.1",
|
package/src/app.js
CHANGED
|
@@ -23,8 +23,8 @@ const settings = {
|
|
|
23
23
|
userAgent: 'unprint',
|
|
24
24
|
remote: {
|
|
25
25
|
enable: false,
|
|
26
|
-
address: '
|
|
27
|
-
|
|
26
|
+
address: 'ws://127.0.0.1:3333/browser',
|
|
27
|
+
use: false,
|
|
28
28
|
},
|
|
29
29
|
limits: {
|
|
30
30
|
default: {
|
|
@@ -1219,9 +1219,21 @@ function curateResponse(res, data, options, { url, control, customOptions }) {
|
|
|
1219
1219
|
/* eslint-disable no-param-reassign */
|
|
1220
1220
|
const clients = new Map();
|
|
1221
1221
|
|
|
1222
|
+
async function getBrowserContext(browser, options, useProxy) {
|
|
1223
|
+
return browser.newContext({
|
|
1224
|
+
userAgent: options.browserUserAgent || options.userAgent,
|
|
1225
|
+
...options.context,
|
|
1226
|
+
...(useProxy && {
|
|
1227
|
+
proxy: {
|
|
1228
|
+
server: `${options.proxy.host}:${options.proxy.port}`,
|
|
1229
|
+
},
|
|
1230
|
+
}),
|
|
1231
|
+
});
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1222
1234
|
/* eslint-enable no-param-reassign */
|
|
1223
|
-
async function getBrowserInstance(scope, options, useProxy = false) {
|
|
1224
|
-
const scopeKey = `${scope}_${useProxy ? 'proxy' : 'direct'}_${options.browser ? hashObject(options.browser) : 'default'}_${options.context ? hashObject(options.context) : 'default'}`;
|
|
1235
|
+
async function getBrowserInstance(scope, options, useProxy = false, useRemote = false) {
|
|
1236
|
+
const scopeKey = `${scope}_${useRemote ? 'remote' : 'local'}_${useProxy ? 'proxy' : 'direct'}_${options.browser ? hashObject(options.browser) : 'default'}_${options.context ? hashObject(options.context) : 'default'}`;
|
|
1225
1237
|
const now = new Date();
|
|
1226
1238
|
|
|
1227
1239
|
if (clients.has(scopeKey)) {
|
|
@@ -1241,21 +1253,18 @@ async function getBrowserInstance(scope, options, useProxy = false) {
|
|
|
1241
1253
|
}
|
|
1242
1254
|
|
|
1243
1255
|
// if we await here, we create a race condition, and a second call to getBrowserInstance would launch another browser that will overwrite the first one
|
|
1244
|
-
const browserLauncher =
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
const contextLauncher = browserLauncher.then((browser) => browser.newContext({
|
|
1250
|
-
userAgent: options.browserUserAgent || options.userAgent,
|
|
1251
|
-
...options.context,
|
|
1252
|
-
...(useProxy && {
|
|
1253
|
-
proxy: {
|
|
1254
|
-
server: `${options.proxy.host}:${options.proxy.port}`,
|
|
1256
|
+
const browserLauncher = useRemote
|
|
1257
|
+
? chromium.connect(options.remote.address, {
|
|
1258
|
+
headers: {
|
|
1259
|
+
'unprint-key': options.remote.key,
|
|
1255
1260
|
},
|
|
1256
|
-
})
|
|
1257
|
-
|
|
1261
|
+
})
|
|
1262
|
+
: chromium.launch({
|
|
1263
|
+
headless: true,
|
|
1264
|
+
...options.browser,
|
|
1265
|
+
});
|
|
1258
1266
|
|
|
1267
|
+
const contextLauncher = browserLauncher.then((browser) => getBrowserContext(browser, options, useProxy));
|
|
1259
1268
|
const launchers = Promise.all([browserLauncher, contextLauncher]);
|
|
1260
1269
|
|
|
1261
1270
|
const client = {
|
|
@@ -1264,6 +1273,7 @@ async function getBrowserInstance(scope, options, useProxy = false) {
|
|
|
1264
1273
|
active: 0,
|
|
1265
1274
|
uses: 1,
|
|
1266
1275
|
retired: false,
|
|
1276
|
+
isRemote: useRemote,
|
|
1267
1277
|
createdAt: now,
|
|
1268
1278
|
lastUsedAt: now,
|
|
1269
1279
|
};
|
|
@@ -1302,6 +1312,7 @@ async function closeAllBrowsers() {
|
|
|
1302
1312
|
|
|
1303
1313
|
async function closeBrowser(client, options = {}) {
|
|
1304
1314
|
if (options.client === null // this browser is single-use
|
|
1315
|
+
|| client.isRemote
|
|
1305
1316
|
|| (client.retired && client.active === 0)) { // this browser is retired to minimize garbage build-up
|
|
1306
1317
|
// this browser won't be reused, browser close DOES NOT automatically close context https://github.com/microsoft/playwright/issues/15163
|
|
1307
1318
|
await client.context.close();
|
|
@@ -1355,62 +1366,13 @@ function getAgent(options, url) {
|
|
|
1355
1366
|
});
|
|
1356
1367
|
}
|
|
1357
1368
|
|
|
1358
|
-
|
|
1359
|
-
const control = typeof options.control === 'function' ? options.control.toString() : null;
|
|
1360
|
-
|
|
1361
|
-
const res = await undici.fetch(`${options.remote.address}/request`, {
|
|
1362
|
-
method: 'post',
|
|
1363
|
-
body: JSON.stringify({
|
|
1364
|
-
url,
|
|
1365
|
-
method,
|
|
1366
|
-
options: {
|
|
1367
|
-
...options,
|
|
1368
|
-
control: control && control.slice(control.indexOf('{') + 1, control.lastIndexOf('}')),
|
|
1369
|
-
},
|
|
1370
|
-
}),
|
|
1371
|
-
headers: {
|
|
1372
|
-
'content-type': 'application/json',
|
|
1373
|
-
'unprint-key': options.remote.key,
|
|
1374
|
-
},
|
|
1375
|
-
});
|
|
1376
|
-
|
|
1377
|
-
if (res.status !== 200) {
|
|
1378
|
-
return {
|
|
1379
|
-
ok: false,
|
|
1380
|
-
status: res.status,
|
|
1381
|
-
statusText: res.statusText,
|
|
1382
|
-
};
|
|
1383
|
-
}
|
|
1384
|
-
|
|
1385
|
-
const body = await res.text();
|
|
1386
|
-
const data = JSON.parse(body);
|
|
1387
|
-
|
|
1388
|
-
events.emit('requestSuccess', {
|
|
1389
|
-
...feedbackBase,
|
|
1390
|
-
status: data.status,
|
|
1391
|
-
statusText: data.statusText,
|
|
1392
|
-
});
|
|
1393
|
-
|
|
1394
|
-
return curateResponse({
|
|
1395
|
-
status: data.status,
|
|
1396
|
-
statusText: data.statusText,
|
|
1397
|
-
headers: data.headers,
|
|
1398
|
-
}, data.data, options, {
|
|
1399
|
-
url,
|
|
1400
|
-
customOptions: options,
|
|
1401
|
-
control: data.control,
|
|
1402
|
-
});
|
|
1403
|
-
}
|
|
1404
|
-
|
|
1405
|
-
function useRemoteRequest(options, method) {
|
|
1369
|
+
function useRemoteRequest(options) {
|
|
1406
1370
|
if (options.remote.enable) {
|
|
1407
|
-
if (options.useRemote) {
|
|
1408
|
-
return
|
|
1371
|
+
if (typeof options.useRemote === 'boolean') {
|
|
1372
|
+
return options.useRemote;
|
|
1409
1373
|
}
|
|
1410
1374
|
|
|
1411
|
-
|
|
1412
|
-
return true;
|
|
1413
|
-
}
|
|
1375
|
+
return options.remote.use;
|
|
1414
1376
|
}
|
|
1415
1377
|
|
|
1416
1378
|
return false;
|
|
@@ -1427,7 +1389,8 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1427
1389
|
|
|
1428
1390
|
const { limiter, interval, concurrency } = getLimiter(url, options);
|
|
1429
1391
|
const agent = getAgent(options, url);
|
|
1430
|
-
const
|
|
1392
|
+
const useProxy = agent instanceof undici.ProxyAgent;
|
|
1393
|
+
const useRemote = useRemoteRequest(options);
|
|
1431
1394
|
|
|
1432
1395
|
const feedbackBase = {
|
|
1433
1396
|
url,
|
|
@@ -1436,18 +1399,14 @@ async function browserRequest(url, customOptions = {}) {
|
|
|
1436
1399
|
concurrency,
|
|
1437
1400
|
isProxied: agent instanceof undici.ProxyAgent,
|
|
1438
1401
|
isBrowser: true,
|
|
1439
|
-
isRemote,
|
|
1402
|
+
isRemote: useRemote,
|
|
1440
1403
|
options,
|
|
1441
1404
|
};
|
|
1442
1405
|
|
|
1443
1406
|
events.emit('requestInit', feedbackBase);
|
|
1444
1407
|
|
|
1445
|
-
if (isRemote) {
|
|
1446
|
-
return remoteRequest(url, 'browser', options, feedbackBase);
|
|
1447
|
-
}
|
|
1448
|
-
|
|
1449
1408
|
return limiter.schedule(async () => {
|
|
1450
|
-
const client = await getBrowserInstance(options.client, options,
|
|
1409
|
+
const client = await getBrowserInstance(options.client, options, useProxy, useRemote);
|
|
1451
1410
|
|
|
1452
1411
|
events.emit('browserOpen', {
|
|
1453
1412
|
keys: [client.key],
|
|
@@ -1623,7 +1582,6 @@ async function request(url, customOptions = {}, redirects = 0) {
|
|
|
1623
1582
|
const { limiter, interval, concurrency } = getLimiter(url, options);
|
|
1624
1583
|
|
|
1625
1584
|
const agent = getAgent(options, url);
|
|
1626
|
-
const isRemote = useRemoteRequest(options, method);
|
|
1627
1585
|
|
|
1628
1586
|
const feedbackBase = {
|
|
1629
1587
|
url,
|
|
@@ -1632,15 +1590,17 @@ async function request(url, customOptions = {}, redirects = 0) {
|
|
|
1632
1590
|
concurrency,
|
|
1633
1591
|
isProxied: agent instanceof undici.ProxyAgent,
|
|
1634
1592
|
isBrowser: false,
|
|
1635
|
-
isRemote,
|
|
1593
|
+
isRemote: false,
|
|
1636
1594
|
options,
|
|
1637
1595
|
};
|
|
1638
1596
|
|
|
1639
1597
|
events.emit('requestInit', feedbackBase);
|
|
1640
1598
|
|
|
1599
|
+
/* only for browsers atm
|
|
1641
1600
|
if (isRemote) {
|
|
1642
1601
|
return remoteRequest(url, method, options, feedbackBase);
|
|
1643
1602
|
}
|
|
1603
|
+
*/
|
|
1644
1604
|
|
|
1645
1605
|
const curatedBody = curateRequestBody(body, options);
|
|
1646
1606
|
const curatedCookie = getCookie(options);
|
|
@@ -1748,6 +1708,7 @@ const unprint = {
|
|
|
1748
1708
|
closeBrowser,
|
|
1749
1709
|
closeAllBrowsers,
|
|
1750
1710
|
getAllBrowsers,
|
|
1711
|
+
getBrowserInstance,
|
|
1751
1712
|
initialize: init,
|
|
1752
1713
|
initializeAll: initAll,
|
|
1753
1714
|
init,
|
|
@@ -1769,7 +1730,7 @@ const unprint = {
|
|
|
1769
1730
|
};
|
|
1770
1731
|
|
|
1771
1732
|
if (argv.server) {
|
|
1772
|
-
initServer(
|
|
1733
|
+
initServer(unprint);
|
|
1773
1734
|
}
|
|
1774
1735
|
|
|
1775
1736
|
module.exports = unprint;
|
package/src/server.js
CHANGED
|
@@ -1,119 +1,150 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const crypto = require('crypto');
|
|
4
|
+
const os = require('os');
|
|
5
|
+
const WebSocket = require('ws');
|
|
4
6
|
const express = require('express');
|
|
7
|
+
const expressWs = require('express-ws');
|
|
5
8
|
const timers = require('timers/promises');
|
|
9
|
+
const { chromium } = require('patchright');
|
|
10
|
+
const pidUsage = require('pidusage');
|
|
11
|
+
const pidTree = require('pidtree');
|
|
6
12
|
|
|
7
13
|
require('dotenv').config();
|
|
8
14
|
|
|
9
15
|
const pkg = require('../package.json');
|
|
10
16
|
|
|
17
|
+
const memoryLimit = Number(process.env.UNPRINT_MEMORY_LIMIT) || Math.round(Math.max(1024, (os.totalmem() / 1024 / 1024) * 0.3)); // MB, aim for 30% of total available
|
|
18
|
+
|
|
11
19
|
class HttpError extends Error {
|
|
12
|
-
constructor(message, httpCode
|
|
20
|
+
constructor(message, httpCode) {
|
|
13
21
|
super(message);
|
|
14
22
|
|
|
15
23
|
this.name = 'HttpError';
|
|
16
24
|
this.httpCode = httpCode;
|
|
17
|
-
|
|
18
|
-
if (friendlyMessage) {
|
|
19
|
-
this.friendlyMessage = friendlyMessage;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
if (data) {
|
|
23
|
-
this.data = data;
|
|
24
|
-
}
|
|
25
25
|
}
|
|
26
26
|
}
|
|
27
27
|
|
|
28
|
-
|
|
29
|
-
const now = new Date();
|
|
30
|
-
|
|
31
|
-
console.log(`${now.toISOString()} [${level.slice(0, 5).padStart(5, ' ')}] ${data.join(' ')}`);
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
const logger = Object.fromEntries([
|
|
35
|
-
'info',
|
|
36
|
-
'debug',
|
|
28
|
+
const logLevels = [
|
|
37
29
|
'error',
|
|
38
30
|
'warn',
|
|
39
|
-
|
|
31
|
+
'info',
|
|
32
|
+
'debug',
|
|
33
|
+
'silly',
|
|
34
|
+
];
|
|
40
35
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
enable: false,
|
|
47
|
-
},
|
|
48
|
-
useRemote: false,
|
|
49
|
-
control: options.control
|
|
50
|
-
? async function control() {}.constructor('page', 'client', options.control) // eslint-disable-line no-eval,no-new-func,no-empty-function
|
|
51
|
-
: null,
|
|
52
|
-
};
|
|
53
|
-
}
|
|
36
|
+
const logLevel = process.env.UNPRINT_LOG_LEVEL || 'info';
|
|
37
|
+
|
|
38
|
+
function log(level, ...data) {
|
|
39
|
+
if (logLevels.indexOf(level) <= logLevels.indexOf(logLevel)) {
|
|
40
|
+
const now = new Date();
|
|
54
41
|
|
|
55
|
-
|
|
56
|
-
if (!req.body?.url) {
|
|
57
|
-
throw new HttpError('No URL provided', 400);
|
|
42
|
+
console.log(`${now.toISOString()} [${level.slice(0, 5).padStart(5, ' ')}] ${data.join(' ')}`);
|
|
58
43
|
}
|
|
44
|
+
}
|
|
59
45
|
|
|
60
|
-
|
|
46
|
+
const logger = Object.fromEntries(logLevels.map((level) => [level, (...data) => log(level, ...data)]));
|
|
61
47
|
|
|
62
|
-
|
|
48
|
+
function closeSocket(socket, code, reason) {
|
|
49
|
+
const safeCode = code >= 1000 && code <= 1015 && code !== 1006
|
|
50
|
+
? code
|
|
51
|
+
: 1000;
|
|
63
52
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
}
|
|
53
|
+
try {
|
|
54
|
+
socket.close(safeCode, reason);
|
|
55
|
+
} catch (error) {
|
|
56
|
+
// probably already closed
|
|
57
|
+
}
|
|
58
|
+
}
|
|
69
59
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
statusText: unprintRes.statusText,
|
|
74
|
-
data: unprintRes.data || null,
|
|
75
|
-
body: unprintRes.body || null,
|
|
76
|
-
html: unprintRes.context?.html || null,
|
|
77
|
-
headers: unprintRes.headers,
|
|
78
|
-
cookies: unprintRes.cookies,
|
|
79
|
-
control: unprintRes.control,
|
|
60
|
+
async function getClient() {
|
|
61
|
+
const browser = await chromium.launchServer({
|
|
62
|
+
headless: false,
|
|
80
63
|
});
|
|
81
|
-
}
|
|
82
64
|
|
|
83
|
-
|
|
84
|
-
await timers.setTimeout(60_000);
|
|
65
|
+
const endpoint = browser.wsEndpoint();
|
|
85
66
|
|
|
86
|
-
|
|
67
|
+
return {
|
|
68
|
+
browser,
|
|
69
|
+
endpoint,
|
|
70
|
+
active: 0,
|
|
71
|
+
isRetired: false,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
87
74
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
return unprint.closeBrowser(client, { client: null });
|
|
91
|
-
}
|
|
75
|
+
async function initServer() {
|
|
76
|
+
const app = express();
|
|
92
77
|
|
|
93
|
-
|
|
94
|
-
|
|
78
|
+
const address = process.env.UNPRINT_HOST || '127.0.0.1:3333';
|
|
79
|
+
const [host, portString] = address.split(':');
|
|
80
|
+
const port = portString ? Number(portString) : 3333;
|
|
81
|
+
|
|
82
|
+
let client = await getClient();
|
|
83
|
+
|
|
84
|
+
async function monitorBrowser() {
|
|
85
|
+
await timers.setTimeout(60_000); // 1 minute
|
|
86
|
+
|
|
87
|
+
try {
|
|
88
|
+
const pid = client.browser.process().pid;
|
|
89
|
+
const pids = await pidTree(pid, { root: true });
|
|
90
|
+
const usages = await pidUsage(pids);
|
|
91
|
+
const memoryUsage = Math.round(Object.values(usages).reduce((acc, usage) => acc + usage.memory, 0) / 1024 / 1024); // MB
|
|
92
|
+
|
|
93
|
+
if (memoryUsage >= memoryLimit) {
|
|
94
|
+
logger.info(`Cycling browser at ${memoryUsage.toLocaleString()}MB / ${memoryLimit.toLocaleString()}MB`);
|
|
95
|
+
|
|
96
|
+
const retiredClient = client;
|
|
97
|
+
retiredClient.isRetired = true;
|
|
98
|
+
|
|
99
|
+
try {
|
|
100
|
+
client = await getClient();
|
|
101
|
+
|
|
102
|
+
if (retiredClient.active === 0) {
|
|
103
|
+
await retiredClient.browser.close();
|
|
104
|
+
} else {
|
|
105
|
+
// don't await promise timeout to maintain monitor loop pace
|
|
106
|
+
setTimeout(async () => {
|
|
107
|
+
if (retiredClient.active > 0) {
|
|
108
|
+
logger.warn(`Force closing retired browser with ${retiredClient.active} clients`);
|
|
109
|
+
await retiredClient.browser.close();
|
|
110
|
+
}
|
|
111
|
+
}, 300_000); // 5 minutes
|
|
112
|
+
}
|
|
113
|
+
} catch (error) {
|
|
114
|
+
logger.warn(`Failed to retire client: ${error.message}`);
|
|
115
|
+
|
|
116
|
+
retiredClient.isRetired = false;
|
|
117
|
+
client = retiredClient;
|
|
118
|
+
}
|
|
119
|
+
} else {
|
|
120
|
+
logger.debug(`Reusing browser at ${memoryUsage.toLocaleString()}MB / ${memoryLimit.toLocaleString()}MB`);
|
|
121
|
+
}
|
|
122
|
+
} catch (error) {
|
|
123
|
+
logger.info(`No browser PID, probably relaunching: ${error.message}`);
|
|
124
|
+
}
|
|
95
125
|
|
|
96
|
-
|
|
126
|
+
monitorBrowser();
|
|
127
|
+
}
|
|
97
128
|
|
|
98
|
-
|
|
129
|
+
monitorBrowser();
|
|
99
130
|
|
|
100
|
-
|
|
101
|
-
|
|
131
|
+
expressWs(app);
|
|
132
|
+
app.use(express.json());
|
|
102
133
|
|
|
103
|
-
async
|
|
104
|
-
|
|
105
|
-
|
|
134
|
+
app.use(async (req, _res, next) => {
|
|
135
|
+
if (process.env.UNPRINT_KEY && req.path !== '/') {
|
|
136
|
+
if (!req.headers['unprint-key']) {
|
|
137
|
+
logger.warn(`Unauthenticated request from ${req.ip}`);
|
|
106
138
|
|
|
107
|
-
|
|
108
|
-
|
|
139
|
+
throw new HttpError('Missing key', 401);
|
|
140
|
+
}
|
|
109
141
|
|
|
110
|
-
|
|
142
|
+
if (process.env.UNPRINT_KEY.length !== req.headers['unprint-key'].length || !crypto.timingSafeEqual(
|
|
143
|
+
Buffer.from(process.env.UNPRINT_KEY, 'utf16le'),
|
|
144
|
+
Buffer.from(req.headers['unprint-key'], 'utf16le'),
|
|
145
|
+
)) {
|
|
146
|
+
logger.warn(`Invalid key from ${req.ip}`);
|
|
111
147
|
|
|
112
|
-
app.use(async (req, _res, next) => {
|
|
113
|
-
if (process.env.UNPRINT_KEY) {
|
|
114
|
-
if (process.env.UNPRINT_KEY.length !== req.headers['unprint-key']?.length
|
|
115
|
-
|| !crypto.timingSafeEqual(Buffer.from(process.env.UNPRINT_KEY, 'utf16le'), Buffer.from(req.headers['unprint-key'], 'utf16le'))) {
|
|
116
|
-
logger.warn(`Invalid key used by ${req.ip}`);
|
|
117
148
|
throw new HttpError('Invalid key', 401);
|
|
118
149
|
}
|
|
119
150
|
}
|
|
@@ -125,21 +156,6 @@ async function initServer(address, unprint) {
|
|
|
125
156
|
res.send(`unprint ${pkg.version}`);
|
|
126
157
|
});
|
|
127
158
|
|
|
128
|
-
app.post('/request', async (req, res) => handleRequest(req, res, unprint));
|
|
129
|
-
app.post('/browser', async (req, res) => handleRequest(req, res, unprint, 'browser'));
|
|
130
|
-
|
|
131
|
-
app.post('/options', async (req, res) => {
|
|
132
|
-
if (!req.body) {
|
|
133
|
-
throw new HttpError('No options provided', 400);
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
unprint.options(curateOptions(req.body));
|
|
137
|
-
|
|
138
|
-
logger.info('Configuration updated');
|
|
139
|
-
|
|
140
|
-
res.status(204).send();
|
|
141
|
-
});
|
|
142
|
-
|
|
143
159
|
app.use((error, _req, res, _next) => {
|
|
144
160
|
logger.error(error);
|
|
145
161
|
|
|
@@ -149,16 +165,69 @@ async function initServer(address, unprint) {
|
|
|
149
165
|
});
|
|
150
166
|
});
|
|
151
167
|
|
|
168
|
+
app.ws('/browser', (clientSocket, _req) => {
|
|
169
|
+
const currentClient = client;
|
|
170
|
+
const browserSocket = new WebSocket(currentClient.endpoint);
|
|
171
|
+
|
|
172
|
+
let queue = [];
|
|
173
|
+
|
|
174
|
+
logger.info('Client connected');
|
|
175
|
+
|
|
176
|
+
currentClient.active += 1;
|
|
177
|
+
|
|
178
|
+
clientSocket.on('message', (data) => {
|
|
179
|
+
logger.silly(`Socket data (${browserSocket.readyState === WebSocket.OPEN ? 'sent' : 'queued'}): ${data}`);
|
|
180
|
+
|
|
181
|
+
if (browserSocket.readyState === WebSocket.OPEN) {
|
|
182
|
+
browserSocket.send(data);
|
|
183
|
+
} else {
|
|
184
|
+
queue.push(data);
|
|
185
|
+
}
|
|
186
|
+
});
|
|
187
|
+
|
|
188
|
+
browserSocket.on('open', () => {
|
|
189
|
+
logger.debug(`Browser connected, clearing ${queue.length} queue messages`);
|
|
190
|
+
|
|
191
|
+
queue.forEach((data) => browserSocket.send(data));
|
|
192
|
+
queue = [];
|
|
193
|
+
|
|
194
|
+
browserSocket.on('message', (data) => {
|
|
195
|
+
if (clientSocket.readyState === WebSocket.OPEN) {
|
|
196
|
+
clientSocket.send(data);
|
|
197
|
+
}
|
|
198
|
+
});
|
|
199
|
+
});
|
|
200
|
+
|
|
201
|
+
clientSocket.on('close', async (code, reason) => {
|
|
202
|
+
closeSocket(browserSocket, code, reason);
|
|
203
|
+
|
|
204
|
+
currentClient.active -= 1;
|
|
205
|
+
|
|
206
|
+
logger.info('Client disconnected');
|
|
207
|
+
|
|
208
|
+
if (currentClient.isRetired && currentClient.active === 0) {
|
|
209
|
+
await currentClient.browser.close();
|
|
210
|
+
logger.info('Browser retired');
|
|
211
|
+
}
|
|
212
|
+
});
|
|
213
|
+
|
|
214
|
+
browserSocket.on('close', (code, reason) => {
|
|
215
|
+
closeSocket(clientSocket, code, reason);
|
|
216
|
+
logger.debug('Browser disconnected');
|
|
217
|
+
});
|
|
218
|
+
|
|
219
|
+
clientSocket.on('error', (error) => logger.error(`Client error: ${error}`));
|
|
220
|
+
browserSocket.on('error', (error) => logger.error(`Browser error: ${error}`));
|
|
221
|
+
});
|
|
222
|
+
|
|
152
223
|
app.listen(port, host, (error) => {
|
|
153
224
|
if (error) {
|
|
154
225
|
logger.error(`Failed to start server: ${error.message}`);
|
|
155
226
|
return;
|
|
156
227
|
}
|
|
157
228
|
|
|
158
|
-
logger.info(`
|
|
229
|
+
logger.info(`unprint server listening on http://${host}:${port}`);
|
|
159
230
|
});
|
|
160
|
-
|
|
161
|
-
monitorBrowsers(unprint);
|
|
162
231
|
}
|
|
163
232
|
|
|
164
233
|
module.exports = initServer;
|
package/tests/remote.js
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
// const { chromium } = require('patchright');
|
|
4
|
+
|
|
3
5
|
const unprint = require('../src/app');
|
|
4
6
|
|
|
7
|
+
const key = 'foobar';
|
|
8
|
+
|
|
5
9
|
unprint.configure({
|
|
6
10
|
remote: {
|
|
7
11
|
enable: true,
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
use: false,
|
|
13
|
+
address: 'ws://127.0.0.1:3333/browser',
|
|
14
|
+
key,
|
|
11
15
|
},
|
|
12
16
|
});
|
|
13
17
|
|
|
@@ -18,9 +22,7 @@ async function init() {
|
|
|
18
22
|
const res = await unprint.browser('https://www.google.com', {
|
|
19
23
|
useRemote: true,
|
|
20
24
|
async control(page) {
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
return form.count();
|
|
25
|
+
return page.locator('form').count();
|
|
24
26
|
},
|
|
25
27
|
});
|
|
26
28
|
|
|
@@ -36,3 +38,29 @@ async function init() {
|
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
init();
|
|
41
|
+
|
|
42
|
+
/*
|
|
43
|
+
async function initRaw() {
|
|
44
|
+
const browser = await chromium.connect('ws://127.0.0.1:3333/browser', {
|
|
45
|
+
headers: {
|
|
46
|
+
'unprint-key': key,
|
|
47
|
+
},
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
// await timers.setTimeout(2000);
|
|
51
|
+
|
|
52
|
+
const context = await browser.newContext();
|
|
53
|
+
const page = await context.newPage();
|
|
54
|
+
|
|
55
|
+
await page.goto('https://jsonplaceholder.typicode.com');
|
|
56
|
+
await page.locator('.mb-one').hover({ trial: true, timeout: 10000, strict: false });
|
|
57
|
+
|
|
58
|
+
const content = await page.content();
|
|
59
|
+
|
|
60
|
+
console.log(content);
|
|
61
|
+
|
|
62
|
+
await page.close();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
initRaw();
|
|
66
|
+
*/
|