unprint 0.11.12 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/package.json +1 -1
- package/src/app.js +130 -13
- package/tests/init.js +14 -1
package/README.md
CHANGED
|
@@ -13,6 +13,17 @@ unprint.options({
|
|
|
13
13
|
headers: {
|
|
14
14
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
|
|
15
15
|
},
|
|
16
|
+
limits: { // request throttling
|
|
17
|
+
default: {
|
|
18
|
+
concurrency: 10,
|
|
19
|
+
interval: 10, // ms
|
|
20
|
+
},
|
|
21
|
+
[hostname]: {
|
|
22
|
+
enable: true, // enabled by default
|
|
23
|
+
concurrency: 1,
|
|
24
|
+
interval: 1000,
|
|
25
|
+
},
|
|
26
|
+
},
|
|
16
27
|
})
|
|
17
28
|
```
|
|
18
29
|
|
|
@@ -195,3 +206,13 @@ Returns
|
|
|
195
206
|
res, // (object) alias for 'response'
|
|
196
207
|
}
|
|
197
208
|
```
|
|
209
|
+
|
|
210
|
+
### Feedback events
|
|
211
|
+
Usage:
|
|
212
|
+
* `unprint.on('trigger', callbackFn)`
|
|
213
|
+
* `unprint.off('trigger', callbackFn)`
|
|
214
|
+
|
|
215
|
+
Triggers:
|
|
216
|
+
* `requestInit`: An HTTP request is about to be made
|
|
217
|
+
* `requestSuccess`: The HTTP request completed with an OK status code
|
|
218
|
+
* `requestError`: The HTTP request completed with an error status code
|
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const { JSDOM, VirtualConsole } = require('jsdom');
|
|
4
|
+
const EventEmitter = require('events');
|
|
4
5
|
const http = require('http');
|
|
5
6
|
const https = require('https');
|
|
6
7
|
const axios = require('axios').default;
|
|
8
|
+
const Bottleneck = require('bottleneck');
|
|
7
9
|
const moment = require('moment-timezone');
|
|
8
10
|
const merge = require('deepmerge');
|
|
9
11
|
|
|
@@ -11,17 +13,33 @@ const settings = {
|
|
|
11
13
|
throwErrors: false,
|
|
12
14
|
logErrors: true,
|
|
13
15
|
requestTimeout: 30000,
|
|
16
|
+
limits: {
|
|
17
|
+
default: {
|
|
18
|
+
interval: 10,
|
|
19
|
+
concurrency: 10,
|
|
20
|
+
},
|
|
21
|
+
},
|
|
14
22
|
};
|
|
15
23
|
|
|
16
24
|
const virtualConsole = new VirtualConsole();
|
|
17
25
|
const { window: globalWindow } = new JSDOM('', { virtualConsole });
|
|
18
26
|
|
|
27
|
+
let globalOptions = {
|
|
28
|
+
...settings,
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
const events = new EventEmitter();
|
|
32
|
+
|
|
33
|
+
/**
 * Update the module-wide options.
 *
 * The supplied options are deep-merged (via `deepmerge`) on top of the
 * current global options, so keys that are not mentioned keep their
 * existing values.
 *
 * @param {object} newOptions - Options to merge into the global options.
 * @returns {void}
 */
function configure(newOptions) {
	const mergedOptions = merge(globalOptions, newOptions);

	globalOptions = mergedOptions;
}
|
|
36
|
+
|
|
19
37
|
function handleError(error, code) {
|
|
20
|
-
if (
|
|
38
|
+
if (globalOptions.logErrors) {
|
|
21
39
|
console.error(`unprint encountered an error (${code}): ${error.message}`);
|
|
22
40
|
}
|
|
23
41
|
|
|
24
|
-
if (
|
|
42
|
+
if (globalOptions.throwErrors) {
|
|
25
43
|
throw Object.assign(error, { code });
|
|
26
44
|
}
|
|
27
45
|
|
|
@@ -31,12 +49,6 @@ function handleError(error, code) {
|
|
|
31
49
|
virtualConsole.on('error', (message) => handleError(message, 'JSDOM'));
|
|
32
50
|
virtualConsole.on('jsdomError', (message) => handleError(message, 'JSDOM'));
|
|
33
51
|
|
|
34
|
-
let globalOptions = {};
|
|
35
|
-
|
|
36
|
-
function configure(newOptions) {
|
|
37
|
-
globalOptions = newOptions;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
52
|
function trim(string) {
|
|
41
53
|
if (typeof string === 'string') {
|
|
42
54
|
return string.trim().replace(/\s+/g, ' ');
|
|
@@ -217,11 +229,23 @@ function extractNumber(rawNumberString, customOptions) {
|
|
|
217
229
|
: rawNumberString.replace(',', '');
|
|
218
230
|
|
|
219
231
|
if (numberString && options.match) {
|
|
220
|
-
|
|
232
|
+
const number = Number(numberString.match(options.match)?.[options.matchIndex]);
|
|
233
|
+
|
|
234
|
+
if (Number.isNaN(number)) {
|
|
235
|
+
return null;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
return number;
|
|
221
239
|
}
|
|
222
240
|
|
|
223
241
|
if (numberString) {
|
|
224
|
-
|
|
242
|
+
const number = Number(numberString);
|
|
243
|
+
|
|
244
|
+
if (Number.isNaN(number)) {
|
|
245
|
+
return null;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
return number;
|
|
225
249
|
}
|
|
226
250
|
|
|
227
251
|
return null;
|
|
@@ -761,13 +785,27 @@ function isDomObject(element) {
|
|
|
761
785
|
|
|
762
786
|
function initQueryFns(fns, context) {
|
|
763
787
|
if (context) {
|
|
764
|
-
return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) =>
|
|
788
|
+
return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) => {
|
|
789
|
+
events.emit('query', {
|
|
790
|
+
key,
|
|
791
|
+
args,
|
|
792
|
+
origin: context.options.origin,
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
return fn(context, ...args);
|
|
796
|
+
}]));
|
|
765
797
|
}
|
|
766
798
|
|
|
767
799
|
// context is passed directly to query method
|
|
768
800
|
return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) => {
|
|
769
801
|
// first argument is already an unprint context. this seems like a convoluted approach, but there is little reason not to allow it
|
|
770
802
|
if (args[0]?.isUnprint) {
|
|
803
|
+
events.emit('query', {
|
|
804
|
+
key,
|
|
805
|
+
args,
|
|
806
|
+
origin: context.options.origin,
|
|
807
|
+
});
|
|
808
|
+
|
|
771
809
|
return fn(...args);
|
|
772
810
|
}
|
|
773
811
|
|
|
@@ -775,6 +813,12 @@ function initQueryFns(fns, context) {
|
|
|
775
813
|
if (isDomObject(args[0])) {
|
|
776
814
|
const element = args[0];
|
|
777
815
|
|
|
816
|
+
events.emit('query', {
|
|
817
|
+
key,
|
|
818
|
+
args,
|
|
819
|
+
origin: context.options.origin,
|
|
820
|
+
});
|
|
821
|
+
|
|
778
822
|
return fn({
|
|
779
823
|
element,
|
|
780
824
|
html: element.outerHTML || element.body?.outerHTML,
|
|
@@ -848,6 +892,44 @@ function initAll(context, selector, options = {}) {
|
|
|
848
892
|
.map((element) => init(element, null, options));
|
|
849
893
|
}
|
|
850
894
|
|
|
895
|
+
const limiters = {
|
|
896
|
+
default: new Bottleneck(),
|
|
897
|
+
};
|
|
898
|
+
|
|
899
|
+
/**
 * Resolve a single throttling setting for a request.
 *
 * Lookup order:
 *   1. the value set directly on the request options (`options[prop]`),
 *   2. the value configured for the hostname under `options.limits`, unless
 *      that host entry has `enable: false`,
 *   3. the value from `options.limits.default`.
 *
 * @param {string} prop - Setting to resolve; called with 'interval' and 'concurrency'.
 * @param {object} options - Merged request options; must contain `limits.default`.
 * @param {string} hostname - Hostname used as the key into `options.limits`.
 * @returns {*} The first defined value found in the lookup order above.
 */
function getLimiterValue(prop, options, hostname) {
	const requestValue = options[prop];

	if (requestValue !== undefined) {
		return requestValue;
	}

	const hostLimits = options.limits[hostname];
	const isHostEnabled = hostLimits?.enable !== false;

	// a disabled or incomplete host entry falls through to the defaults
	return isHostEnabled && hostLimits?.[prop] !== undefined
		? hostLimits[prop]
		: options.limits.default[prop];
}
|
|
910
|
+
|
|
911
|
+
/**
 * Select the rate limiter to use for a request URL, creating it on first use.
 *
 * The effective interval and concurrency are resolved through
 * `getLimiterValue` for the URL's hostname. Limiters are cached in the
 * module-level `limiters` object, keyed by interval and then concurrency, so
 * requests that resolve to the same pair share one Bottleneck instance.
 *
 * @param {string} url - Absolute request URL; parsed with `new URL`, which throws on invalid input.
 * @param {object} options - Merged request options (reads `timeout` and the throttling settings).
 * @returns {{ limiter: object, interval: number, concurrency: number }}
 *   The limiter plus the interval and concurrency values it was selected for.
 */
function getLimiter(url, options) {
	const { hostname } = new URL(url);

	const interval = getLimiterValue('interval', options, hostname);
	const concurrency = getLimiterValue('concurrency', options, hostname);

	if (!limiters[interval]?.[concurrency]) {
		limiters[interval] = limiters[interval] || {};

		limiters[interval][concurrency] = new Bottleneck({
			minTime: interval,
			maxConcurrent: concurrency,
			// time out 10 seconds after axios should have timed out itself
			// NOTE(review): confirm Bottleneck honours `timeout` for local limiters; per-job expiry is usually set via `expiration`
			timeout: options.timeout + 10000,
		});
	}

	// report the resolved values under their own names; callers destructure both
	return {
		limiter: limiters[interval][concurrency],
		interval,
		concurrency,
	};
}
|
|
932
|
+
|
|
851
933
|
async function request(url, body, customOptions = {}, method = 'GET') {
|
|
852
934
|
const options = merge.all([{
|
|
853
935
|
timeout: 1000,
|
|
@@ -855,7 +937,19 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
855
937
|
url,
|
|
856
938
|
}, globalOptions, customOptions]);
|
|
857
939
|
|
|
858
|
-
const
|
|
940
|
+
const { limiter, interval, concurrency } = getLimiter(url, options);
|
|
941
|
+
|
|
942
|
+
const feedbackBase = {
|
|
943
|
+
url,
|
|
944
|
+
method,
|
|
945
|
+
interval,
|
|
946
|
+
concurrency,
|
|
947
|
+
options,
|
|
948
|
+
};
|
|
949
|
+
|
|
950
|
+
events.emit('requestInit', feedbackBase);
|
|
951
|
+
|
|
952
|
+
const res = await limiter.schedule(async () => axios({
|
|
859
953
|
url,
|
|
860
954
|
method,
|
|
861
955
|
data: body,
|
|
@@ -865,11 +959,17 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
865
959
|
signal: options.abortSignal,
|
|
866
960
|
httpAgent: options.httpAgent || new http.Agent({ ...options.agent }),
|
|
867
961
|
httpsAgent: options.httpsAgent || new https.Agent({ ...options.agent }),
|
|
868
|
-
});
|
|
962
|
+
}));
|
|
869
963
|
|
|
870
964
|
if (!(res.status >= 200 && res.status < 300)) {
|
|
871
965
|
handleError(new Error(`HTTP response from ${url} not OK (${res.status} ${res.statusText}): ${res.data}`), 'HTTP_NOT_OK');
|
|
872
966
|
|
|
967
|
+
events.emit('requestError', {
|
|
968
|
+
...feedbackBase,
|
|
969
|
+
status: res.status,
|
|
970
|
+
statusText: res.statusText,
|
|
971
|
+
});
|
|
972
|
+
|
|
873
973
|
return {
|
|
874
974
|
ok: false,
|
|
875
975
|
status: res.status,
|
|
@@ -887,6 +987,12 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
887
987
|
res,
|
|
888
988
|
};
|
|
889
989
|
|
|
990
|
+
events.emit('requestSuccess', {
|
|
991
|
+
...feedbackBase,
|
|
992
|
+
status: res.status,
|
|
993
|
+
statusText: res.statusText,
|
|
994
|
+
});
|
|
995
|
+
|
|
890
996
|
if (res.headers['content-type'].includes('application/json') && typeof res.data === 'object') {
|
|
891
997
|
return {
|
|
892
998
|
...base,
|
|
@@ -921,8 +1027,19 @@ async function post(url, body, options) {
|
|
|
921
1027
|
return request(url, body, options, 'POST');
|
|
922
1028
|
}
|
|
923
1029
|
|
|
1030
|
+
/**
 * Subscribe a callback to one of the module's feedback events.
 *
 * @param {string} trigger - Event name, e.g. 'requestInit', 'requestSuccess', 'requestError' or 'query'.
 * @param {Function} fn - Listener invoked with the event payload.
 * @returns {void}
 */
function on(trigger, fn) {
	// addListener is the EventEmitter alias of on()
	events.addListener(trigger, fn);
}
|
|
1033
|
+
|
|
1034
|
+
/**
 * Unsubscribe a callback previously registered for a feedback event.
 *
 * @param {string} trigger - Event name the listener was registered under.
 * @param {Function} fn - The same function reference that was registered.
 * @returns {void}
 */
function off(trigger, fn) {
	// removeListener is the EventEmitter alias of off()
	events.removeListener(trigger, fn);
}
|
|
1037
|
+
|
|
924
1038
|
module.exports = {
|
|
925
1039
|
configure,
|
|
1040
|
+
on,
|
|
1041
|
+
off,
|
|
1042
|
+
events,
|
|
926
1043
|
get,
|
|
927
1044
|
post,
|
|
928
1045
|
request,
|
package/tests/init.js
CHANGED
|
@@ -10,7 +10,20 @@ const data = require('./data.json');
|
|
|
10
10
|
const port = process.env.PORT || 3101;
|
|
11
11
|
|
|
12
12
|
async function initTest() {
|
|
13
|
-
unprint.options({
|
|
13
|
+
unprint.options({
|
|
14
|
+
headers: { 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36' },
|
|
15
|
+
limits: {
|
|
16
|
+
default: {
|
|
17
|
+
concurrency: 1,
|
|
18
|
+
interval: 100,
|
|
19
|
+
},
|
|
20
|
+
},
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
unprint.on('requestInit', (initData) => console.log('init', initData));
|
|
24
|
+
unprint.on('requestError', (errorData) => console.error('error', errorData));
|
|
25
|
+
unprint.on('requestSuccess', (successData) => console.log('success', successData));
|
|
26
|
+
unprint.on('query', (queryData) => console.log('query', queryData));
|
|
14
27
|
|
|
15
28
|
const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
|
|
16
29
|
// const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
|