unprint 0.11.13 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/package.json +1 -1
- package/src/app.js +116 -11
- package/tests/init.js +14 -1
package/README.md
CHANGED
|
@@ -13,6 +13,17 @@ unprint.options({
|
|
|
13
13
|
headers: {
|
|
14
14
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
|
|
15
15
|
},
|
|
16
|
+
limits: { // request throttling
|
|
17
|
+
default: {
|
|
18
|
+
concurrency: 10,
|
|
19
|
+
interval: 10, // ms
|
|
20
|
+
},
|
|
21
|
+
[hostname]: {
|
|
22
|
+
enable: true, // enabled by default
|
|
23
|
+
concurrency: 1,
|
|
24
|
+
interval: 1000,
|
|
25
|
+
},
|
|
26
|
+
},
|
|
16
27
|
})
|
|
17
28
|
```
|
|
18
29
|
|
|
@@ -195,3 +206,13 @@ Returns
|
|
|
195
206
|
res, // (object) alias for 'response'
|
|
196
207
|
}
|
|
197
208
|
```
|
|
209
|
+
|
|
210
|
+
### Feedback events
|
|
211
|
+
Usage:
|
|
212
|
+
* `unprint.on('trigger', callbackFn)`
|
|
213
|
+
* `unprint.off('trigger', callbackFn)`
|
|
214
|
+
|
|
215
|
+
Triggers:
|
|
216
|
+
* `requestInit`: An HTTP request is about to be made
|
|
217
|
+
* `requestSuccess`: The HTTP request completed with an OK status code
|
|
218
|
+
* `requestError`: The HTTP request completed with an error status code
|
package/package.json
CHANGED
package/src/app.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
const { JSDOM, VirtualConsole } = require('jsdom');
|
|
4
|
+
const EventEmitter = require('events');
|
|
4
5
|
const http = require('http');
|
|
5
6
|
const https = require('https');
|
|
6
7
|
const axios = require('axios').default;
|
|
8
|
+
const Bottleneck = require('bottleneck');
|
|
7
9
|
const moment = require('moment-timezone');
|
|
8
10
|
const merge = require('deepmerge');
|
|
9
11
|
|
|
@@ -11,17 +13,33 @@ const settings = {
|
|
|
11
13
|
throwErrors: false,
|
|
12
14
|
logErrors: true,
|
|
13
15
|
requestTimeout: 30000,
|
|
16
|
+
limits: {
|
|
17
|
+
default: {
|
|
18
|
+
interval: 10,
|
|
19
|
+
concurrency: 10,
|
|
20
|
+
},
|
|
21
|
+
},
|
|
14
22
|
};
|
|
15
23
|
|
|
16
24
|
const virtualConsole = new VirtualConsole();
|
|
17
25
|
const { window: globalWindow } = new JSDOM('', { virtualConsole });
|
|
18
26
|
|
|
27
|
+
let globalOptions = {
|
|
28
|
+
...settings,
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
const events = new EventEmitter();
|
|
32
|
+
|
|
33
|
+
/**
 * Merge new options into the module-wide options.
 *
 * The new options are combined with the current global options through
 * `merge`, and the result replaces the global options, so values set by an
 * earlier call are kept unless the new options override them.
 *
 * @param {object} [newOptions] - Options to merge in. A missing or `null`
 *   argument is treated as "no changes" instead of being handed to `merge`.
 * @returns {void}
 */
function configure(newOptions) {
  // Guard against configure() / configure(null): merge expects an object to read keys from.
  globalOptions = merge(globalOptions, newOptions ?? {});
}
|
|
36
|
+
|
|
19
37
|
function handleError(error, code) {
|
|
20
|
-
if (
|
|
38
|
+
if (globalOptions.logErrors) {
|
|
21
39
|
console.error(`unprint encountered an error (${code}): ${error.message}`);
|
|
22
40
|
}
|
|
23
41
|
|
|
24
|
-
if (
|
|
42
|
+
if (globalOptions.throwErrors) {
|
|
25
43
|
throw Object.assign(error, { code });
|
|
26
44
|
}
|
|
27
45
|
|
|
@@ -31,12 +49,6 @@ function handleError(error, code) {
|
|
|
31
49
|
virtualConsole.on('error', (message) => handleError(message, 'JSDOM'));
|
|
32
50
|
virtualConsole.on('jsdomError', (message) => handleError(message, 'JSDOM'));
|
|
33
51
|
|
|
34
|
-
let globalOptions = {};
|
|
35
|
-
|
|
36
|
-
function configure(newOptions) {
|
|
37
|
-
globalOptions = newOptions;
|
|
38
|
-
}
|
|
39
|
-
|
|
40
52
|
function trim(string) {
|
|
41
53
|
if (typeof string === 'string') {
|
|
42
54
|
return string.trim().replace(/\s+/g, ' ');
|
|
@@ -773,13 +785,27 @@ function isDomObject(element) {
|
|
|
773
785
|
|
|
774
786
|
function initQueryFns(fns, context) {
|
|
775
787
|
if (context) {
|
|
776
|
-
return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) =>
|
|
788
|
+
return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) => {
|
|
789
|
+
events.emit('query', {
|
|
790
|
+
key,
|
|
791
|
+
args,
|
|
792
|
+
origin: context.options.origin,
|
|
793
|
+
});
|
|
794
|
+
|
|
795
|
+
return fn(context, ...args);
|
|
796
|
+
}]));
|
|
777
797
|
}
|
|
778
798
|
|
|
779
799
|
// context is passed directly to query method
|
|
780
800
|
return Object.fromEntries(Object.entries(fns).map(([key, fn]) => [key, (...args) => {
|
|
781
801
|
// first argument is already an unprint context. this seems like a convoluted approach, but there is little reason not to allow it
|
|
782
802
|
if (args[0]?.isUnprint) {
|
|
803
|
+
events.emit('query', {
|
|
804
|
+
key,
|
|
805
|
+
args,
|
|
806
|
+
origin: context.options.origin,
|
|
807
|
+
});
|
|
808
|
+
|
|
783
809
|
return fn(...args);
|
|
784
810
|
}
|
|
785
811
|
|
|
@@ -787,6 +813,12 @@ function initQueryFns(fns, context) {
|
|
|
787
813
|
if (isDomObject(args[0])) {
|
|
788
814
|
const element = args[0];
|
|
789
815
|
|
|
816
|
+
events.emit('query', {
|
|
817
|
+
key,
|
|
818
|
+
args,
|
|
819
|
+
origin: context.options.origin,
|
|
820
|
+
});
|
|
821
|
+
|
|
790
822
|
return fn({
|
|
791
823
|
element,
|
|
792
824
|
html: element.outerHTML || element.body?.outerHTML,
|
|
@@ -860,6 +892,44 @@ function initAll(context, selector, options = {}) {
|
|
|
860
892
|
.map((element) => init(element, null, options));
|
|
861
893
|
}
|
|
862
894
|
|
|
895
|
+
const limiters = {
|
|
896
|
+
default: new Bottleneck(),
|
|
897
|
+
};
|
|
898
|
+
|
|
899
|
+
/**
 * Resolve one throttling setting (e.g. 'interval' or 'concurrency') for a request.
 *
 * Lookup order:
 *   1. `options[prop]` — a value set directly on the request options
 *   2. `options.limits[hostname][prop]` — unless that entry has `enable: false`
 *   3. `options.limits.default[prop]`
 *
 * @param {string} prop - Name of the setting to resolve.
 * @param {object} options - Request options; must contain `limits.default`.
 * @param {string} hostname - Hostname used to look up host-specific limits.
 * @returns {*} The first defined value found in the order above.
 */
function getLimiterValue(prop, options, hostname) {
  const directValue = options[prop];

  if (directValue !== undefined) {
    return directValue;
  }

  const hostLimits = options.limits[hostname];
  const hostIsEnabled = hostLimits?.enable !== false;

  // A host entry only applies when it is not explicitly disabled and actually defines the setting.
  if (hostIsEnabled && hostLimits?.[prop] !== undefined) {
    return hostLimits[prop];
  }

  return options.limits.default[prop];
}
|
|
910
|
+
|
|
911
|
+
/**
 * Select, and create on first use, the Bottleneck limiter that throttles a request to `url`.
 *
 * The interval and concurrency are resolved through getLimiterValue using the
 * URL's hostname. Limiters are cached in `limiters` by interval and then by
 * concurrency, so every hostname that resolves to the same pair shares one
 * limiter instance.
 *
 * @param {string} url - Absolute URL of the request; only its hostname is used.
 * @param {object} options - Merged request options (`limits`, `timeout` and
 *   optional per-request `interval` / `concurrency`).
 * @returns {{ limiter: object, interval: *, concurrency: * }} The limiter plus
 *   the resolved interval and concurrency, which the caller reports in feedback events.
 * @throws {TypeError} If `url` cannot be parsed by the URL constructor.
 */
function getLimiter(url, options) {
  const { hostname } = new URL(url);

  const interval = getLimiterValue('interval', options, hostname);
  const concurrency = getLimiterValue('concurrency', options, hostname);

  if (!limiters[interval]?.[concurrency]) {
    limiters[interval] = limiters[interval] || {};

    // The limiter is built once per interval/concurrency pair, so `timeout` reflects
    // the options of whichever request created it.
    // NOTE(review): confirm how Bottleneck's constructor interprets `timeout`.
    limiters[interval][concurrency] = new Bottleneck({
      minTime: interval,
      maxConcurrent: concurrency,
      timeout: options.timeout + 10000, // time out 10 seconds after axios should have
    });
  }

  return {
    limiter: limiters[interval][concurrency],
    // Previously this returned `interval: concurrency` and no `concurrency` key, so callers
    // saw the wrong interval and an undefined concurrency.
    interval,
    concurrency,
  };
}
|
|
932
|
+
|
|
863
933
|
async function request(url, body, customOptions = {}, method = 'GET') {
|
|
864
934
|
const options = merge.all([{
|
|
865
935
|
timeout: 1000,
|
|
@@ -867,7 +937,19 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
867
937
|
url,
|
|
868
938
|
}, globalOptions, customOptions]);
|
|
869
939
|
|
|
870
|
-
const
|
|
940
|
+
const { limiter, interval, concurrency } = getLimiter(url, options);
|
|
941
|
+
|
|
942
|
+
const feedbackBase = {
|
|
943
|
+
url,
|
|
944
|
+
method,
|
|
945
|
+
interval,
|
|
946
|
+
concurrency,
|
|
947
|
+
options,
|
|
948
|
+
};
|
|
949
|
+
|
|
950
|
+
events.emit('requestInit', feedbackBase);
|
|
951
|
+
|
|
952
|
+
const res = await limiter.schedule(async () => axios({
|
|
871
953
|
url,
|
|
872
954
|
method,
|
|
873
955
|
data: body,
|
|
@@ -877,11 +959,17 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
877
959
|
signal: options.abortSignal,
|
|
878
960
|
httpAgent: options.httpAgent || new http.Agent({ ...options.agent }),
|
|
879
961
|
httpsAgent: options.httpsAgent || new https.Agent({ ...options.agent }),
|
|
880
|
-
});
|
|
962
|
+
}));
|
|
881
963
|
|
|
882
964
|
if (!(res.status >= 200 && res.status < 300)) {
|
|
883
965
|
handleError(new Error(`HTTP response from ${url} not OK (${res.status} ${res.statusText}): ${res.data}`), 'HTTP_NOT_OK');
|
|
884
966
|
|
|
967
|
+
events.emit('requestError', {
|
|
968
|
+
...feedbackBase,
|
|
969
|
+
status: res.status,
|
|
970
|
+
statusText: res.statusText,
|
|
971
|
+
});
|
|
972
|
+
|
|
885
973
|
return {
|
|
886
974
|
ok: false,
|
|
887
975
|
status: res.status,
|
|
@@ -899,6 +987,12 @@ async function request(url, body, customOptions = {}, method = 'GET') {
|
|
|
899
987
|
res,
|
|
900
988
|
};
|
|
901
989
|
|
|
990
|
+
events.emit('requestSuccess', {
|
|
991
|
+
...feedbackBase,
|
|
992
|
+
status: res.status,
|
|
993
|
+
statusText: res.statusText,
|
|
994
|
+
});
|
|
995
|
+
|
|
902
996
|
if (res.headers['content-type'].includes('application/json') && typeof res.data === 'object') {
|
|
903
997
|
return {
|
|
904
998
|
...base,
|
|
@@ -933,8 +1027,19 @@ async function post(url, body, options) {
|
|
|
933
1027
|
return request(url, body, options, 'POST');
|
|
934
1028
|
}
|
|
935
1029
|
|
|
1030
|
+
/**
 * Subscribe a callback to one of the module's feedback events.
 *
 * @param {string} trigger - Event name to listen for.
 * @param {Function} fn - Listener registered on the shared `events` emitter.
 * @returns {void}
 */
function on(trigger, fn) {
  // addListener is the EventEmitter alias of on().
  events.addListener(trigger, fn);
}
|
|
1033
|
+
|
|
1034
|
+
/**
 * Unsubscribe a callback from one of the module's feedback events.
 *
 * @param {string} trigger - Event name the listener was registered for.
 * @param {Function} fn - The same function reference that was registered.
 * @returns {void}
 */
function off(trigger, fn) {
  // removeListener is the EventEmitter method that off() aliases.
  events.removeListener(trigger, fn);
}
|
|
1037
|
+
|
|
936
1038
|
module.exports = {
|
|
937
1039
|
configure,
|
|
1040
|
+
on,
|
|
1041
|
+
off,
|
|
1042
|
+
events,
|
|
938
1043
|
get,
|
|
939
1044
|
post,
|
|
940
1045
|
request,
|
package/tests/init.js
CHANGED
|
@@ -10,7 +10,20 @@ const data = require('./data.json');
|
|
|
10
10
|
const port = process.env.PORT || 3101;
|
|
11
11
|
|
|
12
12
|
async function initTest() {
|
|
13
|
-
unprint.options({
|
|
13
|
+
unprint.options({
|
|
14
|
+
headers: { 'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36' },
|
|
15
|
+
limits: {
|
|
16
|
+
default: {
|
|
17
|
+
concurrency: 1,
|
|
18
|
+
interval: 100,
|
|
19
|
+
},
|
|
20
|
+
},
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
unprint.on('requestInit', (initData) => console.log('init', initData));
|
|
24
|
+
unprint.on('requestError', (errorData) => console.error('error', errorData));
|
|
25
|
+
unprint.on('requestSuccess', (successData) => console.log('success', successData));
|
|
26
|
+
unprint.on('query', (queryData) => console.log('query', queryData));
|
|
14
27
|
|
|
15
28
|
const res = await unprint.get(`http://127.0.0.1:${port}/html`, { select: 'body' });
|
|
16
29
|
// const jsonRes = await unprint.get(`http://127.0.0.1:${port}/json`);
|