@ezs/basics 2.11.0 → 2.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/lib/index.js +2 -9
- package/lib/request.js +20 -3
- package/lib/url-connect.js +16 -11
- package/lib/url-fetch.js +4 -6
- package/lib/url-request.js +4 -6
- package/lib/url-stream.js +8 -8
- package/package.json +3 -3
- package/lib/fetch.js +0 -74
- package/lib/file-load-parquet.js +0 -37
- package/lib/obj-columns.js +0 -53
- package/lib/skos-object.js +0 -73
- package/lib/txt-inflection.js +0 -61
- package/lib/txt-sentences.js +0 -82
- package/lib/url-pager.js +0 -179
- package/lib/utils.js +0 -18
package/README.md
CHANGED
|
@@ -734,7 +734,7 @@ Useful to send JSON data to an API and get results.
|
|
|
734
734
|
* `url` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** URL to fetch
|
|
735
735
|
* `streaming` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Direct connection to the Object Stream server (disables the retries setting) (optional, default `false`)
|
|
736
736
|
* `json` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Parse as JSON the content of URL (optional, default `false`)
|
|
737
|
-
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Timeout in milliseconds (optional, default `1000`)
|
|
737
|
+
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Timeout in milliseconds (optional, default `5000`)
|
|
738
738
|
* `noerror` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** Ignore all errors (optional, default `false`)
|
|
739
739
|
* `retries` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The maximum amount of times to retry the connection (optional, default `5`)
|
|
740
740
|
* `encoder` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The statement to encode each chunk to a string (optional, default `dump`)
|
|
@@ -755,7 +755,7 @@ Or if no target is specified, the output will be the returned content of URL.
|
|
|
755
755
|
* `target` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** choose the key to set
|
|
756
756
|
* `json` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** parse as JSON the content of URL (optional, default `false`)
|
|
757
757
|
* `dataurl` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** encode content into DATA Url (optional, default `false`)
|
|
758
|
-
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** timeout in milliseconds (optional, default `1000`)
|
|
758
|
+
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** timeout in milliseconds (optional, default `5000`)
|
|
759
759
|
* `mimetype` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** mimetype for value of path (if presents) (optional, default `"application/json"`)
|
|
760
760
|
* `noerror` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** ignore all errors, the target field will remain undefined (optional, default `false`)
|
|
761
761
|
* `retries` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The maximum amount of times to retry the connection (optional, default `5`)
|
|
@@ -883,7 +883,7 @@ Output:
|
|
|
883
883
|
* `url` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** URL to fetch
|
|
884
884
|
* `json` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** parse result as json (optional, default `true`)
|
|
885
885
|
* `target` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** choose the key to set
|
|
886
|
-
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Timeout in milliseconds (optional, default `1000`)
|
|
886
|
+
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Timeout in milliseconds (optional, default `5000`)
|
|
887
887
|
* `noerror` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** Ignore all errors, the target field will remain undefined (optional, default `false`)
|
|
888
888
|
* `retries` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The maximum amount of times to retry the connection (optional, default `5`)
|
|
889
889
|
* `insert` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** a header response value in the result
|
|
@@ -952,7 +952,7 @@ Output:
|
|
|
952
952
|
|
|
953
953
|
* `url` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)?** URL to fetch (by default input string is taken)
|
|
954
954
|
* `path` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** choose the path to split JSON result (optional, default `"*"`)
|
|
955
|
-
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Timeout in milliseconds (optional, default `1000`)
|
|
955
|
+
* `timeout` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** Timeout in milliseconds (optional, default `5000`)
|
|
956
956
|
* `noerror` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** Ignore all errors, the target field will remain undefined (optional, default `false`)
|
|
957
957
|
* `retries` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The maximum amount of times to retry the connection (optional, default `5`)
|
|
958
958
|
|
package/lib/index.js
CHANGED
|
@@ -71,13 +71,6 @@ const funcs = {
|
|
|
71
71
|
INIString: _iniString.default,
|
|
72
72
|
FILESave: _fileSave.default,
|
|
73
73
|
FILELoad: _fileLoad.default,
|
|
74
|
-
FILEMerge: _fileMerge.default
|
|
75
|
-
// aliases
|
|
76
|
-
bufferify: _bufObject.default.BUFObject,
|
|
77
|
-
concat: _txtConcat.default.TXTConcat,
|
|
78
|
-
standardize: _objStandardize.default.OBJStandardize,
|
|
79
|
-
split: _txtParse.default.TXTParse,
|
|
80
|
-
segmenter: _txtParse.default.TXTParse
|
|
74
|
+
FILEMerge: _fileMerge.default
|
|
81
75
|
};
|
|
82
|
-
var _default = exports.default = funcs;
|
|
83
|
-
module.exports = funcs;
|
|
76
|
+
var _default = exports.default = funcs;
|
package/lib/request.js
CHANGED
|
@@ -4,10 +4,27 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
4
4
|
value: true
|
|
5
5
|
});
|
|
6
6
|
exports.default = void 0;
|
|
7
|
-
var _fetchWithProxy = _interopRequireDefault(require("fetch-with-proxy"));
|
|
8
|
-
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
9
7
|
const request = (url, parameters) => async bail => {
|
|
10
|
-
const response = await (0, _fetchWithProxy.default)(url, parameters);
|
|
8
|
+
const hasBody = parameters.body !== undefined;
|
|
9
|
+
let response;
|
|
10
|
+
try {
|
|
11
|
+
response = await fetch(url, {
|
|
12
|
+
...parameters,
|
|
13
|
+
...(hasBody && {
|
|
14
|
+
duplex: 'half'
|
|
15
|
+
})
|
|
16
|
+
});
|
|
17
|
+
} catch (raw) {
|
|
18
|
+
// Normalise l'erreur quelle que soit la source (undici, DOMException, etc.)
|
|
19
|
+
const err = raw instanceof Error ? raw : Object.assign(new Error(raw?.message ?? String(raw)), {
|
|
20
|
+
name: raw?.name ?? 'FetchError',
|
|
21
|
+
cause: raw
|
|
22
|
+
});
|
|
23
|
+
if (err.name === 'AbortError' || err.name === 'TimeoutError') {
|
|
24
|
+
return bail(err);
|
|
25
|
+
}
|
|
26
|
+
throw err;
|
|
27
|
+
}
|
|
11
28
|
if (!response.ok) {
|
|
12
29
|
// response.status >= 200 && response.status < 300
|
|
13
30
|
const err = new Error(response.statusText);
|
package/lib/url-connect.js
CHANGED
|
@@ -5,14 +5,13 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
5
5
|
});
|
|
6
6
|
exports.default = URLConnect;
|
|
7
7
|
var _JSONStream = _interopRequireDefault(require("JSONStream"));
|
|
8
|
+
var _stream = require("stream");
|
|
8
9
|
var _from = _interopRequireDefault(require("from"));
|
|
9
10
|
var _debug = _interopRequireDefault(require("debug"));
|
|
10
11
|
var _streamWrite = _interopRequireDefault(require("stream-write"));
|
|
11
|
-
var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
|
|
12
12
|
var _parseHeaders = _interopRequireDefault(require("parse-headers"));
|
|
13
13
|
var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
14
14
|
var _getStream = _interopRequireDefault(require("get-stream"));
|
|
15
|
-
var _fetchWithProxy = _interopRequireDefault(require("fetch-with-proxy"));
|
|
16
15
|
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
17
16
|
const restMethods = ['POST', 'GET', 'DELETE', 'PUT', 'PATCH', 'HEAD', 'OPTIONS', 'TRACE'];
|
|
18
17
|
/**
|
|
@@ -26,7 +25,7 @@ const restMethods = ['POST', 'GET', 'DELETE', 'PUT', 'PATCH', 'HEAD', 'OPTIONS',
|
|
|
26
25
|
* @param {String} [url] URL to fetch
|
|
27
26
|
* @param {String} [streaming=false] Direct connection to the Object Stream server (disables the retries setting)
|
|
28
27
|
* @param {String} [json=false] Parse as JSON the content of URL
|
|
29
|
-
* @param {Number} [timeout=1000] Timeout in milliseconds
|
|
28
|
+
* @param {Number} [timeout=5000] Timeout in milliseconds
|
|
30
29
|
* @param {Boolean} [noerror=false] Ignore all errors
|
|
31
30
|
* @param {Number} [retries=5] The maximum amount of times to retry the connection
|
|
32
31
|
* @param {String} [encoder=dump] The statement to encode each chunk to a string
|
|
@@ -45,7 +44,7 @@ async function URLConnect(data, feed) {
|
|
|
45
44
|
ezs
|
|
46
45
|
} = this;
|
|
47
46
|
if (this.isFirst()) {
|
|
48
|
-
const timeout = Number(this.getParam('timeout')) || 1000;
|
|
47
|
+
const timeout = Number(this.getParam('timeout', 5000));
|
|
49
48
|
const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
|
|
50
49
|
this.input = ezs.createStream(ezs.objectMode());
|
|
51
50
|
const output = ezs.createStream(ezs.objectMode());
|
|
@@ -65,15 +64,19 @@ async function URLConnect(data, feed) {
|
|
|
65
64
|
body: bodyIn,
|
|
66
65
|
headers
|
|
67
66
|
};
|
|
67
|
+
const controller = new AbortController();
|
|
68
68
|
try {
|
|
69
69
|
await (0, _asyncRetry.default)(async (bail, numberOfTimes) => {
|
|
70
70
|
if (numberOfTimes > 1) {
|
|
71
71
|
(0, _debug.default)('ezs:debug')(`Attempts to reconnect (${numberOfTimes})`);
|
|
72
72
|
}
|
|
73
|
-
const controller = new _nodeAbortController.default();
|
|
74
|
-
const response = await (0, _fetchWithProxy.default)(url, {
|
|
73
|
+
const hasBody = parameters.body !== undefined;
|
|
74
|
+
const response = await fetch(url, {
|
|
75
75
|
...parameters,
|
|
76
|
-
signal: controller.signal
|
76
|
+
...(hasBody && {
|
|
77
|
+
duplex: 'half'
|
|
78
|
+
}),
|
|
79
|
+
signal: AbortSignal.any([controller.signal, AbortSignal.timeout(timeout)])
|
|
77
80
|
});
|
|
78
81
|
if (!response.ok) {
|
|
79
82
|
const err = new Error(response.statusText);
|
|
@@ -81,16 +84,17 @@ async function URLConnect(data, feed) {
|
|
|
81
84
|
err.responseText = text;
|
|
82
85
|
throw err;
|
|
83
86
|
}
|
|
87
|
+
const bodyStream = _stream.Readable.fromWeb(response.body);
|
|
84
88
|
if (streaming) {
|
|
85
|
-
const bodyOut = json ? response.body.pipe(_JSONStream.default.parse('*')) : response.body;
|
|
89
|
+
const bodyOut = json ? bodyStream.pipe(_JSONStream.default.parse('*')) : bodyStream;
|
|
86
90
|
bodyOut.once('error', e => {
|
|
87
|
-
controller.abort();
|
|
88
91
|
output.emit('error', e);
|
|
92
|
+
//controller.abort();
|
|
89
93
|
});
|
|
90
94
|
return bodyOut.pipe(output);
|
|
91
95
|
}
|
|
92
96
|
if (json) {
|
|
93
|
-
const bodyOutRaw = await (0, _getStream.default)(response.body);
|
|
97
|
+
const bodyOutRaw = await (0, _getStream.default)(bodyStream);
|
|
94
98
|
if (bodyOutRaw === '') {
|
|
95
99
|
throw new Error('URL returned an empty response');
|
|
96
100
|
}
|
|
@@ -102,11 +106,12 @@ async function URLConnect(data, feed) {
|
|
|
102
106
|
}
|
|
103
107
|
return (0, _from.default)(bodyOutArray).pipe(output);
|
|
104
108
|
}
|
|
105
|
-
return response.body.pipe(output);
|
|
109
|
+
return bodyStream.pipe(output);
|
|
106
110
|
}, {
|
|
107
111
|
retries: streaming ? 0 : retries
|
|
108
112
|
});
|
|
109
113
|
} catch (e) {
|
|
114
|
+
controller.abort();
|
|
110
115
|
if (!noerror) {
|
|
111
116
|
(0, _debug.default)('ezs:warn')(`Break item #${this.getIndex()} [URLConnect]`, ezs.serializeError(e));
|
|
112
117
|
feed.stop(e);
|
package/lib/url-fetch.js
CHANGED
|
@@ -6,7 +6,6 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.default = URLFetch;
|
|
7
7
|
var _debug = _interopRequireDefault(require("debug"));
|
|
8
8
|
var _lodash = require("lodash");
|
|
9
|
-
var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
|
|
10
9
|
var _parseHeaders = _interopRequireDefault(require("parse-headers"));
|
|
11
10
|
var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
12
11
|
var _request = _interopRequireDefault(require("./request"));
|
|
@@ -24,7 +23,7 @@ const createObjectURL = (arrayBuffer, mimeType = 'application/octet-stream') =>
|
|
|
24
23
|
* @param {String} [target] choose the key to set
|
|
25
24
|
* @param {String} [json=false] parse as JSON the content of URL
|
|
26
25
|
* @param {String} [dataurl=false] encode content into DATA Url
|
|
27
|
-
* @param {Number} [timeout=1000] timeout in milliseconds
|
|
26
|
+
* @param {Number} [timeout=5000] timeout in milliseconds
|
|
28
27
|
* @param {String} [mimetype="application/json"] mimetype for value of path (if presents)
|
|
29
28
|
* @param {Boolean} [noerror=false] ignore all errors, the target field will remain undefined
|
|
30
29
|
* @param {Number} [retries=5] The maximum amount of times to retry the connection
|
|
@@ -41,16 +40,15 @@ async function URLFetch(data, feed) {
|
|
|
41
40
|
const dataurl = Boolean(this.getParam('dataurl', false));
|
|
42
41
|
const retries = Number(this.getParam('retries', 5));
|
|
43
42
|
const noerror = Boolean(this.getParam('noerror', false));
|
|
44
|
-
const timeout = Number(this.getParam('timeout')) || 1000;
|
|
43
|
+
const timeout = Number(this.getParam('timeout', 5000));
|
|
45
44
|
const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
|
|
46
45
|
const mimetype = String(this.getParam('mimetype', 'application/json'));
|
|
47
|
-
const controller = new _nodeAbortController.default();
|
|
46
|
+
const controller = new AbortController();
|
|
48
47
|
const key = Array.isArray(path) ? path.shift() : path;
|
|
49
48
|
const body = (0, _lodash.get)(data, key);
|
|
50
49
|
const parameters = {
|
|
51
|
-
timeout,
|
|
52
50
|
headers,
|
|
53
|
-
signal: controller.signal
|
|
51
|
+
signal: AbortSignal.any([controller.signal, AbortSignal.timeout(timeout)])
|
|
54
52
|
};
|
|
55
53
|
const options = {
|
|
56
54
|
retries
|
package/lib/url-request.js
CHANGED
|
@@ -7,7 +7,6 @@ exports.default = URLRequest;
|
|
|
7
7
|
var _lodash = require("lodash");
|
|
8
8
|
var _debug = _interopRequireDefault(require("debug"));
|
|
9
9
|
var _url = require("url");
|
|
10
|
-
var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
|
|
11
10
|
var _parseHeaders = _interopRequireDefault(require("parse-headers"));
|
|
12
11
|
var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
13
12
|
var _request = _interopRequireDefault(require("./request"));
|
|
@@ -43,7 +42,7 @@ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e
|
|
|
43
42
|
* @param {String} [url] URL to fetch
|
|
44
43
|
* @param {Boolean} [json=true] parse result as json
|
|
45
44
|
* @param {String} [target] choose the key to set
|
|
46
|
-
* @param {Number} [timeout=1000] Timeout in milliseconds
|
|
45
|
+
* @param {Number} [timeout=5000] Timeout in milliseconds
|
|
47
46
|
* @param {Boolean} [noerror=false] Ignore all errors, the target field will remain undefined
|
|
48
47
|
* @param {Number} [retries=5] The maximum amount of times to retry the connection
|
|
49
48
|
* @param {String} [insert] a header response value in the result
|
|
@@ -58,15 +57,14 @@ async function URLRequest(data, feed) {
|
|
|
58
57
|
const target = [].concat(this.getParam('target')).filter(Boolean).shift();
|
|
59
58
|
const retries = Number(this.getParam('retries', 5));
|
|
60
59
|
const noerror = Boolean(this.getParam('noerror', false));
|
|
61
|
-
const timeout = Number(this.getParam('timeout')) || 1000;
|
|
60
|
+
const timeout = Number(this.getParam('timeout', 5000));
|
|
62
61
|
const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
|
|
63
62
|
const inserts = [].concat(this.getParam('insert')).filter(Boolean);
|
|
64
63
|
const cURL = new _url.URL(url || data);
|
|
65
|
-
const controller = new _nodeAbortController.default();
|
|
64
|
+
const controller = new AbortController();
|
|
66
65
|
const parameters = {
|
|
67
|
-
timeout,
|
|
68
66
|
headers,
|
|
69
|
-
signal: controller.signal
|
|
67
|
+
signal: AbortSignal.any([controller.signal, AbortSignal.timeout(timeout)])
|
|
70
68
|
};
|
|
71
69
|
const options = {
|
|
72
70
|
retries
|
package/lib/url-stream.js
CHANGED
|
@@ -5,8 +5,8 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
5
5
|
});
|
|
6
6
|
exports.default = URLStream;
|
|
7
7
|
var _debug = _interopRequireDefault(require("debug"));
|
|
8
|
+
var _stream = require("stream");
|
|
8
9
|
var _url = require("url");
|
|
9
|
-
var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
|
|
10
10
|
var _JSONStream = _interopRequireDefault(require("JSONStream"));
|
|
11
11
|
var _parseHeaders = _interopRequireDefault(require("parse-headers"));
|
|
12
12
|
var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
@@ -73,7 +73,7 @@ function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e
|
|
|
73
73
|
* @name URLStream
|
|
74
74
|
* @param {String} [url] URL to fetch (by default input string is taken)
|
|
75
75
|
* @param {String} [path="*"] choose the path to split JSON result
|
|
76
|
-
* @param {Number} [timeout=1000] Timeout in milliseconds
|
|
76
|
+
* @param {Number} [timeout=5000] Timeout in milliseconds
|
|
77
77
|
* @param {Boolean} [noerror=false] Ignore all errors, the target field will remain undefined
|
|
78
78
|
* @param {Number} [retries=5] The maximum amount of times to retry the connection
|
|
79
79
|
* @returns {Object}
|
|
@@ -89,14 +89,13 @@ async function URLStream(data, feed) {
|
|
|
89
89
|
const path = this.getParam('path', '*');
|
|
90
90
|
const retries = Number(this.getParam('retries', 5));
|
|
91
91
|
const noerror = Boolean(this.getParam('noerror', false));
|
|
92
|
-
const timeout = Number(this.getParam('timeout')) || 1000;
|
|
92
|
+
const timeout = Number(this.getParam('timeout', 5000));
|
|
93
93
|
const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
|
|
94
94
|
const cURL = new _url.URL(url || data);
|
|
95
|
-
const controller = new _nodeAbortController.default();
|
|
95
|
+
const controller = new AbortController();
|
|
96
96
|
const parameters = {
|
|
97
|
-
timeout,
|
|
98
97
|
headers,
|
|
99
|
-
signal: controller.signal
|
|
98
|
+
signal: AbortSignal.any([controller.signal, AbortSignal.timeout(timeout)])
|
|
100
99
|
};
|
|
101
100
|
const options = {
|
|
102
101
|
retries
|
|
@@ -105,7 +104,6 @@ async function URLStream(data, feed) {
|
|
|
105
104
|
cURL.search = new _url.URLSearchParams(data);
|
|
106
105
|
}
|
|
107
106
|
const onError = e => {
|
|
108
|
-
controller.abort();
|
|
109
107
|
if (noerror) {
|
|
110
108
|
(0, _debug.default)('ezs:info')(`Ignore item #${this.getIndex()} [URLStream]`, ezs.serializeError(e));
|
|
111
109
|
return feed.send(data);
|
|
@@ -115,10 +113,12 @@ async function URLStream(data, feed) {
|
|
|
115
113
|
};
|
|
116
114
|
try {
|
|
117
115
|
const response = await (0, _asyncRetry.default)((0, _request.default)(cURL.href, parameters), options);
|
|
118
|
-
const output = path ? response.body.pipe(_JSONStream.default.parse(path)) : response.body;
|
|
116
|
+
const bodyStream = _stream.Readable.fromWeb(response.body);
|
|
117
|
+
const output = path ? bodyStream.pipe(_JSONStream.default.parse(path)) : bodyStream;
|
|
119
118
|
output.once('error', onError);
|
|
120
119
|
await feed.flow(output);
|
|
121
120
|
} catch (e) {
|
|
122
121
|
onError(e);
|
|
122
|
+
controller.abort();
|
|
123
123
|
}
|
|
124
124
|
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ezs/basics",
|
|
3
3
|
"description": "Basics statements for EZS",
|
|
4
|
-
"version": "2.11.0",
|
|
4
|
+
"version": "2.12.0",
|
|
5
5
|
"author": "Nicolas Thouvenin <nthouvenin@gmail.com>",
|
|
6
6
|
"bugs": "https://github.com/Inist-CNRS/ezs/issues",
|
|
7
7
|
"dependencies": {
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
"from": "0.1.7",
|
|
17
17
|
"get-stream": "6.0.1",
|
|
18
18
|
"higher-path": "1.0.0",
|
|
19
|
-
"lodash": "4.
|
|
19
|
+
"lodash": "4.18.1",
|
|
20
20
|
"make-dir": "4.0.0",
|
|
21
21
|
"micromatch": "4.0.8",
|
|
22
22
|
"node-abort-controller": "1.1.0",
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"directories": {
|
|
37
37
|
"test": "test"
|
|
38
38
|
},
|
|
39
|
-
"gitHead": "
|
|
39
|
+
"gitHead": "d495361ad3a11b258c4370e97bfa6899f5c9dfbf",
|
|
40
40
|
"homepage": "https://github.com/Inist-CNRS/ezs/tree/master/packages/basics#readme",
|
|
41
41
|
"keywords": [
|
|
42
42
|
"ezs"
|
package/lib/fetch.js
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = fetch;
|
|
7
|
-
|
|
8
|
-
var _crossFetch = _interopRequireDefault(require("cross-fetch"));
|
|
9
|
-
|
|
10
|
-
var _proxyFromEnv = require("proxy-from-env");
|
|
11
|
-
|
|
12
|
-
var _http = _interopRequireDefault(require("http"));
|
|
13
|
-
|
|
14
|
-
var _https = _interopRequireDefault(require("https"));
|
|
15
|
-
|
|
16
|
-
var _betterHttpsProxyAgent = require("better-https-proxy-agent");
|
|
17
|
-
|
|
18
|
-
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
19
|
-
|
|
20
|
-
const DefaultOptions = {
|
|
21
|
-
keepAlive: true,
|
|
22
|
-
timeout: 1000,
|
|
23
|
-
keepAliveMsecs: 500,
|
|
24
|
-
maxSockets: 200,
|
|
25
|
-
maxFreeSockets: 5,
|
|
26
|
-
maxCachedSessions: 500
|
|
27
|
-
};
|
|
28
|
-
|
|
29
|
-
const chooseAgent = (parsedURL, options) => {
|
|
30
|
-
const proxyurl = (0, _proxyFromEnv.getProxyForUrl)(parsedURL.href);
|
|
31
|
-
|
|
32
|
-
if (proxyurl) {
|
|
33
|
-
const proxyRequestOptions = new URL(proxyurl);
|
|
34
|
-
return new _betterHttpsProxyAgent.Agent(options, proxyRequestOptions);
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
if (parsedURL.protocol === 'https:') {
|
|
38
|
-
return new _https.default.Agent(options);
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
return new _http.default.Agent(options);
|
|
42
|
-
};
|
|
43
|
-
|
|
44
|
-
function fetch(url, options) {
|
|
45
|
-
const opts = options || {};
|
|
46
|
-
const {
|
|
47
|
-
keepAlive,
|
|
48
|
-
timeout,
|
|
49
|
-
keepAliveMsecs,
|
|
50
|
-
maxSockets,
|
|
51
|
-
maxFreeSockets,
|
|
52
|
-
maxCachedSessions
|
|
53
|
-
} = { ...options,
|
|
54
|
-
...DefaultOptions
|
|
55
|
-
};
|
|
56
|
-
let agent = chooseAgent(new URL(url), {
|
|
57
|
-
keepAlive,
|
|
58
|
-
timeout,
|
|
59
|
-
keepAliveMsecs,
|
|
60
|
-
maxSockets,
|
|
61
|
-
maxFreeSockets,
|
|
62
|
-
maxCachedSessions
|
|
63
|
-
});
|
|
64
|
-
opts.agent = agent;
|
|
65
|
-
|
|
66
|
-
if (opts.signal) {
|
|
67
|
-
opts.signal.addEventListener('abort', () => {
|
|
68
|
-
agent.destroy();
|
|
69
|
-
agent = null;
|
|
70
|
-
});
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
return (0, _crossFetch.default)(url, options);
|
|
74
|
-
}
|
package/lib/file-load-parquet.js
DELETED
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = FILELoadParquet;
|
|
7
|
-
var _fs = require("fs");
|
|
8
|
-
var _path = require("path");
|
|
9
|
-
var _os = require("os");
|
|
10
|
-
var _higherPath = _interopRequireDefault(require("higher-path"));
|
|
11
|
-
var _hyparquet = require("hyparquet");
|
|
12
|
-
function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
|
|
13
|
-
/**
|
|
14
|
-
* @name FileLoadParquet
|
|
15
|
-
* @param {String} [location=TMPDIR] Directory location
|
|
16
|
-
* @returns {Object}
|
|
17
|
-
*/
|
|
18
|
-
async function FILELoadParquet(data, feed) {
|
|
19
|
-
if (this.isLast()) {
|
|
20
|
-
return feed.close();
|
|
21
|
-
}
|
|
22
|
-
const location = (0, _path.normalize)(this.getParam('location', '/'));
|
|
23
|
-
const locations = [(0, _higherPath.default)((0, _os.tmpdir)(), location), (0, _higherPath.default)(process.cwd(), location)];
|
|
24
|
-
const filename = locations.filter(Boolean).map(dir => (0, _path.resolve)(dir, String(data).trim())).filter(fil => (0, _fs.existsSync)(fil)).shift();
|
|
25
|
-
if (!filename) {
|
|
26
|
-
return feed.stop(new Error('File location check failed.'));
|
|
27
|
-
}
|
|
28
|
-
(0, _fs.accessSync)((0, _path.dirname)(filename), _fs.constants.R_OK | _fs.constants.W_OK);
|
|
29
|
-
(0, _fs.accessSync)(filename, _fs.constants.R_OK | _fs.constants.W_OK);
|
|
30
|
-
await (0, _hyparquet.parquetRead)({
|
|
31
|
-
file: await (0, _hyparquet.asyncBufferFromFile)(filename),
|
|
32
|
-
rowFormat: 'object',
|
|
33
|
-
onComplete: rows => feed.write(rows)
|
|
34
|
-
});
|
|
35
|
-
return feed.end();
|
|
36
|
-
}
|
|
37
|
-
;
|
package/lib/obj-columns.js
DELETED
|
@@ -1,53 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = OBJColumns;
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Take an `Object` and flatten it to get only one level of keys.
|
|
10
|
-
*
|
|
11
|
-
* <caption>Input:</caption>
|
|
12
|
-
*
|
|
13
|
-
* ```json
|
|
14
|
-
* [{
|
|
15
|
-
* "foo": {
|
|
16
|
-
* "hello": "world"
|
|
17
|
-
* },
|
|
18
|
-
* "bar": "anything else",
|
|
19
|
-
* "baz": 1
|
|
20
|
-
* }]
|
|
21
|
-
* ```
|
|
22
|
-
*
|
|
23
|
-
* <caption>Output:</caption>
|
|
24
|
-
*
|
|
25
|
-
* ```json
|
|
26
|
-
* [{
|
|
27
|
-
* "foo": "{\"hello\":\"world\"}",
|
|
28
|
-
* "bar": "anything else",
|
|
29
|
-
* "baz": 1
|
|
30
|
-
* }]
|
|
31
|
-
* ```
|
|
32
|
-
*
|
|
33
|
-
* @name OBJColumns
|
|
34
|
-
* @alias flatten
|
|
35
|
-
* @param {undefined} none
|
|
36
|
-
* @returns {Object}
|
|
37
|
-
*/
|
|
38
|
-
function OBJColumns(data, feed) {
|
|
39
|
-
if (this.isLast()) {
|
|
40
|
-
feed.close();
|
|
41
|
-
return;
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
const obj = {};
|
|
45
|
-
Object.keys(data).sort((x, y) => x.localeCompare(y)).forEach(key => {
|
|
46
|
-
if (typeof data[key] === 'object') {
|
|
47
|
-
obj[key] = JSON.stringify(data[key]);
|
|
48
|
-
} else {
|
|
49
|
-
obj[key] = data[key];
|
|
50
|
-
}
|
|
51
|
-
});
|
|
52
|
-
feed.send(obj);
|
|
53
|
-
}
|
package/lib/skos-object.js
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = void 0;
|
|
7
|
-
|
|
8
|
-
function Concept(data, feed) {
|
|
9
|
-
const obj = {};
|
|
10
|
-
Object.keys(data).forEach(key => {
|
|
11
|
-
const newkey = key.replace('skos$', '');
|
|
12
|
-
|
|
13
|
-
if (Array.isArray(data[key])) {
|
|
14
|
-
data[key].filter(x => x.xml$lang).forEach(item => {
|
|
15
|
-
const localkey = newkey.concat('@').concat(item.xml$lang);
|
|
16
|
-
obj[localkey] = item.$t;
|
|
17
|
-
});
|
|
18
|
-
} else if (data[key].rdf$resource && !obj[newkey]) {
|
|
19
|
-
obj[newkey] = data[key].rdf$resource;
|
|
20
|
-
} else if (data[key].rdf$resource && obj[newkey]) {
|
|
21
|
-
obj[newkey] = [obj[newkey], data[key].rdf$resource];
|
|
22
|
-
} else if (data[key].$t && data[key].xml$lang) {
|
|
23
|
-
const localkey = newkey.concat('@').concat(data[key].xml$lang);
|
|
24
|
-
obj[localkey] = data[key].$t;
|
|
25
|
-
} else if (data[key].$t && Array.isArray(obj[newkey])) {
|
|
26
|
-
obj[newkey].push(data[key].$t);
|
|
27
|
-
} else if (data[key].$t && obj[newkey]) {
|
|
28
|
-
obj[newkey] = [obj[newkey], data[key].$t];
|
|
29
|
-
} else if (data[key].$t && !obj[newkey]) {
|
|
30
|
-
obj[newkey] = data[key].$t;
|
|
31
|
-
} else if (typeof data[key] === 'object') {
|
|
32
|
-
obj[newkey] = (this.getIndex().toString(36) + Math.random().toString(36).substr(2, 5)).toUpperCase();
|
|
33
|
-
let counter = 0;
|
|
34
|
-
Object.keys(data[key]).forEach(key2 => {
|
|
35
|
-
if (typeof data[key][key2] === 'object') {
|
|
36
|
-
data[key][key2].rdf$about = obj[newkey];
|
|
37
|
-
Concept.call(this, data[key][key2], feed);
|
|
38
|
-
counter += 1;
|
|
39
|
-
}
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
if (counter === 0) {
|
|
43
|
-
delete obj[newkey];
|
|
44
|
-
}
|
|
45
|
-
} else {
|
|
46
|
-
obj[newkey] = data[key];
|
|
47
|
-
}
|
|
48
|
-
});
|
|
49
|
-
feed.write(obj);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
function SKOSObject(data, feed) {
|
|
53
|
-
if (this.isLast()) {
|
|
54
|
-
feed.close();
|
|
55
|
-
} else {
|
|
56
|
-
Concept.call(this, data, feed);
|
|
57
|
-
feed.end();
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
/**
|
|
61
|
-
* Take `Object` generated by XMLMapping & SKOS data and
|
|
62
|
-
* create a new basic object with only keys & values
|
|
63
|
-
*
|
|
64
|
-
* @name SKOSObject
|
|
65
|
-
* @param {undefined} none
|
|
66
|
-
* @returns {Object}
|
|
67
|
-
*/
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
var _default = {
|
|
71
|
-
SKOSObject
|
|
72
|
-
};
|
|
73
|
-
exports.default = _default;
|
package/lib/txt-inflection.js
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = void 0;
|
|
7
|
-
var _lodash = require("lodash");
|
|
8
|
-
var _inflection = require("inflection");
|
|
9
|
-
const transformer = transformations => str => str && typeof str === 'string' ? (0, _inflection.transform)(str, transformations) : str;
|
|
10
|
-
const TXTInflection = (data, feed, ctx) => {
|
|
11
|
-
if (ctx.isLast()) {
|
|
12
|
-
return feed.close();
|
|
13
|
-
}
|
|
14
|
-
const transformations = [].concat(ctx.getParam('transform', [])).filter(Boolean);
|
|
15
|
-
const path = ctx.getParam('path', 'value');
|
|
16
|
-
const value = (0, _lodash.get)(data, path, '');
|
|
17
|
-
const process = transformer(transformations);
|
|
18
|
-
const result = Array.isArray(value) ? value.map(item => process(item)) : process(value);
|
|
19
|
-
feed.write({
|
|
20
|
-
...data,
|
|
21
|
-
[path]: result
|
|
22
|
-
});
|
|
23
|
-
return feed.end();
|
|
24
|
-
};
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* Take a `String` and inflect it with or more transformers from this list
|
|
28
|
-
* pluralize, singularize, camelize, underscore, humanize, capitalize,
|
|
29
|
-
* dasherize, titleize, demodulize, tableize, classify, foreign_key, ordinalize
|
|
30
|
-
*
|
|
31
|
-
* Input:
|
|
32
|
-
*
|
|
33
|
-
* ```json
|
|
34
|
-
* { "id": 1, "value": "all job" }
|
|
35
|
-
* ```
|
|
36
|
-
* Script:
|
|
37
|
-
* ```ini
|
|
38
|
-
* [TXTInflection]
|
|
39
|
-
* transform = pluralize
|
|
40
|
-
* transform = capitalize
|
|
41
|
-
* transform = dasherize
|
|
42
|
-
* ```
|
|
43
|
-
*
|
|
44
|
-
* Output:
|
|
45
|
-
*
|
|
46
|
-
* ```json
|
|
47
|
-
* { "id": 1, "value": "All-jobs" }
|
|
48
|
-
* ```
|
|
49
|
-
*
|
|
50
|
-
* @name TXTInflection
|
|
51
|
-
* @param {String} [path="value"] path of the field to segment
|
|
52
|
-
* @param {String} [transform] name of a transformer
|
|
53
|
-
* @returns {String[]}
|
|
54
|
-
* @deprecated
|
|
55
|
-
* see https://inist-cnrs.github.io/ezs/#/plugin-strings?id=inflection
|
|
56
|
-
* see https://www.npmjs.com/package/inflection
|
|
57
|
-
*/
|
|
58
|
-
var _default = {
|
|
59
|
-
TXTInflection
|
|
60
|
-
};
|
|
61
|
-
exports.default = _default;
|
package/lib/txt-sentences.js
DELETED
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = void 0;
|
|
7
|
-
var _lodash = require("lodash");
|
|
8
|
-
const UPPER_LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
|
|
9
|
-
const SENTENCE_INIT = ' ';
|
|
10
|
-
const SENTENCE_ENDING = '.?!';
|
|
11
|
-
|
|
12
|
-
/*
|
|
13
|
-
* Segment sentences from `str` into an array
|
|
14
|
-
* @param {string} str
|
|
15
|
-
* @returns {string[]}
|
|
16
|
-
*/
|
|
17
|
-
const segmentSentences = str => {
|
|
18
|
-
const characters = Array.from(str);
|
|
19
|
-
const sentences = characters.reduce(
|
|
20
|
-
/*
|
|
21
|
-
* @param {string[]} prevSentences
|
|
22
|
-
* @param {string} character
|
|
23
|
-
* @return {string[]}
|
|
24
|
-
*/
|
|
25
|
-
(prevSentences, character) => {
|
|
26
|
-
const currentSentence = prevSentences.slice(-1)[0];
|
|
27
|
-
const [char1, char2] = currentSentence.slice(-2);
|
|
28
|
-
if (SENTENCE_ENDING.includes(character)) {
|
|
29
|
-
if (character !== '.') {
|
|
30
|
-
return [...prevSentences.slice(0, -1), currentSentence + character, SENTENCE_INIT];
|
|
31
|
-
}
|
|
32
|
-
if (char1 !== ' ') {
|
|
33
|
-
return [...prevSentences.slice(0, -1), currentSentence + character, SENTENCE_INIT];
|
|
34
|
-
}
|
|
35
|
-
if (!UPPER_LETTERS.includes(char2)) {
|
|
36
|
-
return [...prevSentences.slice(0, -1), currentSentence + character, SENTENCE_INIT];
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
return [...prevSentences.slice(0, -1), currentSentence + character];
|
|
40
|
-
}, [SENTENCE_INIT]).filter(sentence => sentence !== SENTENCE_INIT).map(sentence => sentence.trimStart());
|
|
41
|
-
return sentences;
|
|
42
|
-
};
|
|
43
|
-
const TXTSentences = (data, feed, ctx) => {
|
|
44
|
-
if (ctx.isLast()) {
|
|
45
|
-
return feed.close();
|
|
46
|
-
}
|
|
47
|
-
const path = ctx.getParam('path', 'value');
|
|
48
|
-
const value = (0, _lodash.get)(data, path);
|
|
49
|
-
const str = Array.isArray(value) ? value.map(item => typeof item === 'string' ? item : '').join(' ') : value;
|
|
50
|
-
const sentences = str ? segmentSentences(str) : [];
|
|
51
|
-
feed.write({
|
|
52
|
-
...data,
|
|
53
|
-
[path]: sentences
|
|
54
|
-
});
|
|
55
|
-
return feed.end();
|
|
56
|
-
};
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* Take a `String` and split it into an array of sentences.
|
|
60
|
-
*
|
|
61
|
-
* Input:
|
|
62
|
-
*
|
|
63
|
-
* ```json
|
|
64
|
-
* { "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." }
|
|
65
|
-
* ```
|
|
66
|
-
*
|
|
67
|
-
* Output:
|
|
68
|
-
*
|
|
69
|
-
* ```json
|
|
70
|
-
* { "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] }
|
|
71
|
-
* ```
|
|
72
|
-
*
|
|
73
|
-
* @name TXTSentences
|
|
74
|
-
* @param {String} [path="value"] path of the field to segment
|
|
75
|
-
* @returns {String[]}
|
|
76
|
-
* @deprecated
|
|
77
|
-
* see https://inist-cnrs.github.io/ezs/#/plugin-strings?id=sentences
|
|
78
|
-
*/
|
|
79
|
-
var _default = {
|
|
80
|
-
TXTSentences
|
|
81
|
-
};
|
|
82
|
-
exports.default = _default;
|
package/lib/url-pager.js
DELETED
|
@@ -1,179 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = URLPager;
|
|
7
|
-
|
|
8
|
-
var _debug = _interopRequireDefault(require("debug"));
|
|
9
|
-
|
|
10
|
-
var _url = require("url");
|
|
11
|
-
|
|
12
|
-
var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
|
|
13
|
-
|
|
14
|
-
var _lodash = _interopRequireDefault(require("lodash.get"));
|
|
15
|
-
|
|
16
|
-
var _parseHeaders = _interopRequireDefault(require("parse-headers"));
|
|
17
|
-
|
|
18
|
-
var _asyncRetry = _interopRequireDefault(require("async-retry"));
|
|
19
|
-
|
|
20
|
-
var _request = _interopRequireDefault(require("./request"));
|
|
21
|
-
|
|
22
|
-
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
23
|
-
|
|
24
|
-
/**
|
|
25
|
-
* Take an `Object` as the parameters of a URL and emit one object per page of results
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
* Input:
|
|
29
|
-
*
|
|
30
|
-
* ```json
|
|
31
|
-
* [{"q": "a"}]
|
|
32
|
-
* ```
|
|
33
|
-
*
|
|
34
|
-
* Script:
|
|
35
|
-
*
|
|
36
|
-
* ```ini
|
|
37
|
-
* [URLPager]
|
|
38
|
-
* url = https://api.search.net
|
|
39
|
-
* path = total
|
|
40
|
-
* ```
|
|
41
|
-
*
|
|
42
|
-
* Output:
|
|
43
|
-
*
|
|
44
|
-
* ```json
|
|
45
|
-
* [
|
|
46
|
-
* {
|
|
47
|
-
* "q": "a",
|
|
48
|
-
* "total": 22,
|
|
49
|
-
* "offset": 0,
|
|
50
|
-
* "pageNumber": 1,
|
|
51
|
-
* "totalPages": 3,
|
|
52
|
-
* "maxPages": 1000,
|
|
53
|
-
* "limit": 10
|
|
54
|
-
* },
|
|
55
|
-
* {
|
|
56
|
-
* "q": "a",
|
|
57
|
-
* "total": 22,
|
|
58
|
-
* "offset": 10,
|
|
59
|
-
* "pageNumber": 2,
|
|
60
|
-
* "totalPages": 3,
|
|
61
|
-
* "maxPages": 1000,
|
|
62
|
-
* "limit": 10
|
|
63
|
-
* },
|
|
64
|
-
* {
|
|
65
|
-
* "q": "a",
|
|
66
|
-
* "total": 22,
|
|
67
|
-
* "offset": 20,
|
|
68
|
-
* "pageNumber": 3,
|
|
69
|
-
* "totalPages": 3,
|
|
70
|
-
* "maxPages": 1000,
|
|
71
|
-
* "limit": 10
|
|
72
|
-
* }
|
|
73
|
-
* ]
|
|
74
|
-
* ```
|
|
75
|
-
*
|
|
76
|
-
* #### Example with URLs
|
|
77
|
-
*
|
|
78
|
-
* Input:
|
|
79
|
-
*
|
|
80
|
-
* ```json
|
|
81
|
-
* [
|
|
82
|
-
* "https://httpbin.org/get?a=a",
|
|
83
|
-
* "https://httpbin.org/get?a=b",
|
|
84
|
-
* "https://httpbin.org/get?a=c"
|
|
85
|
-
* ]
|
|
86
|
-
* ```
|
|
87
|
-
*
|
|
88
|
-
* Script:
|
|
89
|
-
*
|
|
90
|
-
* ```ini
|
|
91
|
-
* [URLPager]
|
|
92
|
-
* path = .args
|
|
93
|
-
* ```
|
|
94
|
-
*
|
|
95
|
-
* Output:
|
|
96
|
-
*
|
|
97
|
-
* ```json
|
|
98
|
-
* [{"a": "a"}, {"a": "b"}, {"a": "c" }]
|
|
99
|
-
* ```
|
|
100
|
-
*
|
|
101
|
-
* @name URLPager
|
|
102
|
-
* @param {String} [url] URL to fetch (by default input string is taken)
|
|
103
|
-
* @param {String} [path=total] path, in the JSON response, of the total number of results
|
|
104
|
-
* @param {Number} [timeout=1000] Timeout in milliseconds
|
|
105
|
-
* @param {Boolean} [noerror=false] Ignore all errors, the target field will remain undefined
|
|
106
|
-
* @param {Number} [retries=5] The maximum amount of times to retry the connection
|
|
107
|
-
* @returns {Object}
|
|
108
|
-
*/
|
|
109
|
-
async function URLPager(data, feed) {
|
|
110
|
-
if (this.isLast()) {
|
|
111
|
-
return feed.close();
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
const url = this.getParam('url');
|
|
115
|
-
const path = this.getParam('path', 'total');
|
|
116
|
-
const limit = Number(this.getParam('limit', 10));
|
|
117
|
-
const maxPages = Number(this.getParam('maxPages', 1000));
|
|
118
|
-
const retries = Number(this.getParam('retries', 5));
|
|
119
|
-
const noerror = Boolean(this.getParam('noerror', false));
|
|
120
|
-
const timeout = Number(this.getParam('timeout')) || 1000;
|
|
121
|
-
const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
|
|
122
|
-
const cURL = new _url.URL(url || data);
|
|
123
|
-
const controller = new _nodeAbortController.default();
|
|
124
|
-
const parameters = {
|
|
125
|
-
timeout,
|
|
126
|
-
headers,
|
|
127
|
-
signal: controller.signal
|
|
128
|
-
};
|
|
129
|
-
const options = {
|
|
130
|
-
retries
|
|
131
|
-
};
|
|
132
|
-
cURL.search = new _url.URLSearchParams(data);
|
|
133
|
-
|
|
134
|
-
const onError = e => {
|
|
135
|
-
controller.abort();
|
|
136
|
-
|
|
137
|
-
if (noerror) {
|
|
138
|
-
(0, _debug.default)('ezs')(`Ignore item #${this.getIndex()} [URLPager] <${e}>`);
|
|
139
|
-
return feed.send(data);
|
|
140
|
-
}
|
|
141
|
-
|
|
142
|
-
(0, _debug.default)('ezs')(`Break item #${this.getIndex()} [URLPager] <${e}>`);
|
|
143
|
-
return feed.send(e);
|
|
144
|
-
};
|
|
145
|
-
|
|
146
|
-
try {
|
|
147
|
-
const response = await (0, _asyncRetry.default)((0, _request.default)(cURL.href, parameters), options);
|
|
148
|
-
const json = await response.json();
|
|
149
|
-
const total = (0, _lodash.default)(json, path);
|
|
150
|
-
|
|
151
|
-
if (total === 0) {
|
|
152
|
-
return onError(new Error('No result.'));
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
if (total === undefined) {
|
|
156
|
-
return onError(new Error('Unexpected response.'));
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
let totalPages = Math.ceil(json.total / limit);
|
|
160
|
-
|
|
161
|
-
if (totalPages > maxPages) {
|
|
162
|
-
totalPages = maxPages;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
for (let pageNumber = 1; pageNumber <= totalPages; pageNumber += 1) {
|
|
166
|
-
feed.write({ ...data,
|
|
167
|
-
offset: (pageNumber - 1) * limit,
|
|
168
|
-
pageNumber,
|
|
169
|
-
totalPages,
|
|
170
|
-
maxPages,
|
|
171
|
-
limit
|
|
172
|
-
});
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
feed.end();
|
|
176
|
-
} catch (e) {
|
|
177
|
-
onError(e);
|
|
178
|
-
}
|
|
179
|
-
}
|
package/lib/utils.js
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
Object.defineProperty(exports, "__esModule", {
|
|
4
|
-
value: true
|
|
5
|
-
});
|
|
6
|
-
exports.default = exports.writeTo = void 0;
|
|
7
|
-
|
|
8
|
-
const writeTo = function writeTo(stream, data, cb) {
|
|
9
|
-
if (!stream.write(data)) {
|
|
10
|
-
stream.once('drain', cb);
|
|
11
|
-
} else {
|
|
12
|
-
process.nextTick(cb);
|
|
13
|
-
}
|
|
14
|
-
};
|
|
15
|
-
|
|
16
|
-
exports.writeTo = writeTo;
|
|
17
|
-
var _default = writeTo;
|
|
18
|
-
exports.default = _default;
|