@ezs/basics 1.23.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -3,6 +3,34 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
  See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
5
5
 
6
+ # [2.0.0](https://github.com/Inist-CNRS/ezs/compare/@ezs/basics@1.24.0...@ezs/basics@2.0.0) (2023-03-29)
7
+
8
+
9
+ ### Features
10
+
11
+ * **basics:** Make TXTSentences pass some tests ([95bd41f](https://github.com/Inist-CNRS/ezs/commit/95bd41f836e5ee2f71c8413142df9ac4dd9ec84b))
12
+ * **basics:** Make TXTSentences work with arrays too ([24c3f76](https://github.com/Inist-CNRS/ezs/commit/24c3f76be73a518791e59b7e78c4d1151af8edd6))
13
+
14
+
15
+ ### BREAKING CHANGES
16
+
17
+ * **basics:** TXTSentences expected structure is now an object, with a value key.
18
+
19
+
20
+
21
+
22
+
23
+ # [1.24.0](https://github.com/Inist-CNRS/ezs/compare/@ezs/basics@1.23.5...@ezs/basics@1.24.0) (2023-03-28)
24
+
25
+
26
+ ### Features
27
+
28
+ * **basics:** Add TXTSentences ([c66abb5](https://github.com/Inist-CNRS/ezs/commit/c66abb5f242afcff57f98a7fa8eff918a7d60098))
29
+
30
+
31
+
32
+
33
+
6
34
  ## [1.23.5](https://github.com/Inist-CNRS/ezs/compare/@ezs/basics@1.23.4...@ezs/basics@1.23.5) (2023-03-24)
7
35
 
8
36
 
package/README.md CHANGED
@@ -30,6 +30,7 @@ npm install @ezs/basics
30
30
  - [TXTConcat](#txtconcat)
31
31
  - [TXTObject](#txtobject)
32
32
  - [TXTParse](#txtparse)
33
+ - [TXTSentences](#txtsentences)
33
34
  - [TXTZip](#txtzip)
34
35
  - [URLConnect](#urlconnect)
35
36
  - [URLFetch](#urlfetch)
@@ -601,6 +602,28 @@ Output:
601
602
 
602
603
  Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
603
604
 
605
+ ### TXTSentences
606
+
607
+ Take a `String` and split it into an array of sentences.
608
+
609
+ Input:
610
+
611
+ ```json
612
+ { "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." }
613
+ ```
614
+
615
+ Output:
616
+
617
+ ```json
618
+ { "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] }
619
+ ```
620
+
621
+ #### Parameters
622
+
623
+ - `path` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** path of the field to segment (optional, default `"value"`)
624
+
625
+ Returns **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>**
626
+
604
627
  ### TXTZip
605
628
 
606
629
  Take a `String` and zip it.
package/lib/index.js CHANGED
@@ -21,6 +21,8 @@ var _txtObject = _interopRequireDefault(require("./txt-object"));
21
21
 
22
22
  var _txtParse = _interopRequireDefault(require("./txt-parse"));
23
23
 
24
+ var _txtSentences = _interopRequireDefault(require("./txt-sentences"));
25
+
24
26
  var _xmlParse = _interopRequireDefault(require("./xml-parse"));
25
27
 
26
28
  var _xmlString = _interopRequireDefault(require("./xml-string"));
@@ -72,6 +74,7 @@ const funcs = {
72
74
  TXTParse: _txtParse.default,
73
75
  TXTObject: _txtObject.default,
74
76
  TXTConcat: _txtConcat.default,
77
+ TXTSentences: _txtSentences.default,
75
78
  XMLParse: _xmlParse.default,
76
79
  XMLString: _xmlString.default,
77
80
  XMLConvert: _xmlConvert.default,
@@ -0,0 +1,90 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.default = void 0;
7
+
8
+ var _lodash = _interopRequireDefault(require("lodash.get"));
9
+
10
+ function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
11
+
12
const UPPER_LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
// Every sentence buffer starts with one space so that an initial placed at the
// very beginning of a sentence ("J. Bond") is still seen as "space + capital".
const SENTENCE_INIT = ' ';
const SENTENCE_ENDING = '.?!';

/**
 * Segment sentences from `str` into an array.
 *
 * A sentence ends on `?`, `!` or `.`. A `.` does not end the sentence when it
 * directly follows a single upper-case ASCII letter that is itself preceded
 * by a space (an initial such as "J."). Leading whitespace is removed from
 * every sentence, and segments that contain only whitespace are dropped, so
 * trailing blanks in the input no longer yield an empty-string sentence.
 *
 * @param {string} str text to segment
 * @returns {string[]} the sentences, in order of appearance
 */
const segmentSentences = (str) => {
    const rawSentences = [];
    let current = SENTENCE_INIT;

    for (const character of Array.from(str)) {
        // Look at the two characters that precede `character`.
        const [beforeLast, last] = current.slice(-2);
        current += character;

        if (SENTENCE_ENDING.includes(character)) {
            const closesInitial = character === '.'
                && beforeLast === ' '
                && UPPER_LETTERS.includes(last);

            if (!closesInitial) {
                rawSentences.push(current);
                current = SENTENCE_INIT;
            }
        }
    }

    // Whatever remains after the last ending character is a sentence too.
    rawSentences.push(current);

    // Trim first, then filter: a segment made only of whitespace must not
    // survive as an empty string.
    return rawSentences
        .map((sentence) => sentence.trimStart())
        .filter((sentence) => sentence !== '');
};
51
+
52
// Statement handler: reads the field found at `path` (default "value"),
// segments it into sentences and writes a shallow copy of `data` in which the
// key `path` holds the resulting array. An array field is first flattened to
// one string: its string items joined with a space, non-string items
// replaced by an empty string. A falsy field yields an empty array.
const TXTSentences = (data, feed, ctx) => {
    if (ctx.isLast()) {
        return feed.close();
    }

    const path = ctx.getParam('path', 'value');
    const value = (0, _lodash.default)(data, path);

    let text = value;
    if (Array.isArray(value)) {
        text = value
            .map((item) => (typeof item === 'string' ? item : ''))
            .join(' ');
    }

    let sentences = [];
    if (text) {
        sentences = segmentSentences(text);
    }

    feed.write({ ...data, [path]: sentences });
    return feed.end();
};
66
+ /**
67
+ * Take a `String` and split it into an array of sentences.
68
+ *
69
+ * Input:
70
+ *
71
+ * ```json
72
+ * { "id": 1, "value": "First sentence? Second sentence. My name is Bond, J. Bond." }
73
+ * ```
74
+ *
75
+ * Output:
76
+ *
77
+ * ```json
78
+ * { "id": 1, "value": ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."] }
79
+ * ```
80
+ *
81
+ * @name TXTSentences
82
+ * @param {String} [path="value"] path of the field to segment
83
+ * @returns {String[]}
84
+ */
85
+
86
+
87
+ var _default = {
88
+ TXTSentences
89
+ };
90
+ exports.default = _default;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@ezs/basics",
3
3
  "description": "Basics statements for EZS",
4
- "version": "1.23.5",
4
+ "version": "2.0.0",
5
5
  "author": "Nicolas Thouvenin <nthouvenin@gmail.com>",
6
6
  "bugs": "https://github.com/Inist-CNRS/ezs/issues",
7
7
  "dependencies": {
@@ -39,7 +39,7 @@
39
39
  "directories": {
40
40
  "test": "test"
41
41
  },
42
- "gitHead": "bd9ad8cea9ee24f9c2c8cbc20c0ec09fbbc1d8ed",
42
+ "gitHead": "314a26ebcbf109ffdb6936ee4b8564e19dd17cbc",
43
43
  "homepage": "https://github.com/Inist-CNRS/ezs/tree/master/packages/basics#readme",
44
44
  "keywords": [
45
45
  "ezs"
package/lib/fetch.js DELETED
@@ -1,74 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = fetch;
7
-
8
- var _crossFetch = _interopRequireDefault(require("cross-fetch"));
9
-
10
- var _proxyFromEnv = require("proxy-from-env");
11
-
12
- var _http = _interopRequireDefault(require("http"));
13
-
14
- var _https = _interopRequireDefault(require("https"));
15
-
16
- var _betterHttpsProxyAgent = require("better-https-proxy-agent");
17
-
18
- function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
19
-
20
// Agent settings used when building the agent for a request.
const DefaultOptions = {
    keepAlive: true,
    timeout: 1000,
    keepAliveMsecs: 500,
    maxSockets: 200,
    maxFreeSockets: 5,
    maxCachedSessions: 500,
};

/**
 * Build the agent to use for `parsedURL`.
 *
 * When proxy-from-env reports a proxy for the URL, a proxy agent pointing at
 * that proxy is returned; otherwise an `https` or `http` agent is created
 * according to the URL protocol.
 *
 * @param {URL} parsedURL target of the request
 * @param {Object} options settings forwarded to the agent constructor
 * @returns {Object} the agent instance
 */
const chooseAgent = (parsedURL, options) => {
    const proxyurl = (0, _proxyFromEnv.getProxyForUrl)(parsedURL.href);

    if (!proxyurl) {
        return parsedURL.protocol === 'https:'
            ? new _https.default.Agent(options)
            : new _http.default.Agent(options);
    }

    const proxyRequestOptions = new URL(proxyurl);
    return new _betterHttpsProxyAgent.Agent(options, proxyRequestOptions);
};
43
-
44
/**
 * Fetch `url` through an agent created for this call.
 *
 * Agent settings are taken from `options` when provided and fall back to
 * `DefaultOptions` otherwise. The agent is stored on the options object as
 * `agent`, and is destroyed when `options.signal` emits `abort`.
 *
 * @param {string} url address to fetch
 * @param {Object} [options] fetch options; may also carry the agent settings
 *   listed in `DefaultOptions` and an abort `signal`
 * @returns {Promise} the promise returned by cross-fetch
 */
function fetch(url, options) {
    const opts = options || {};

    // Caller-supplied values win; defaults only fill the gaps. (Spreading the
    // defaults last made every caller setting silently ignored.)
    const agentOptions = {};
    Object.keys(DefaultOptions).forEach((name) => {
        agentOptions[name] = opts[name] === undefined ? DefaultOptions[name] : opts[name];
    });

    let agent = chooseAgent(new URL(url), agentOptions);
    opts.agent = agent;

    if (opts.signal) {
        opts.signal.addEventListener('abort', () => {
            agent.destroy();
            agent = null;
        });
    }

    // Pass `opts`, not `options`: when the caller omits `options`, the agent
    // created above must still reach cross-fetch.
    return (0, _crossFetch.default)(url, opts);
}
@@ -1,73 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = void 0;
7
-
8
/**
 * Flatten one concept into a plain key/value object and write it to `feed`.
 *
 * For every key of `data` (with the `skos$` part removed):
 * - an array contributes one `key@lang` entry per item carrying `xml$lang`;
 * - an object with `rdf$resource` contributes that resource (a second one
 *   for the same key turns the entry into a pair);
 * - an object with `$t` contributes its text, under `key@lang` when it also
 *   has `xml$lang`, otherwise under `key` (accumulating into an array);
 * - any other object gets a generated identifier; each of its object-valued
 *   children receives that identifier as `rdf$about` and is written as its
 *   own concept before the current one. The entry is removed when no such
 *   child exists;
 * - everything else (including `null`/`undefined`) is copied unchanged.
 *
 * Must be called with a `this` that provides `getIndex()`.
 *
 * @param {Object} data concept to flatten
 * @param {Object} feed receives the flattened object(s) through `write`
 */
function Concept(data, feed) {
    const obj = {};

    Object.keys(data).forEach((key) => {
        const newkey = key.replace('skos$', '');
        const value = data[key];

        // Guard first: property access on null/undefined would throw.
        if (value === null || value === undefined) {
            obj[newkey] = value;
            return;
        }

        if (Array.isArray(value)) {
            value
                .filter((item) => item && item.xml$lang)
                .forEach((item) => {
                    obj[`${newkey}@${item.xml$lang}`] = item.$t;
                });
            return;
        }

        if (value.rdf$resource) {
            obj[newkey] = obj[newkey]
                ? [obj[newkey], value.rdf$resource]
                : value.rdf$resource;
            return;
        }

        if (value.$t) {
            if (value.xml$lang) {
                obj[`${newkey}@${value.xml$lang}`] = value.$t;
            } else if (Array.isArray(obj[newkey])) {
                obj[newkey].push(value.$t);
            } else if (obj[newkey]) {
                obj[newkey] = [obj[newkey], value.$t];
            } else {
                obj[newkey] = value.$t;
            }
            return;
        }

        if (typeof value === 'object') {
            // Generated identifier linking the nested concepts to this entry.
            const identifier = (
                this.getIndex().toString(36) + Math.random().toString(36).slice(2, 7)
            ).toUpperCase();
            obj[newkey] = identifier;

            let counter = 0;
            Object.keys(value).forEach((key2) => {
                const child = value[key2];
                if (child !== null && typeof child === 'object') {
                    child.rdf$about = identifier;
                    Concept.call(this, child, feed);
                    counter += 1;
                }
            });

            if (counter === 0) {
                delete obj[newkey];
            }
            return;
        }

        obj[newkey] = value;
    });

    feed.write(obj);
}
51
-
52
// Statement handler: closes the feed on the last call; otherwise flattens
// `data` with Concept (which writes to the feed) and ends the feed.
function SKOSObject(data, feed) {
    if (!this.isLast()) {
        Concept.call(this, data, feed);
        feed.end();
        return;
    }

    feed.close();
}
60
- /**
61
- * Take `Object` generated by XMLMapping & SKOS data and
62
- * create a new basic object with only keys & values
63
- *
64
- * @name SKOSObject
65
- * @param {undefined} none
66
- * @returns {Object}
67
- */
68
-
69
-
70
- var _default = {
71
- SKOSObject
72
- };
73
- exports.default = _default;
package/lib/url-pager.js DELETED
@@ -1,179 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = URLPager;
7
-
8
- var _debug = _interopRequireDefault(require("debug"));
9
-
10
- var _url = require("url");
11
-
12
- var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
13
-
14
- var _lodash = _interopRequireDefault(require("lodash.get"));
15
-
16
- var _parseHeaders = _interopRequireDefault(require("parse-headers"));
17
-
18
- var _asyncRetry = _interopRequireDefault(require("async-retry"));
19
-
20
- var _request = _interopRequireDefault(require("./request"));
21
-
22
- function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
23
-
24
- /**
25
- * Take `Object` as parameters of URL, throw each chunk from the result
26
- *
27
- *
28
- * Input:
29
- *
30
- * ```json
31
- * [{"q": "a"}]
32
- * ```
33
- *
34
- * Script:
35
- *
36
- * ```ini
37
- * [URLPager]
38
- * url = https://api.search.net
39
- * path = total
40
- * ```
41
- *
42
- * Output:
43
- *
44
- * ```json
45
- * [
46
- * {
47
- * "q": "a",
48
- * "total": 22,
49
- * "offset": 0,
50
- * "pageNumber": 1,
51
- * "totalPages": 3,
52
- * "maxPages": 1000,
53
- * "limit": 10
54
- * },
55
- * {
56
- * "q": "a",
57
- * "total": 22,
58
- * "offset": 10,
59
- * "pageNumber": 2,
60
- * "totalPages": 3,
61
- * "maxPages": 1000,
62
- * "limit": 10
63
- * },
64
- * {
65
- * "q": "a",
66
- * "total": 22,
67
- * "offset": 20,
68
- * "pageNumber": 3,
69
- * "totalPages": 3,
70
- * "maxPages": 1000,
71
- * "limit": 10
72
- * }
73
- * ]
74
- * ```
75
- *
76
- * #### Example with URLs
77
- *
78
- * Input:
79
- *
80
- * ```json
81
- * [
82
- * "https://httpbin.org/get?a=a",
83
- * "https://httpbin.org/get?a=b",
84
- * "https://httpbin.org/get?a=c"
85
- * ]
86
- * ```
87
- *
88
- * Script:
89
- *
90
- * ```ini
91
- * [URLPager]
92
- * path = .args
93
- * ```
94
- *
95
- * Output:
96
- *
97
- * ```json
98
- * [{"a": "a"}, {"a": "b"}, {"a": "c" }]
99
- * ```
100
- *
101
- * @name URLPager
102
- * @param {String} [url] URL to fetch (by default input string is taken)
103
- * @param {String} [path=total] choose the path to find the number of result
104
- * @param {Number} [timeout=1000] Timeout in milliseconds
105
- * @param {Boolean} [noerror=false] Ignore all errors, the target field will remain undefined
106
- * @param {Number} [retries=5] The maximum amount of times to retry the connection
107
- * @returns {Object}
108
- */
109
async function URLPager(data, feed) {
    if (this.isLast()) {
        return feed.close();
    }

    const url = this.getParam('url');
    const path = this.getParam('path', 'total');
    const limit = Number(this.getParam('limit', 10));
    const maxPages = Number(this.getParam('maxPages', 1000));
    const retries = Number(this.getParam('retries', 5));
    const noerror = Boolean(this.getParam('noerror', false));
    const timeout = Number(this.getParam('timeout')) || 1000;
    // `header` may be a single value or a list; merge into one header block.
    const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
    // Without a `url` parameter the incoming item itself is used as the URL.
    const cURL = new _url.URL(url || data);
    const controller = new _nodeAbortController.default();
    const parameters = {
        timeout,
        headers,
        signal: controller.signal,
    };
    const options = {
        retries,
    };
    cURL.search = new _url.URLSearchParams(data);

    // Abort the pending request, then forward either the untouched item
    // (noerror) or the error itself.
    const onError = (e) => {
        controller.abort();

        if (noerror) {
            (0, _debug.default)('ezs')(`Ignore item #${this.getIndex()} [URLPager] <${e}>`);
            return feed.send(data);
        }

        (0, _debug.default)('ezs')(`Break item #${this.getIndex()} [URLPager] <${e}>`);
        return feed.send(e);
    };

    try {
        // NOTE(review): `request(...)` is presumably a retryable function
        // expected by async-retry — confirm against ./request.
        const response = await (0, _asyncRetry.default)((0, _request.default)(cURL.href, parameters), options);
        const json = await response.json();
        const total = (0, _lodash.default)(json, path);

        if (total === 0) {
            return onError(new Error('No result.'));
        }

        if (total === undefined) {
            return onError(new Error('Unexpected response.'));
        }

        // Use the value found at `path`, not a hard-coded `json.total`:
        // otherwise any non-default `path` yields NaN pages and no output.
        let totalPages = Math.ceil(total / limit);

        if (totalPages > maxPages) {
            totalPages = maxPages;
        }

        // One output item per page, carrying the paging coordinates.
        for (let pageNumber = 1; pageNumber <= totalPages; pageNumber += 1) {
            feed.write({
                ...data,
                offset: (pageNumber - 1) * limit,
                pageNumber,
                totalPages,
                maxPages,
                limit,
            });
        }

        feed.end();
    } catch (e) {
        onError(e);
    }
}
package/lib/utils.js DELETED
@@ -1,18 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = exports.writeTo = void 0;
7
-
8
/**
 * Write `data` to `stream` and invoke `cb` once more data may be written:
 * on the next tick when `write` reports success, or on the stream's next
 * `drain` event when it does not.
 *
 * @param {Object} stream object exposing `write(data)` and `once(event, fn)`
 * @param {*} data chunk handed to `stream.write`
 * @param {Function} cb continuation
 */
const writeTo = function writeTo(stream, data, cb) {
    const accepted = stream.write(data);

    if (accepted) {
        process.nextTick(cb);
        return;
    }

    stream.once('drain', cb);
};
15
-
16
- exports.writeTo = writeTo;
17
- var _default = writeTo;
18
- exports.default = _default;