@ezs/basics 1.23.4 → 1.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -3,6 +3,28 @@
3
3
  All notable changes to this project will be documented in this file.
4
4
  See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
5
5
 
6
+ # [1.24.0](https://github.com/Inist-CNRS/ezs/compare/@ezs/basics@1.23.5...@ezs/basics@1.24.0) (2023-03-28)
7
+
8
+
9
+ ### Features
10
+
11
+ * **basics:** Add TXTSentences ([c66abb5](https://github.com/Inist-CNRS/ezs/commit/c66abb5f242afcff57f98a7fa8eff918a7d60098))
12
+
13
+
14
+
15
+
16
+
17
+ ## [1.23.5](https://github.com/Inist-CNRS/ezs/compare/@ezs/basics@1.23.4...@ezs/basics@1.23.5) (2023-03-24)
18
+
19
+
20
+ ### Bug Fixes
21
+
22
+ * 🐛 multi bytes in txtconcat ([7c705fa](https://github.com/Inist-CNRS/ezs/commit/7c705facc1378fae709bbae6a2b416b059e81731))
23
+
24
+
25
+
26
+
27
+
6
28
  ## [1.23.4](https://github.com/Inist-CNRS/ezs/compare/@ezs/basics@1.23.3...@ezs/basics@1.23.4) (2023-03-24)
7
29
 
8
30
 
package/README.md CHANGED
@@ -30,6 +30,7 @@ npm install @ezs/basics
30
30
  - [TXTConcat](#txtconcat)
31
31
  - [TXTObject](#txtobject)
32
32
  - [TXTParse](#txtparse)
33
+ - [TXTSentences](#txtsentences)
33
34
  - [TXTZip](#txtzip)
34
35
  - [URLConnect](#urlconnect)
35
36
  - [URLFetch](#urlfetch)
@@ -601,6 +602,24 @@ Output:
601
602
 
602
603
  Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
603
604
 
605
+ ### TXTSentences
606
+
607
+ Take a `String` and split it into an array of sentences.
608
+
609
+ Input:
610
+
611
+ ```json
612
+ "First sentence? Second sentence. My name is Bond, J. Bond."
613
+ ```
614
+
615
+ Output:
616
+
617
+ ```json
618
+ ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."]
619
+ ```
620
+
621
+ Returns **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**
622
+
604
623
  ### TXTZip
605
624
 
606
625
  Take a `String` and zip it.
package/lib/index.js CHANGED
@@ -21,6 +21,8 @@ var _txtObject = _interopRequireDefault(require("./txt-object"));
21
21
 
22
22
  var _txtParse = _interopRequireDefault(require("./txt-parse"));
23
23
 
24
+ var _txtSentences = _interopRequireDefault(require("./txt-sentences"));
25
+
24
26
  var _xmlParse = _interopRequireDefault(require("./xml-parse"));
25
27
 
26
28
  var _xmlString = _interopRequireDefault(require("./xml-string"));
@@ -72,6 +74,7 @@ const funcs = {
72
74
  TXTParse: _txtParse.default,
73
75
  TXTObject: _txtObject.default,
74
76
  TXTConcat: _txtConcat.default,
77
+ TXTSentences: _txtSentences.default,
75
78
  XMLParse: _xmlParse.default,
76
79
  XMLString: _xmlString.default,
77
80
  XMLConvert: _xmlConvert.default,
package/lib/txt-concat.js CHANGED
@@ -5,16 +5,23 @@ Object.defineProperty(exports, "__esModule", {
5
5
  });
6
6
  exports.default = void 0;
7
7
 
8
+ var _string_decoder = require("string_decoder");
9
+
8
10
  function TXTConcat(data, feed) {
11
+ if (!this.decoder) {
12
+ this.decoder = new _string_decoder.StringDecoder('utf8');
13
+ }
14
+
9
15
  if (this.buffer === undefined) {
10
16
  this.buffer = '';
11
17
  }
12
18
 
13
19
  if (this.isLast()) {
20
+ this.decoder.end();
14
21
  feed.send(this.buffer);
15
22
  feed.close();
16
23
  } else {
17
- this.buffer = this.buffer.concat(data);
24
+ this.buffer = this.buffer.concat(Buffer.isBuffer(data) ? this.decoder.write(data) : data);
18
25
  feed.end();
19
26
  }
20
27
  }
@@ -0,0 +1,100 @@
1
+ "use strict";
2
+
3
+ Object.defineProperty(exports, "__esModule", {
4
+ value: true
5
+ });
6
+ exports.default = void 0;
7
+
8
+ var _string_decoder = require("string_decoder");
9
+
10
const UPPER_LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
const SENTENCE_INIT = ' ';
const SENTENCE_ENDING = '.?!';

/**
 * Segment sentences from `str` into an array.
 *
 * A sentence ends on `?`, `!` or `.`, except when the `.` directly follows a
 * single ASCII capital letter that is itself preceded by a space or by the
 * start of the text (an initial, as in "J. Bond").
 *
 * The last element is always the text that follows the final sentence ending
 * (an empty string when `str` ends on one). Leading whitespace is removed
 * from every element.
 *
 * Runs in a single pass: the sentence being built is kept in one string and
 * pushed when it ends, instead of copying the whole result array for every
 * character.
 *
 * @param {string} str
 * @returns {string[]}
 */
const segmentSentences = str => {
  const sentences = [];
  // Every sentence starts with a space so that an initial at the very
  // beginning ("J. Bond ...") is recognised like one in the middle of a text.
  let current = SENTENCE_INIT;

  for (const character of str.split('')) {
    const isInitial = character === '.'
      && current[current.length - 2] === ' '
      && UPPER_LETTERS.includes(current[current.length - 1]);
    current += character;

    if (SENTENCE_ENDING.includes(character) && !isInitial) {
      sentences.push(current);
      current = SENTENCE_INIT;
    }
  }

  sentences.push(current);
  return sentences.map(sentence => sentence.trimStart());
};
49
+
50
/**
 * Take a `String` and split it into an array of sentences.
 *
 * Input:
 *
 * ```json
 * "First sentence? Second sentence. My name is Bond, J. Bond."
 * ```
 *
 * Output:
 *
 * ```json
 * ["First sentence?", "Second sentence.", "My name is Bond, J. Bond."]
 * ```
 *
 * Each sentence is written to the feed as its own string. Text that does not
 * end with a sentence ending yet is kept in `ctx.remainder` and completed by
 * the following chunks; whatever is left is written on the last call.
 *
 * @name TXTSentences
 * @returns {String}
 */
const TXTSentences = (data, feed, ctx) => {
  if (!ctx.decoder) {
    ctx.decoder = new _string_decoder.StringDecoder('utf8');
  }

  ctx.remainder = ctx.remainder ?? '';

  if (ctx.isLast()) {
    // Flush the unfinished sentence (plus anything the decoder still held)
    // so text without a final '.', '?' or '!' is not lost.
    const tail = ctx.remainder + ctx.decoder.end();
    ctx.remainder = '';

    if (tail !== '') {
      feed.write(tail);
    }

    // NOTE(review): other statements of this package call feed.close() on the
    // last call; feed.end() is kept here as in the previous implementation.
    return feed.end();
  }

  let str = '';

  if (Buffer.isBuffer(data)) {
    str = ctx.decoder.write(data);
  } else if (typeof data === 'string') {
    str = data;
  }

  // Nothing to segment (empty chunk, non-text chunk, or a buffer holding only
  // part of a multi-byte character): leave the remainder untouched.
  if (str === '') {
    return feed.end();
  }

  // Segment remainder and chunk together, so whitespace and the "J. Bond"
  // context are seen exactly as if the text had arrived in one piece.
  const sentences = segmentSentences(ctx.remainder + str);
  ctx.remainder = sentences.pop();
  sentences.forEach(sentence => {
    feed.write(sentence);
  });
  return feed.end();
};
95
+
96
+
97
+ var _default = {
98
+ TXTSentences
99
+ };
100
+ exports.default = _default;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@ezs/basics",
3
3
  "description": "Basics statements for EZS",
4
- "version": "1.23.4",
4
+ "version": "1.24.0",
5
5
  "author": "Nicolas Thouvenin <nthouvenin@gmail.com>",
6
6
  "bugs": "https://github.com/Inist-CNRS/ezs/issues",
7
7
  "dependencies": {
@@ -39,7 +39,7 @@
39
39
  "directories": {
40
40
  "test": "test"
41
41
  },
42
- "gitHead": "8da84f8d6833a91a2ae87fdb635070a811c3f9ef",
42
+ "gitHead": "0f52795f9c5d1a452adbad48ab567298bb8a6bea",
43
43
  "homepage": "https://github.com/Inist-CNRS/ezs/tree/master/packages/basics#readme",
44
44
  "keywords": [
45
45
  "ezs"
package/lib/fetch.js DELETED
@@ -1,74 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = fetch;
7
-
8
- var _crossFetch = _interopRequireDefault(require("cross-fetch"));
9
-
10
- var _proxyFromEnv = require("proxy-from-env");
11
-
12
- var _http = _interopRequireDefault(require("http"));
13
-
14
- var _https = _interopRequireDefault(require("https"));
15
-
16
- var _betterHttpsProxyAgent = require("better-https-proxy-agent");
17
-
18
- function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
19
-
20
// Agent settings applied by `fetch` in this module.
const DefaultOptions = {
  keepAlive: true,
  timeout: 1000,
  keepAliveMsecs: 500,
  maxSockets: 200,
  maxFreeSockets: 5,
  maxCachedSessions: 500
};

/**
 * Pick the agent used for a request.
 *
 * When `getProxyForUrl` reports a proxy for the URL, a proxy agent is built
 * from `options` and the parsed proxy URL; otherwise an `https` or `http`
 * agent is created according to the protocol of the target.
 *
 * @param {URL} parsedURL target of the request
 * @param {Object} options settings handed to the agent constructor
 * @returns {Object} the agent instance
 */
const chooseAgent = (parsedURL, options) => {
  const proxyurl = (0, _proxyFromEnv.getProxyForUrl)(parsedURL.href);

  if (!proxyurl) {
    const transport = parsedURL.protocol === 'https:' ? _https.default : _http.default;
    return new transport.Agent(options);
  }

  return new _betterHttpsProxyAgent.Agent(options, new URL(proxyurl));
};
43
-
44
/**
 * Run `cross-fetch` with an agent chosen by `chooseAgent`.
 *
 * The agent settings (`keepAlive`, `timeout`, `keepAliveMsecs`, `maxSockets`,
 * `maxFreeSockets`, `maxCachedSessions`) are read from `options`; a setting
 * falls back to `DefaultOptions` only when the caller did not provide it.
 * The agent is destroyed when `options.signal` aborts.
 *
 * As before, when an `options` object is given it receives the `agent`
 * property.
 *
 * @param {String} url address to request
 * @param {Object} [options] fetch options, plus the agent settings above
 * @returns {Promise} the promise returned by `cross-fetch`
 */
function fetch(url, options) {
  const opts = options || {};
  // Destructuring defaults (rather than spreading the defaults last) let the
  // caller's values win over DefaultOptions.
  const {
    keepAlive = DefaultOptions.keepAlive,
    timeout = DefaultOptions.timeout,
    keepAliveMsecs = DefaultOptions.keepAliveMsecs,
    maxSockets = DefaultOptions.maxSockets,
    maxFreeSockets = DefaultOptions.maxFreeSockets,
    maxCachedSessions = DefaultOptions.maxCachedSessions
  } = opts;
  let agent = chooseAgent(new URL(url), {
    keepAlive,
    timeout,
    keepAliveMsecs,
    maxSockets,
    maxFreeSockets,
    maxCachedSessions
  });
  opts.agent = agent;

  if (opts.signal) {
    opts.signal.addEventListener('abort', () => {
      agent.destroy();
      agent = null;
    });
  }

  // Pass `opts`, not `options`: when the caller gave no options the agent
  // lives only on `opts` and would otherwise never reach cross-fetch.
  return (0, _crossFetch.default)(url, opts);
}
@@ -1,73 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = void 0;
7
-
8
/**
 * Flatten one XMLMapping/SKOS node into a plain key/value object and write it
 * to the feed.
 *
 * For every key of `data` (with the `skos$` prefix removed):
 * - arrays: each item carrying `xml$lang` becomes `key@lang` = item `$t`;
 * - `rdf$resource`: stored as is, or paired with a value already present;
 * - `$t` with `xml$lang`: stored under `key@lang`;
 * - `$t` alone: stored, or collected with a value already present;
 * - any other object: its object children are written as separate nodes whose
 *   `rdf$about` is a generated identifier, which is also stored under the key
 *   (the key is dropped when there is no object child);
 * - everything else, including `null`/`undefined`, is copied unchanged.
 *
 * Must be called with the statement context as `this` (uses `getIndex()`).
 *
 * @param {Object} data node to flatten (object children get an `rdf$about`)
 * @param {Object} feed output feed (`write` is used)
 */
function Concept(data, feed) {
  const obj = {};
  Object.keys(data).forEach(key => {
    const newkey = key.replace('skos$', '');
    const value = data[key];

    if (value === null || value === undefined) {
      // Property access below would throw on these; copy them like scalars.
      obj[newkey] = value;
    } else if (Array.isArray(value)) {
      value.filter(x => x && x.xml$lang).forEach(item => {
        const localkey = newkey.concat('@').concat(item.xml$lang);
        obj[localkey] = item.$t;
      });
    } else if (value.rdf$resource && !obj[newkey]) {
      obj[newkey] = value.rdf$resource;
    } else if (value.rdf$resource && obj[newkey]) {
      obj[newkey] = [obj[newkey], value.rdf$resource];
    } else if (value.$t && value.xml$lang) {
      const localkey = newkey.concat('@').concat(value.xml$lang);
      obj[localkey] = value.$t;
    } else if (value.$t && Array.isArray(obj[newkey])) {
      obj[newkey].push(value.$t);
    } else if (value.$t && obj[newkey]) {
      obj[newkey] = [obj[newkey], value.$t];
    } else if (value.$t && !obj[newkey]) {
      obj[newkey] = value.$t;
    } else if (typeof value === 'object') {
      // Generated identifier linking the nested nodes to this one.
      obj[newkey] = (this.getIndex().toString(36) + Math.random().toString(36).slice(2, 7)).toUpperCase();
      let counter = 0;
      Object.keys(value).forEach(key2 => {
        const child = value[key2];

        // `typeof null` is 'object' too; skip it instead of crashing.
        if (child !== null && typeof child === 'object') {
          child.rdf$about = obj[newkey];
          Concept.call(this, child, feed);
          counter += 1;
        }
      });

      if (counter === 0) {
        delete obj[newkey];
      }
    } else {
      obj[newkey] = value;
    }
  });
  feed.write(obj);
}
51
-
52
/**
 * Take `Object` generated by XMLMapping & SKOS data and
 * create a new basic object with only keys & values
 *
 * @name SKOSObject
 * @param {undefined} none
 * @returns {Object}
 */
function SKOSObject(data, feed) {
  // Regular chunk: flatten it, then signal that this item is done.
  if (!this.isLast()) {
    Concept.call(this, data, feed);
    feed.end();
    return;
  }

  // Last call carries no data; only the feed has to be closed.
  feed.close();
}
68
-
69
-
70
- var _default = {
71
- SKOSObject
72
- };
73
- exports.default = _default;
package/lib/url-pager.js DELETED
@@ -1,179 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = URLPager;
7
-
8
- var _debug = _interopRequireDefault(require("debug"));
9
-
10
- var _url = require("url");
11
-
12
- var _nodeAbortController = _interopRequireDefault(require("node-abort-controller"));
13
-
14
- var _lodash = _interopRequireDefault(require("lodash.get"));
15
-
16
- var _parseHeaders = _interopRequireDefault(require("parse-headers"));
17
-
18
- var _asyncRetry = _interopRequireDefault(require("async-retry"));
19
-
20
- var _request = _interopRequireDefault(require("./request"));
21
-
22
- function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
23
-
24
/**
 * Take `Object` as parameters of URL, throw each chunk from the result
 *
 *
 * Input:
 *
 * ```json
 * [{"q": "a"}]
 * ```
 *
 * Script:
 *
 * ```ini
 * [URLPager]
 * url = https://api.search.net
 * path = total
 * ```
 *
 * Output (for a response announcing 22 results):
 *
 * ```json
 * [
 *      {
 *          "q": "a",
 *          "offset": 0,
 *          "pageNumber": 1,
 *          "totalPages": 3,
 *          "maxPages": 1000,
 *          "limit": 10
 *      },
 *      {
 *          "q": "a",
 *          "offset": 10,
 *          "pageNumber": 2,
 *          "totalPages": 3,
 *          "maxPages": 1000,
 *          "limit": 10
 *      },
 *      {
 *          "q": "a",
 *          "offset": 20,
 *          "pageNumber": 3,
 *          "totalPages": 3,
 *          "maxPages": 1000,
 *          "limit": 10
 *      }
 * ]
 * ```
 *
 * #### Example with URLs
 *
 * NOTE(review): the output below does not look like the paging objects this
 * statement writes; confirm the example against the implementation.
 *
 * Input:
 *
 * ```json
 * [
 *      "https://httpbin.org/get?a=a",
 *      "https://httpbin.org/get?a=b",
 *      "https://httpbin.org/get?a=c"
 * ]
 * ```
 *
 * Script:
 *
 * ```ini
 * [URLPager]
 * path = .args
 * ```
 *
 * Output:
 *
 * ```json
 * [{"a": "a"}, {"a": "b"}, {"a": "c" }]
 * ```
 *
 * @name URLPager
 * @param {String} [url] URL to fetch (by default input string is taken)
 * @param {String} [path=total] choose the path to find the number of result
 * @param {Number} [limit=10] number of results per page
 * @param {Number} [maxPages=1000] maximum number of pages to generate
 * @param {String} [header] HTTP header to send with the request (can be repeated)
 * @param {Number} [timeout=1000] Timeout in milliseconds
 * @param {Boolean} [noerror=false] Ignore all errors, the target field will remain undefined
 * @param {Number} [retries=5] The maximum amount of times to retry the connection
 * @returns {Object}
 */
async function URLPager(data, feed) {
  if (this.isLast()) {
    return feed.close();
  }

  const url = this.getParam('url');
  const path = this.getParam('path', 'total');
  const limit = Number(this.getParam('limit', 10));
  const maxPages = Number(this.getParam('maxPages', 1000));
  const retries = Number(this.getParam('retries', 5));
  const noerror = Boolean(this.getParam('noerror', false));
  const timeout = Number(this.getParam('timeout')) || 1000;
  const headers = (0, _parseHeaders.default)([].concat(this.getParam('header')).filter(Boolean).join('\n'));
  const cURL = new _url.URL(url || data);
  const controller = new _nodeAbortController.default();
  const parameters = {
    timeout,
    headers,
    signal: controller.signal
  };
  const options = {
    retries
  };
  cURL.search = new _url.URLSearchParams(data);

  // Abort the pending request, then either pass the input through untouched
  // (noerror) or send the error downstream.
  const onError = e => {
    controller.abort();

    if (noerror) {
      (0, _debug.default)('ezs')(`Ignore item #${this.getIndex()} [URLPager] <${e}>`);
      return feed.send(data);
    }

    (0, _debug.default)('ezs')(`Break item #${this.getIndex()} [URLPager] <${e}>`);
    return feed.send(e);
  };

  try {
    const response = await (0, _asyncRetry.default)((0, _request.default)(cURL.href, parameters), options);
    const json = await response.json();
    const total = (0, _lodash.default)(json, path);

    if (total === 0) {
      return onError(new Error('No result.'));
    }

    if (total === undefined) {
      return onError(new Error('Unexpected response.'));
    }

    // Use the value found at `path`; reading `json.total` directly ignored
    // the parameter whenever the count lived anywhere else in the response.
    let totalPages = Math.ceil(total / limit);

    if (totalPages > maxPages) {
      totalPages = maxPages;
    }

    for (let pageNumber = 1; pageNumber <= totalPages; pageNumber += 1) {
      feed.write({ ...data,
        offset: (pageNumber - 1) * limit,
        pageNumber,
        totalPages,
        maxPages,
        limit
      });
    }

    feed.end();
  } catch (e) {
    onError(e);
  }
}
package/lib/utils.js DELETED
@@ -1,18 +0,0 @@
1
- "use strict";
2
-
3
- Object.defineProperty(exports, "__esModule", {
4
- value: true
5
- });
6
- exports.default = exports.writeTo = void 0;
7
-
8
/**
 * Write `data` to `stream` and invoke `cb` once more data may be written:
 * on the next tick when `write` returned a truthy value, otherwise on the
 * stream's next 'drain' event.
 *
 * @param {Object} stream target exposing `write` and `once`
 * @param {*} data value handed to `stream.write`
 * @param {Function} cb continuation
 */
const writeTo = function writeTo(stream, data, cb) {
  const accepted = stream.write(data);

  if (accepted) {
    process.nextTick(cb);
    return;
  }

  stream.once('drain', cb);
};
15
-
16
- exports.writeTo = writeTo;
17
- var _default = writeTo;
18
- exports.default = _default;