@iebh/reflib 2.0.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -9
- package/lib/readFile.js +32 -3
- package/lib/uploadFile.js +3 -2
- package/modules/endnoteXml.js +104 -65
- package/modules/interface.js +1 -0
- package/modules/json.js +3 -2
- package/modules/medline.js +1 -0
- package/modules/ris.js +1 -0
- package/package.json +1 -1
- package/shared/streamEmitter.js +5 -24
package/README.md
CHANGED
|
@@ -193,6 +193,19 @@ reflib.readFile('./data/json/json1.json')
|
|
|
193
193
|
.then(refs => /* Do something with Ref collection */)
|
|
194
194
|
```
|
|
195
195
|
|
|
196
|
+
An emitter is available to track progress while reading. Note that because each `.then()` call returns a new promise, only the promise returned directly by `readFile()` carries the `emitter` key:
|
|
197
|
+
|
|
198
|
+
```javascript
|
|
199
|
+
let reader = reflib.readFile('./data/json/json1.json');
|
|
200
|
+
|
|
201
|
+
reader.emitter
|
|
202
|
+
.on('progress', ({readBytes, totalSize, refsFound}) => /* Report progress somehow */)
|
|
203
|
+
.on('end', ({refsFound}) => /* Report completion somehow */);
|
|
204
|
+
|
|
205
|
+
reader
|
|
206
|
+
.then(refs => /* Do something with Ref collection */)
|
|
207
|
+
```
|
|
208
|
+
|
|
196
209
|
uploadFile(options)
|
|
197
210
|
===================
|
|
198
211
|
Available: Browser
|
|
@@ -257,12 +270,3 @@ Credits
|
|
|
257
270
|
=======
|
|
258
271
|
Developed for the [Bond University Institute for Evidence-Based Healthcare](https://iebh.bond.edu.au).
|
|
259
272
|
Please contact [the author](mailto:matt_carter@bond.edu.au) with any issues.
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
TODO
|
|
263
|
-
====
|
|
264
|
-
- [x] Basic parsing iterfaces
|
|
265
|
-
- [ ] "progress" emitter for files
|
|
266
|
-
- [x] `.uploadFile()` browser compatibility
|
|
267
|
-
- [ ] `.downloadFile()` browser compatibility
|
|
268
|
-
- [x] `setup()` functions per module to avoid things like map calculations unless the module is actually needed
|
package/lib/readFile.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import {createReadStream} from 'node:fs';
|
|
2
|
+
import Emitter from '../shared/emitter.js';
|
|
2
3
|
import {stat} from 'node:fs/promises';
|
|
3
4
|
import {identifyFormat} from './identifyFormat.js';
|
|
4
5
|
import {readStream} from './readStream.js';
|
|
@@ -9,16 +10,26 @@ import {readStream} from './readStream.js';
|
|
|
9
10
|
* @param {string} path The file path to parse
|
|
10
11
|
* @param {Object} [options] Additional options to pass to the parser
|
|
11
12
|
* @param {string} [options.module] The module to use if overriding from the file path
|
|
13
|
+
*
|
|
12
14
|
* @returns {Promise<Array>} An eventual array of all references parsed from the file
|
|
15
|
+
* @property {EventEmitter} emitter An event emitter which will fire the below events
|
|
16
|
+
*
|
|
17
|
+
* @fires progress Emitted as `({readBytes: Number, totalSize: Number, refsFound: Number})`
|
|
18
|
+
* @fires end Emitted as `({refsFound: Number})` when the reading operation has completed
|
|
13
19
|
*/
|
|
14
20
|
export function readFile(path, options) {
|
|
15
|
-
let settings = {
|
|
21
|
+
let settings = {
|
|
22
|
+
progressTotal: false,
|
|
23
|
+
...options,
|
|
24
|
+
};
|
|
16
25
|
let module = options?.module || identifyFormat(path)?.id;
|
|
17
26
|
if (!module) throw new Error(`Unable to identify reference library format for file path "${path}"`);
|
|
18
27
|
|
|
19
|
-
|
|
28
|
+
let promiseEmitter = Promise.resolve()
|
|
29
|
+
.then(()=> stat(path))
|
|
20
30
|
.then(stats => new Promise((resolve, reject) => {
|
|
21
|
-
let refs = [];
|
|
31
|
+
let refs = []; // eslint-disable-line no-unused-vars
|
|
32
|
+
|
|
22
33
|
readStream(
|
|
23
34
|
module,
|
|
24
35
|
createReadStream(path),
|
|
@@ -30,5 +41,23 @@ export function readFile(path, options) {
|
|
|
30
41
|
.on('end', ()=> resolve(refs))
|
|
31
42
|
.on('error', reject)
|
|
32
43
|
.on('ref', ref => refs.push(ref))
|
|
44
|
+
.on('progress', readBytes => promiseEmitter.emitter.emit('progress', {
|
|
45
|
+
readBytes,
|
|
46
|
+
totalSize: stats.size,
|
|
47
|
+
refsFound: refs.length,
|
|
48
|
+
}))
|
|
33
49
|
}))
|
|
50
|
+
.then(refs => {
|
|
51
|
+
promiseEmitter.emitter.emit('end', {refsFound: refs.length})
|
|
52
|
+
return refs;
|
|
53
|
+
})
|
|
54
|
+
|
|
55
|
+
// Extend our base promise with an emitter subkey
|
|
56
|
+
return Object.defineProperties(promiseEmitter, {
|
|
57
|
+
emitter: {
|
|
58
|
+
value: Emitter(),
|
|
59
|
+
enumerable: true,
|
|
60
|
+
writable: false,
|
|
61
|
+
},
|
|
62
|
+
});
|
|
34
63
|
}
|
package/lib/uploadFile.js
CHANGED
|
@@ -6,7 +6,7 @@ import StreamEmitter from '../shared/streamEmitter.js';
|
|
|
6
6
|
/**
|
|
7
7
|
* Prompt the user for a file then read it as a Reflib event emitter
|
|
8
8
|
* @param {Object} [options] Additional options when prompting the user
|
|
9
|
-
* @param {File} [options.
|
|
9
|
+
* @param {File} [options.file] The File object to process, omitting this will prompt the user to select a file
|
|
10
10
|
* @param {function} [options.onStart] Async function called as `(File)` when starting the read stage
|
|
11
11
|
* @param {function} [options.onProgress] Function called as `(position, totalSize)` when processing the file
|
|
12
12
|
* @param {function} [options.onEnd] Async function called as `()` when the read stage has completed
|
|
@@ -49,9 +49,10 @@ export function uploadFile(options) {
|
|
|
49
49
|
return Promise.resolve()
|
|
50
50
|
.then(()=> settings.onStart && settings.onStart(settings.file))
|
|
51
51
|
.then(()=> new Promise((resolve, reject) => {
|
|
52
|
+
|
|
52
53
|
let streamer = readStream(
|
|
53
54
|
identifiedType.id,
|
|
54
|
-
StreamEmitter(settings.file.stream()
|
|
55
|
+
StreamEmitter(settings.file.stream()),
|
|
55
56
|
{
|
|
56
57
|
...settings,
|
|
57
58
|
size: settings.file.size,
|
package/modules/endnoteXml.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import camelCase from '../shared/camelCase.js';
|
|
2
2
|
import Emitter from '../shared/emitter.js';
|
|
3
|
-
import {WritableStream as XMLParser} from 'htmlparser2/lib/WritableStream';
|
|
4
3
|
|
|
4
|
+
// TODO: CF: Don't need to import both, it depends if we are on browser or node
|
|
5
|
+
import * as htmlparser2 from "htmlparser2";
|
|
6
|
+
import {WritableStream as XMLParser} from 'htmlparser2/lib/WritableStream';
|
|
5
7
|
|
|
6
8
|
/**
|
|
7
9
|
* @see modules/interface.js
|
|
@@ -30,75 +32,112 @@ export function readStream(stream) {
|
|
|
30
32
|
*/
|
|
31
33
|
let textAppend = false;
|
|
32
34
|
|
|
35
|
+
/**
|
|
36
|
+
* The options/callbacks for the parser
|
|
37
|
+
* @type {Object}
|
|
38
|
+
*/
|
|
39
|
+
let parserOptions = {
|
|
40
|
+
xmlMode: true,
|
|
41
|
+
decodeEntities: false, // Handled below
|
|
42
|
+
onopentag(name, attrs) {
|
|
43
|
+
textAppend = false;
|
|
44
|
+
stack.push({
|
|
45
|
+
name: camelCase(name),
|
|
46
|
+
attrs,
|
|
47
|
+
});
|
|
48
|
+
},
|
|
49
|
+
onclosetag(name) {
|
|
50
|
+
if (name == 'record') {
|
|
51
|
+
if (ref.title) ref.title = ref.title // htmlparser2 handles the '<title>' tag in a really bizare way so we have to pull apart the <style> bits when parsing
|
|
52
|
+
.replace(/^.*<style.*>(.*)<\/style>.*$/m, '$1')
|
|
53
|
+
.replace(/^\s+/, '')
|
|
54
|
+
.replace(/\s+$/, '')
|
|
55
|
+
emitter.emit('ref', translateRawToRef(ref));
|
|
56
|
+
stack = []; // Trash entire stack when hitting end of <record/> node
|
|
57
|
+
ref = {}; // Reset the ref state
|
|
58
|
+
} else {
|
|
59
|
+
stack.pop();
|
|
60
|
+
}
|
|
61
|
+
},
|
|
62
|
+
ontext(text) {
|
|
63
|
+
let parentName = stack[stack.length - 1]?.name;
|
|
64
|
+
let gParentName = stack[stack.length - 2]?.name;
|
|
65
|
+
if (parentName == 'title') {
|
|
66
|
+
if (textAppend) {
|
|
67
|
+
ref.title += text;
|
|
68
|
+
} else {
|
|
69
|
+
ref.title = text;
|
|
70
|
+
}
|
|
71
|
+
} else if (parentName == 'style' && gParentName == 'author') {
|
|
72
|
+
if (!ref.authors) ref.authors = [];
|
|
73
|
+
if (textAppend) {
|
|
74
|
+
ref.authors[ref.authors.length - 1] += xmlUnescape(text);
|
|
75
|
+
} else {
|
|
76
|
+
ref.authors.push(xmlUnescape(text));
|
|
77
|
+
}
|
|
78
|
+
} else if (parentName == 'style' && gParentName == 'keyword') {
|
|
79
|
+
if (!ref.keywords) ref.keywords = [];
|
|
80
|
+
if (textAppend) {
|
|
81
|
+
ref.keywords[ref.keywords.length - 1] += xmlUnescape(text);
|
|
82
|
+
} else {
|
|
83
|
+
ref.keywords.push(xmlUnescape(text));
|
|
84
|
+
}
|
|
85
|
+
} else if (parentName == 'style') { // Text within <style/> tag
|
|
86
|
+
if (textAppend || ref[gParentName]) { // Text already exists? Append (handles node-expats silly multi-text per escape character "feature")
|
|
87
|
+
ref[gParentName] += xmlUnescape(text);
|
|
88
|
+
} else {
|
|
89
|
+
ref[gParentName] = xmlUnescape(text);
|
|
90
|
+
}
|
|
91
|
+
} else if (['recNumber', 'refType'].includes(parentName)) { // Simple setters like <rec-number/>
|
|
92
|
+
if (textAppend || ref[parentName]) {
|
|
93
|
+
ref[parentName] += xmlUnescape(text);
|
|
94
|
+
} else {
|
|
95
|
+
ref[parentName] = xmlUnescape(text);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
textAppend = true; // Always set the next call to the text emitter handler as an append operation
|
|
99
|
+
},
|
|
100
|
+
onend() {
|
|
101
|
+
emitter.emit('end');
|
|
102
|
+
}
|
|
103
|
+
}
|
|
33
104
|
|
|
34
105
|
// Queue up the parser in the next tick (so we can return the emitter first)
|
|
35
|
-
setTimeout(()=> {
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
106
|
+
setTimeout(() => {
|
|
107
|
+
|
|
108
|
+
if (typeof stream.pipe === 'function') {
|
|
109
|
+
// We are on the node.js client
|
|
110
|
+
let parser = new XMLParser(parserOptions);
|
|
111
|
+
stream.on('data', ()=> emitter.emit('progress', stream.bytesRead))
|
|
112
|
+
stream.pipe(parser)
|
|
113
|
+
return;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// TODO: CF: We may want to consider moving to a DIY parser for speed and memory efficiency
|
|
117
|
+
if (typeof stream.getReader === 'function') {
|
|
118
|
+
// We are on the browser
|
|
119
|
+
var reader = stream.getReader();
|
|
120
|
+
var parser = new htmlparser2.Parser(parserOptions);
|
|
121
|
+
parseXMLOnBrowser();
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
function parseXMLOnBrowser() {
|
|
126
|
+
reader.read().then(({done, value}) => {
|
|
127
|
+
if (done) {
|
|
128
|
+
parser.end();
|
|
55
129
|
} else {
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
let parentName = stack[stack.length - 1]?.name;
|
|
61
|
-
let gParentName = stack[stack.length - 2]?.name;
|
|
62
|
-
if (parentName == 'title') {
|
|
63
|
-
if (textAppend) {
|
|
64
|
-
ref.title += text;
|
|
65
|
-
} else {
|
|
66
|
-
ref.title = text;
|
|
67
|
-
}
|
|
68
|
-
} else if (parentName == 'style' && gParentName == 'author') {
|
|
69
|
-
if (!ref.authors) ref.authors = [];
|
|
70
|
-
if (textAppend) {
|
|
71
|
-
ref.authors[ref.authors.length - 1] += xmlUnescape(text);
|
|
72
|
-
} else {
|
|
73
|
-
ref.authors.push(xmlUnescape(text));
|
|
74
|
-
}
|
|
75
|
-
} else if (parentName == 'style' && gParentName == 'keyword') {
|
|
76
|
-
if (!ref.keywords) ref.keywords = [];
|
|
77
|
-
if (textAppend) {
|
|
78
|
-
ref.keywords[ref.keywords.length - 1] += xmlUnescape(text);
|
|
79
|
-
} else {
|
|
80
|
-
ref.keywords.push(xmlUnescape(text));
|
|
81
|
-
}
|
|
82
|
-
} else if (parentName == 'style') { // Text within <style/> tag
|
|
83
|
-
if (textAppend || ref[gParentName]) { // Text already exists? Append (handles node-expats silly multi-text per escape character "feature")
|
|
84
|
-
ref[gParentName] += xmlUnescape(text);
|
|
85
|
-
} else {
|
|
86
|
-
ref[gParentName] = xmlUnescape(text);
|
|
87
|
-
}
|
|
88
|
-
} else if (['recNumber', 'refType'].includes(parentName)) { // Simple setters like <rec-number/>
|
|
89
|
-
if (textAppend || ref[parentName]) {
|
|
90
|
-
ref[parentName] += xmlUnescape(text);
|
|
91
|
-
} else {
|
|
92
|
-
ref[parentName] = xmlUnescape(text);
|
|
93
|
-
}
|
|
130
|
+
var text = new TextDecoder().decode(value);
|
|
131
|
+
parser.write(text);
|
|
132
|
+
text = null; // Free up memory
|
|
133
|
+
parseXMLOnBrowser();
|
|
94
134
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
})
|
|
135
|
+
})
|
|
136
|
+
}
|
|
98
137
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
})
|
|
138
|
+
console.error("This line should not be hit!");
|
|
139
|
+
|
|
140
|
+
})
|
|
102
141
|
|
|
103
142
|
return emitter;
|
|
104
143
|
}
|
package/modules/interface.js
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
* @emits ref Emitted with a single ref object when found
|
|
15
15
|
* @emits end Emitted when parsing has completed
|
|
16
16
|
* @emits error Emitted when an error has been raised
|
|
17
|
+
* @emits progress Emitted as (bytesRead) when reading a stream
|
|
17
18
|
*/
|
|
18
19
|
export function readStream(stream, options) {
|
|
19
20
|
// Stub
|
package/modules/json.js
CHANGED
|
@@ -9,7 +9,8 @@ export function readStream(stream) {
|
|
|
9
9
|
let emitter = Emitter();
|
|
10
10
|
|
|
11
11
|
// Queue up the parser in the next tick (so we can return the emitter first)
|
|
12
|
-
setTimeout(()=>
|
|
12
|
+
setTimeout(()=> {
|
|
13
|
+
stream.on('data', ()=> emitter.emit('progress', stream.bytesRead));
|
|
13
14
|
stream.pipe(
|
|
14
15
|
JSONStream.parse('*')
|
|
15
16
|
.on('data', ref => emitter.emit('ref', {
|
|
@@ -19,7 +20,7 @@ export function readStream(stream) {
|
|
|
19
20
|
.on('end', ()=> emitter.emit('end'))
|
|
20
21
|
.on('error', emitter.emit.bind('error'))
|
|
21
22
|
)
|
|
22
|
-
);
|
|
23
|
+
});
|
|
23
24
|
|
|
24
25
|
return emitter;
|
|
25
26
|
}
|
package/modules/medline.js
CHANGED
|
@@ -122,6 +122,7 @@ export function readStream(stream, options) {
|
|
|
122
122
|
setTimeout(()=> {
|
|
123
123
|
stream
|
|
124
124
|
.on('data', chunkBuffer => {
|
|
125
|
+
emitter.emit('progress', stream.bytesRead);
|
|
125
126
|
buffer += chunkBuffer.toString(); // Append incomming data to the partial-buffer we're holding in memory
|
|
126
127
|
|
|
127
128
|
let bufferCrop = 0; // How many bytes to shift off the front of the buffer based on the last full reference we saw, should end up at the last byte offset of buffer that is valid to shift-truncate to
|
package/modules/ris.js
CHANGED
|
@@ -21,6 +21,7 @@ export function readStream(stream, options) {
|
|
|
21
21
|
setTimeout(()=> {
|
|
22
22
|
stream
|
|
23
23
|
.on('data', chunkBuffer => {
|
|
24
|
+
emitter.emit('progress', stream.bytesRead);
|
|
24
25
|
buffer += chunkBuffer.toString(); // Append incomming data to the partial-buffer we're holding in memory
|
|
25
26
|
|
|
26
27
|
let bufferCrop = 0; // How many bytes to shift off the front of the buffer based on the last full reference we saw, should end up at the last byte offset of buffer that is valid to shift-truncate to
|
package/package.json
CHANGED
package/shared/streamEmitter.js
CHANGED
|
@@ -3,7 +3,7 @@ import Emitter from '../shared/emitter.js';
|
|
|
3
3
|
/**
|
|
4
4
|
* Wrapper for streams which transforms a given input into an emitter pattern
|
|
5
5
|
* This is designed to let regular `node:stream.Readable` objects pass through without alteration but browser based stream objects get wrapped
|
|
6
|
-
* @param {stream.Readable|
|
|
6
|
+
* @param {stream.Readable|ReadableStream} inStream The input stream to wrap
|
|
7
7
|
* @returns {stream.Readable|Emitter} Either the unedited node compatible stream or an event emitter with the same behaviour
|
|
8
8
|
*
|
|
9
9
|
* @emits data Emitted as `(chunk)` on each data chunk
|
|
@@ -11,27 +11,8 @@ import Emitter from '../shared/emitter.js';
|
|
|
11
11
|
* @emits error Emitted as `(Error)` on any read error
|
|
12
12
|
*/
|
|
13
13
|
export default function streamEmitter(inStream) {
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
let readCycle = ()=> {
|
|
19
|
-
inStream
|
|
20
|
-
.read()
|
|
21
|
-
.then(({value, done}) => {
|
|
22
|
-
if (done) {
|
|
23
|
-
emitter.emit('end');
|
|
24
|
-
} else {
|
|
25
|
-
emitter.emit('data', utf8Decoder.decode(value, {stream: true}));
|
|
26
|
-
setTimeout(readCycle); // Loop into next read if not already finished
|
|
27
|
-
}
|
|
28
|
-
})
|
|
29
|
-
.catch(e => emitter.emit('error', e))
|
|
30
|
-
};
|
|
31
|
-
|
|
32
|
-
// Keep downstream libraries happy by stubbing stream-like functions
|
|
33
|
-
emitter.setEncoding = ()=> {};
|
|
34
|
-
|
|
35
|
-
setTimeout(readCycle); // Queue up initial read cycle on next tick
|
|
36
|
-
return emitter;
|
|
14
|
+
// FIXME: Need to examine inStream and multiplex
|
|
15
|
+
// inStream.pipeTo - a browser stream - passthru
|
|
16
|
+
// !inStream.pipeTo - probably Node stream - need to glue pipeTo as a promiseable
|
|
17
|
+
return inStream;
|
|
37
18
|
}
|