@iebh/reflib 2.0.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -193,6 +193,19 @@ reflib.readFile('./data/json/json1.json')
193
193
  .then(refs => /* Do something with Ref collection */)
194
194
  ```
195
195
 
196
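+ An emitter is available to track progress while reading. Note that because promises are chainable, only the promise returned directly by `readFile()` carries the `emitter` key - promises returned by subsequent `.then()` calls do not, so attach listeners to the first return value: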
197
+
198
+ ```javascript
199
+ let reader = reflib.readFile('./data/json/json1.json');
200
+
201
+ reader.emitter
202
+ .on('progress', ({readBytes, totalSize, refsFound}) => /* Report progress somehow */)
203
+ .on('end', ({refsFound}) => /* Report progress somehow */);
204
+
205
+ reader
206
+ .then(refs => /* Do something with Ref collection */)
207
+ ```
208
+
196
209
  uploadFile(options)
197
210
  ===================
198
211
  Available: Browser
@@ -257,12 +270,3 @@ Credits
257
270
  =======
258
271
  Developed for the [Bond University Institute for Evidence-Based Healthcare](https://iebh.bond.edu.au).
259
272
  Please contact [the author](mailto:matt_carter@bond.edu.au) with any issues.
260
-
261
-
262
- TODO
263
- ====
264
- - [x] Basic parsing iterfaces
265
- - [ ] "progress" emitter for files
266
- - [x] `.uploadFile()` browser compatibility
267
- - [ ] `.downloadFile()` browser compatibility
268
- - [x] `setup()` functions per module to avoid things like map calculations unless the module is actually needed
package/lib/readFile.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import {createReadStream} from 'node:fs';
2
+ import Emitter from '../shared/emitter.js';
2
3
  import {stat} from 'node:fs/promises';
3
4
  import {identifyFormat} from './identifyFormat.js';
4
5
  import {readStream} from './readStream.js';
@@ -9,16 +10,26 @@ import {readStream} from './readStream.js';
9
10
  * @param {string} path The file path to parse
10
11
  * @param {Object} [options] Additional options to pass to the parser
11
12
  * @param {string} [options.module] The module to use if overriding from the file path
13
+ *
12
14
  * @returns {Promise<Array>} An eventual array of all references parsed from the file
15
+ * @property {EventEmitter} emitter An event emitter which will fire the below events
16
+ *
17
+ * @fires progress Emitted as `({readBytes: Number, totalSize: Number, refsFound: Number})`
18
+ * @fires end Emitted as `({refsFound: Number})` when the reading operation has completed
13
19
  */
14
20
  export function readFile(path, options) {
15
- let settings = {...options};
21
+ let settings = {
22
+ progressTotal: false,
23
+ ...options,
24
+ };
16
25
  let module = options?.module || identifyFormat(path)?.id;
17
26
  if (!module) throw new Error(`Unable to identify reference library format for file path "${path}"`);
18
27
 
19
- return stat(path)
28
+ let promiseEmitter = Promise.resolve()
29
+ .then(()=> stat(path))
20
30
  .then(stats => new Promise((resolve, reject) => {
21
- let refs = [];
31
+ let refs = []; // eslint-disable-line no-unused-vars
32
+
22
33
  readStream(
23
34
  module,
24
35
  createReadStream(path),
@@ -30,5 +41,23 @@ export function readFile(path, options) {
30
41
  .on('end', ()=> resolve(refs))
31
42
  .on('error', reject)
32
43
  .on('ref', ref => refs.push(ref))
44
+ .on('progress', readBytes => promiseEmitter.emitter.emit('progress', {
45
+ readBytes,
46
+ totalSize: stats.size,
47
+ refsFound: refs.length,
48
+ }))
33
49
  }))
50
+ .then(refs => {
51
+ promiseEmitter.emitter.emit('end', {refsFound: refs.length})
52
+ return refs;
53
+ })
54
+
55
+ // Extend our base promise with an emitter subkey
56
+ return Object.defineProperties(promiseEmitter, {
57
+ emitter: {
58
+ value: Emitter(),
59
+ enumerable: true,
60
+ writable: false,
61
+ },
62
+ });
34
63
  }
package/lib/uploadFile.js CHANGED
@@ -6,7 +6,7 @@ import StreamEmitter from '../shared/streamEmitter.js';
6
6
  /**
7
7
  * Prompt the user for a file then read it as a Reflib event emitter
8
8
  * @param {Object} [options] Additional options when prompting the user
9
- * @param {File} [options.files] The File object to process, omitting this will prompt the user to select a file
9
+ * @param {File} [options.file] The File object to process, omitting this will prompt the user to select a file
10
10
  * @param {function} [options.onStart] Async function called as `(File)` when starting the read stage
11
11
  * @param {function} [options.onProgress] Function called as `(position, totalSize)` when processing the file
12
12
  * @param {function} [options.onEnd] Async function called as `()` when the read stage has completed
@@ -49,9 +49,10 @@ export function uploadFile(options) {
49
49
  return Promise.resolve()
50
50
  .then(()=> settings.onStart && settings.onStart(settings.file))
51
51
  .then(()=> new Promise((resolve, reject) => {
52
+
52
53
  let streamer = readStream(
53
54
  identifiedType.id,
54
- StreamEmitter(settings.file.stream().getReader()),
55
+ StreamEmitter(settings.file.stream()),
55
56
  {
56
57
  ...settings,
57
58
  size: settings.file.size,
@@ -1,7 +1,9 @@
1
1
  import camelCase from '../shared/camelCase.js';
2
2
  import Emitter from '../shared/emitter.js';
3
- import {WritableStream as XMLParser} from 'htmlparser2/lib/WritableStream';
4
3
 
4
+ // TODO: CF: Don't need to import both, it depends if we are on browser or node
5
+ import * as htmlparser2 from "htmlparser2";
6
+ import {WritableStream as XMLParser} from 'htmlparser2/lib/WritableStream';
5
7
 
6
8
  /**
7
9
  * @see modules/interface.js
@@ -30,75 +32,112 @@ export function readStream(stream) {
30
32
  */
31
33
  let textAppend = false;
32
34
 
35
+ /**
36
+ * The options/callbacks for the parser
37
+ * @type {Object}
38
+ */
39
+ let parserOptions = {
40
+ xmlMode: true,
41
+ decodeEntities: false, // Handled below
42
+ onopentag(name, attrs) {
43
+ textAppend = false;
44
+ stack.push({
45
+ name: camelCase(name),
46
+ attrs,
47
+ });
48
+ },
49
+ onclosetag(name) {
50
+ if (name == 'record') {
51
+ if (ref.title) ref.title = ref.title // htmlparser2 handles the '<title>' tag in a really bizare way so we have to pull apart the <style> bits when parsing
52
+ .replace(/^.*<style.*>(.*)<\/style>.*$/m, '$1')
53
+ .replace(/^\s+/, '')
54
+ .replace(/\s+$/, '')
55
+ emitter.emit('ref', translateRawToRef(ref));
56
+ stack = []; // Trash entire stack when hitting end of <record/> node
57
+ ref = {}; // Reset the ref state
58
+ } else {
59
+ stack.pop();
60
+ }
61
+ },
62
+ ontext(text) {
63
+ let parentName = stack[stack.length - 1]?.name;
64
+ let gParentName = stack[stack.length - 2]?.name;
65
+ if (parentName == 'title') {
66
+ if (textAppend) {
67
+ ref.title += text;
68
+ } else {
69
+ ref.title = text;
70
+ }
71
+ } else if (parentName == 'style' && gParentName == 'author') {
72
+ if (!ref.authors) ref.authors = [];
73
+ if (textAppend) {
74
+ ref.authors[ref.authors.length - 1] += xmlUnescape(text);
75
+ } else {
76
+ ref.authors.push(xmlUnescape(text));
77
+ }
78
+ } else if (parentName == 'style' && gParentName == 'keyword') {
79
+ if (!ref.keywords) ref.keywords = [];
80
+ if (textAppend) {
81
+ ref.keywords[ref.keywords.length - 1] += xmlUnescape(text);
82
+ } else {
83
+ ref.keywords.push(xmlUnescape(text));
84
+ }
85
+ } else if (parentName == 'style') { // Text within <style/> tag
86
+ if (textAppend || ref[gParentName]) { // Text already exists? Append (handles node-expats silly multi-text per escape character "feature")
87
+ ref[gParentName] += xmlUnescape(text);
88
+ } else {
89
+ ref[gParentName] = xmlUnescape(text);
90
+ }
91
+ } else if (['recNumber', 'refType'].includes(parentName)) { // Simple setters like <rec-number/>
92
+ if (textAppend || ref[parentName]) {
93
+ ref[parentName] += xmlUnescape(text);
94
+ } else {
95
+ ref[parentName] = xmlUnescape(text);
96
+ }
97
+ }
98
+ textAppend = true; // Always set the next call to the text emitter handler as an append operation
99
+ },
100
+ onend() {
101
+ emitter.emit('end');
102
+ }
103
+ }
33
104
 
34
105
  // Queue up the parser in the next tick (so we can return the emitter first)
35
- setTimeout(()=> {
36
- let parser = new XMLParser({
37
- xmlMode: true,
38
- decodeEntities: false, // Handled below
39
- onopentag(name, attrs) {
40
- textAppend = false;
41
- stack.push({
42
- name: camelCase(name),
43
- attrs,
44
- });
45
- },
46
- onclosetag(name) {
47
- if (name == 'record') {
48
- if (ref.title) ref.title = ref.title // htmlparser2 handles the '<title>' tag in a really bizare way so we have to pull apart the <style> bits when parsing
49
- .replace(/^.*<style.*>(.*)<\/style>.*$/m, '$1')
50
- .replace(/^\s+/, '')
51
- .replace(/\s+$/, '')
52
- emitter.emit('ref', translateRawToRef(ref));
53
- stack = []; // Trash entire stack when hitting end of <record/> node
54
- ref = {}; // Reset the ref state
106
+ setTimeout(() => {
107
+
108
+ if (typeof stream.pipe === 'function') {
109
+ // We are on the node.js client
110
+ let parser = new XMLParser(parserOptions);
111
+ stream.on('data', ()=> emitter.emit('progress', stream.bytesRead))
112
+ stream.pipe(parser)
113
+ return;
114
+ }
115
+
116
+ // TODO: CF: We may want to consider moving to a DIY parser for speed and memory efficiency
117
+ if (typeof stream.getReader === 'function') {
118
+ // We are on the browser
119
+ var reader = stream.getReader();
120
+ var parser = new htmlparser2.Parser(parserOptions);
121
+ parseXMLOnBrowser();
122
+ return;
123
+ }
124
+
125
+ function parseXMLOnBrowser() {
126
+ reader.read().then(({done, value}) => {
127
+ if (done) {
128
+ parser.end();
55
129
  } else {
56
- stack.pop();
57
- }
58
- },
59
- ontext(text) {
60
- let parentName = stack[stack.length - 1]?.name;
61
- let gParentName = stack[stack.length - 2]?.name;
62
- if (parentName == 'title') {
63
- if (textAppend) {
64
- ref.title += text;
65
- } else {
66
- ref.title = text;
67
- }
68
- } else if (parentName == 'style' && gParentName == 'author') {
69
- if (!ref.authors) ref.authors = [];
70
- if (textAppend) {
71
- ref.authors[ref.authors.length - 1] += xmlUnescape(text);
72
- } else {
73
- ref.authors.push(xmlUnescape(text));
74
- }
75
- } else if (parentName == 'style' && gParentName == 'keyword') {
76
- if (!ref.keywords) ref.keywords = [];
77
- if (textAppend) {
78
- ref.keywords[ref.keywords.length - 1] += xmlUnescape(text);
79
- } else {
80
- ref.keywords.push(xmlUnescape(text));
81
- }
82
- } else if (parentName == 'style') { // Text within <style/> tag
83
- if (textAppend || ref[gParentName]) { // Text already exists? Append (handles node-expats silly multi-text per escape character "feature")
84
- ref[gParentName] += xmlUnescape(text);
85
- } else {
86
- ref[gParentName] = xmlUnescape(text);
87
- }
88
- } else if (['recNumber', 'refType'].includes(parentName)) { // Simple setters like <rec-number/>
89
- if (textAppend || ref[parentName]) {
90
- ref[parentName] += xmlUnescape(text);
91
- } else {
92
- ref[parentName] = xmlUnescape(text);
93
- }
130
+ var text = new TextDecoder().decode(value);
131
+ parser.write(text);
132
+ text = null; // Free up memory
133
+ parseXMLOnBrowser();
94
134
  }
95
- textAppend = true; // Always set the next call to the text emitter handler as an append operation
96
- },
97
- })
135
+ })
136
+ }
98
137
 
99
- stream.pipe(parser)
100
- .on('finish', ()=> emitter.emit('end'))
101
- });
138
+ console.error("This line should not be hit!");
139
+
140
+ })
102
141
 
103
142
  return emitter;
104
143
  }
@@ -14,6 +14,7 @@
14
14
  * @emits ref Emitted with a single ref object when found
15
15
  * @emits end Emitted when parsing has completed
16
16
  * @emits error Emitted when an error has been raised
17
+ * @emits progress Emitted as (bytesRead) when reading a stream
17
18
  */
18
19
  export function readStream(stream, options) {
19
20
  // Stub
package/modules/json.js CHANGED
@@ -9,7 +9,8 @@ export function readStream(stream) {
9
9
  let emitter = Emitter();
10
10
 
11
11
  // Queue up the parser in the next tick (so we can return the emitter first)
12
- setTimeout(()=>
12
+ setTimeout(()=> {
13
+ stream.on('data', ()=> emitter.emit('progress', stream.bytesRead));
13
14
  stream.pipe(
14
15
  JSONStream.parse('*')
15
16
  .on('data', ref => emitter.emit('ref', {
@@ -19,7 +20,7 @@ export function readStream(stream) {
19
20
  .on('end', ()=> emitter.emit('end'))
20
21
  .on('error', emitter.emit.bind('error'))
21
22
  )
22
- );
23
+ });
23
24
 
24
25
  return emitter;
25
26
  }
@@ -122,6 +122,7 @@ export function readStream(stream, options) {
122
122
  setTimeout(()=> {
123
123
  stream
124
124
  .on('data', chunkBuffer => {
125
+ emitter.emit('progress', stream.bytesRead);
125
126
  buffer += chunkBuffer.toString(); // Append incomming data to the partial-buffer we're holding in memory
126
127
 
127
128
  let bufferCrop = 0; // How many bytes to shift off the front of the buffer based on the last full reference we saw, should end up at the last byte offset of buffer that is valid to shift-truncate to
package/modules/ris.js CHANGED
@@ -21,6 +21,7 @@ export function readStream(stream, options) {
21
21
  setTimeout(()=> {
22
22
  stream
23
23
  .on('data', chunkBuffer => {
24
+ emitter.emit('progress', stream.bytesRead);
24
25
  buffer += chunkBuffer.toString(); // Append incomming data to the partial-buffer we're holding in memory
25
26
 
26
27
  let bufferCrop = 0; // How many bytes to shift off the front of the buffer based on the last full reference we saw, should end up at the last byte offset of buffer that is valid to shift-truncate to
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@iebh/reflib",
3
- "version": "2.0.4",
3
+ "version": "2.1.0",
4
4
  "description": "Reference / Citation reference library utilities",
5
5
  "scripts": {
6
6
  "lint": "eslint lib modules shared test",
@@ -3,7 +3,7 @@ import Emitter from '../shared/emitter.js';
3
3
  /**
4
4
  * Wrapper for streams which transforms a given input into an emitter pattern
5
5
  * This is designed to let regular `node:stream.Readable` objects pass through without alteration but browser based stream objects get wrapped
6
- * @param {stream.Readable|ReadableStreamDefaultReader} inStream The input stream to wrap
6
+ * @param {stream.Readable|ReadableStream} inStream The input stream to wrap
7
7
  * @returns {stream.Readable|Emitter} Either the unedited node compatible stream or an event emitter with the same behaviour
8
8
  *
9
9
  * @emits data Emitted as `(chunk)` on each data chunk
@@ -11,27 +11,8 @@ import Emitter from '../shared/emitter.js';
11
11
  * @emits error Emitted as `(Error)` on any read error
12
12
  */
13
13
  export default function streamEmitter(inStream) {
14
- if (inStream.on) return inStream; // inStream already supports event emitters - do nothing
15
-
16
- let emitter = Emitter();
17
- let utf8Decoder = new TextDecoder('utf-8');
18
- let readCycle = ()=> {
19
- inStream
20
- .read()
21
- .then(({value, done}) => {
22
- if (done) {
23
- emitter.emit('end');
24
- } else {
25
- emitter.emit('data', utf8Decoder.decode(value, {stream: true}));
26
- setTimeout(readCycle); // Loop into next read if not already finished
27
- }
28
- })
29
- .catch(e => emitter.emit('error', e))
30
- };
31
-
32
- // Keep downstream libraries happy by stubbing stream-like functions
33
- emitter.setEncoding = ()=> {};
34
-
35
- setTimeout(readCycle); // Queue up initial read cycle on next tick
36
- return emitter;
14
+ // FIXME: Need to examine inStream and multiplex
15
+ // inStream.pipeTo - a browser stream - passthru
16
+ // !inStream.pipeTo - probably Node stream - need to glue pipeTo as a promiseable
17
+ return inStream;
37
18
  }