@iebh/reflib 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintrc.cjs ADDED
@@ -0,0 +1,14 @@
1
+ module.exports = {
2
+ "env": {
3
+ "browser": true,
4
+ "es2021": true,
5
+ "mocha": true
6
+ },
7
+ "extends": "eslint:recommended",
8
+ "parserOptions": {
9
+ "ecmaVersion": 13,
10
+ "sourceType": "module"
11
+ },
12
+ "rules": {
13
+ }
14
+ };
package/.ignore ADDED
@@ -0,0 +1 @@
1
+ test/data/
package/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Institute for Evidence-Based Healthcare
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,268 @@
1
+ RefLib
2
+ ======
3
+ Reference library processing for Node.
4
+
5
+ This library provides various read/write functionality to process citation libraries and handle individual references (henceforth "Refs").
6
+
7
+ **NOTE: THIS LIBRARY IS STILL UNDER CONSTRUCTION**
8
+ **Please use [Version 1](https://github.com/hash-bang/Reflib-Node) ([NPM](https://www.npmjs.com/package/reflib)) until this message is removed**
9
+
10
+
11
+ Compatibility
12
+ =============
13
+
14
+ | Library | Extension(s) | Read | Write |
15
+ |------------------------|-----------------|--------------------|--------------------|
16
+ | Comma Separated Values | `.csv` | :x: | :x: |
17
+ | EndNote ENL | `.enl` | :x: | :x: |
18
+ | EndNote ENLX | `.enlx` | :x: | :x: |
19
+ | EndNote XML | `.xml` | :heavy_check_mark: | :heavy_check_mark: |
20
+ | JSON | `.json` | :heavy_check_mark: | :heavy_check_mark: |
21
+ | Medline | `.nbib` | :x: | :x: |
22
+ | RIS | `.ris` / `.txt` | :heavy_check_mark: | :heavy_check_mark: |
23
+ | Tab Separated Values | `.tsv` | :x: | :x: |
24
+
25
+
26
+ **Notes on different formats**:
27
+
28
+ * Medline seems to implement a totally different [publication type system](https://www.nlm.nih.gov/mesh/pubtypes.html) than others. RefLib will attempt to guess the best match, storing the original type in the `medlineType` key. Should the citation library be exported _back_ to Medline / `.nbib` files this key will take precedence to avoid data loss
29
+
30
+
31
+ Reference Structure
32
+ ===================
33
+ RefLib creates a simple Plain-Old-JavaScript-Object (POJO) for each reference it parses, or writes to a file format when given a collection of the same.
34
+
35
+ Each reference has the following standardized fields, these are translated from whatever internal format each module uses - e.g. the `TI` RIS field is automatically translated to `title`.
36
+
37
+
38
+ | Field | Type | Description |
39
+ |------------------|-----------------|----------------------------------------------------------------------------------------|
40
+ | recNumber | `number` | The sorting number of the reference. Not present in RIS files |
41
+ | type | `string` | A supported [reference type](#reference-types) (e.g. journalArticle) |
42
+ | title | `string` | The reference's main title |
43
+ | journal | `string` | The reference's secondary title, this is usually the journal for most published papers |
44
+ | authors | `array<string>` | An array of each Author in the originally specified format |
45
+ | date | `string` | The raw, internal date of the reference |
46
+ | urls | `array<string>` | An array of each URL for the reference |
47
+ | pages | `string` | The page reference, usually in the format `123-4` |
48
+ | volume | `string` | |
49
+ | number | `string` | |
50
+ | isbn | `string` | |
51
+ | abstract | `string` | |
52
+ | label | `string` | |
53
+ | caption | `string` | |
54
+ | notes | `string` | |
55
+ | address | `string` | |
56
+ | researchNotes | `string` | |
57
+ | keywords | `array<string>` | Optional list of keywords that apply to the reference |
58
+ | accessDate | `string` | |
59
+ | accession | `string` | |
60
+ | doi | `string` | |
61
+ | section | `string` | |
62
+ | language | `string` | |
63
+ | researchNotes | `string` | |
64
+ | databaseProvider | `string` | |
65
+ | database | `string` | |
66
+ | workType | `string` | |
67
+ | custom1 | `string` | |
68
+ | custom2 | `string` | |
69
+ | custom3 | `string` | |
70
+ | custom4 | `string` | |
71
+ | custom5 | `string` | |
72
+ | custom6 | `string` | |
73
+ | custom7 | `string` | |
74
+
75
+
76
+ Reference Types
77
+ ---------------
78
+ As with refs the following ref types are supported and translated from the module internal formats.
79
+
80
+
81
+ ```
82
+ aggregatedDatabase
83
+ ancientText
84
+ artwork
85
+ audioVisualMaterial
86
+ bill
87
+ blog
88
+ book
89
+ bookSection
90
+ case
91
+ catalog
92
+ chartOrTable
93
+ classicalWork
94
+ computerProgram
95
+ conferencePaper
96
+ conferenceProceedings
97
+ dataset
98
+ dictionary
99
+ editedBook
100
+ electronicArticle
101
+ electronicBook
102
+ electronicBookSection
103
+ encyclopedia
104
+ equation
105
+ figure
106
+ filmOrBroadcast
107
+ generic
108
+ governmentDocument
109
+ grant
110
+ hearing
111
+ journalArticle
112
+ legalRuleOrRegulation
113
+ magazineArticle
114
+ manuscript
115
+ map
116
+ music
117
+ newspaperArticle
118
+ onlineDatabase
119
+ onlineMultimedia
120
+ pamphlet
121
+ patent
122
+ personalCommunication
123
+ report
124
+ serial
125
+ standard
126
+ statute
127
+ thesis
128
+ unknown
129
+ unpublished
130
+ web
131
+ ```
132
+
133
+
134
+
135
+ API
136
+ ===
137
+ Each API is available either from the default `reflib` object or as a separate import.
138
+
139
+
140
+ ```javascript
141
+ import reflib from '@iebh/reflib'; // Import everything as `reflib`
142
+ reflib.readFile(path);
143
+ reflib.writeFile(path, refs);
144
+
145
+
146
+ import {readFile, writeFile} from '@iebh/reflib'; // Import specific functions
147
+ readFile(path);
148
+ writeFile(path, refs);
149
+ ```
150
+
151
+
152
+ formats
153
+ =======
154
+ Available: Node + Browser
155
+ A lookup object of all supported citation library formats.
156
+ Each key is the unique ID of that module.
157
+
158
+ Properties are:
159
+
160
+ | Key | Type | Description |
161
+ |--------------|-----------------|------------------------------------------------------------------------|
162
+ | `id` | `String` | The unique ID of that module (same as object key) |
163
+ | `title` | `String` | Longer, human readable title of the module |
164
+ | `titleShort` | `String` | Shorter, human readable title of the module |
165
+ | `ext` | `Array<String>` | Array of output file extensions, first extension should be the default |
166
+ | `canRead` | `boolean` | Whether the format is supported when reading a citation library |
167
+ | `canWrite` | `boolean` | Whether the format is supported when writing a citation library |
168
+
169
+
170
+ identifyFormat(path)
171
+ ====================
172
+ Available: Node + Browser
173
+ Attempt to determine the format of a file on disk from its path. The file does not need to actually exist.
174
+
175
+ ```javascript
176
+ identifyFormat('My Refs.csv') //= csv
177
+ identifyFormat('My Refs.json') //= json
178
+ identifyFormat('My Refs.nbib') //= medline
179
+ identifyFormat('My Refs.txt.ris') //= ris
180
+ identifyFormat('MY REFS.TXT.RIS') //= ris
181
+ identifyFormat('My Refs.data.tsv') //= tsv
182
+ identifyFormat('My Refs.xml') //= endnoteXml
183
+ ```
184
+
185
+
186
+ readFile(path, options)
187
+ =======================
188
+ Available: Node
189
+ Read a file on disk, returning a Promise which will resolve with an array of all Refs extracted.
190
+
191
+ ```javascript
192
+ reflib.readFile('./data/json/json1.json')
193
+ .then(refs => /* Do something with Ref collection */)
194
+ ```
195
+
196
+ uploadFile(options)
197
+ ===================
198
+ Available: Browser
199
+ Prompt the user for a file and process it into an array of citations.
200
+
201
+ ```javascript
202
+ reflib.uploadFile({ // Additional options
203
+ file, // Optional File object if known, if omitted this function will prompt the user to select a file
204
+ onStart, // Async function called as `(File)` when starting the read stage
205
+ onProgress, // Function called as `(position, totalSize)` when processing the file
206
+ onEnd, // Async function called as `()` when the read stage has completed
207
+ })
208
+ .then(refs => /* Do something with Ref collection */)
209
+ ```
210
+
211
+
212
+ writeFile(path, refs, options)
213
+ ==============================
214
+ Available: Node
215
+ Write a file back to disk, returning a Promise which will resolve when done.
216
+
217
+ ```javascript
218
+ reflib.writeFile('MyRefs.xml', refs);
219
+ ```
220
+
221
+
222
+
223
+ readStream(moduleId, inputStream, options)
224
+ ==========================================
225
+ Available: Node + Browser
226
+ Low level worker of `readFile()`.
227
+ Accept an input Stream.Readable and return an emitter which will emit each Ref found.
228
+
229
+ ```javascript
230
+ reflib.readStream('json', createReadStream('./data/json/json1.json'))
231
+ .on('end', ()=> /* Finished reading */)
232
+ .on('error', err => /* Deal with errors */)
233
+ .on('ref', ref => /* Do something with extracted Ref */)
234
+ ```
235
+
236
+
237
+ writeStream(moduleId, outputStream, options)
238
+ ============================================
239
+ Available: Node + Browser
240
+ Low level worker of `writeFile()`.
241
+ Return an object with methods to call to write to a given stream.
242
+ The returned object will have a `start()`, `end()` and `write(ref)` function which can be called to write to the original output stream.
243
+
244
+ ```javascript
245
+ // Convert a JSON file to EndNoteXML via a stream
246
+ let output = reflib.writeStream('endnoteXml', createWriteStream('./MyRefs.xml'));
247
+
248
+ output.start(); // Begin stream writing
249
+
250
+ reflib.readStream('json', createReadStream('./data/json/json1.json'))
251
+ .on('ref', ref => output.write(ref))
252
+ .on('end', ()=> output.end())
253
+ ```
254
+
255
+
256
+ Credits
257
+ =======
258
+ Developed for the [Bond University Institute for Evidence-Based Healthcare](https://iebh.bond.edu.au).
259
+ Please contact [the author](mailto:matt_carter@bond.edu.au) with any issues.
260
+
261
+
262
+ TODO
263
+ ====
264
+ - [x] Basic parsing interfaces
265
+ - [ ] "progress" emitter for files
266
+ - [x] `.uploadFile()` browser compatibility
267
+ - [ ] `.downloadFile()` browser compatibility
268
+ - [x] `setup()` functions per module to avoid things like map calculations unless the module is actually needed
package/lib/browser.js ADDED
@@ -0,0 +1,8 @@
1
+ import {identifyFormat} from './identifyFormat.js';
2
+ import {formats} from './formats.js';
3
+ import {getModule} from './getModule.js';
4
+ import {readStream} from './readStream.js';
5
+ import {uploadFile} from './uploadFile.js';
6
+ import {writeStream} from './writeStream.js';
7
+
8
// Default export: a single object bundling the functions imported above
const browserApi = {
	identifyFormat,
	formats,
	getModule,
	readStream,
	uploadFile,
	writeStream,
};

export default browserApi;
package/lib/default.js ADDED
@@ -0,0 +1,7 @@
1
+ export * from './identifyFormat.js';
2
+ export * from './formats.js';
3
+ export * from './getModule.js';
4
+ export * from './readFile.js';
5
+ export * from './readStream.js';
6
+ export * from './writeFile.js';
7
+ export * from './writeStream.js';
package/lib/fields.js ADDED
@@ -0,0 +1,161 @@
1
/**
* Field definitions for RefLib citations
* @type {Object} An object lookup where each key represents a field within a citation
* @property {string} type A TypeScript compatible type for that field
* @property {array<string>} [values] Possible values if the type is restricted
*/
export let fields = {
	recNumber: {
		// NOTE(review): the README documents this field as `number` - confirm which is intended
		type: 'string',
	},
	type: {
		type: 'string',
		// Supported reference types, kept in alphabetical order
		values: [
			'aggregatedDatabase',
			'ancientText',
			'artwork',
			'audioVisualMaterial',
			'bill',
			'blog',
			'book',
			'bookSection',
			'case',
			'catalog',
			'chartOrTable',
			'classicalWork',
			'computerProgram',
			'conferencePaper',
			'conferenceProceedings',
			'dataset',
			'dictionary',
			'editedBook',
			'electronicArticle',
			'electronicBook',
			'electronicBookSection',
			'encyclopedia',
			'equation',
			'figure',
			'filmOrBroadcast',
			'generic',
			'governmentDocument',
			'grant',
			'hearing',
			'journalArticle',
			'legalRuleOrRegulation',
			'magazineArticle',
			'manuscript',
			'map',
			'music',
			'newspaperArticle',
			'onlineDatabase',
			'onlineMultimedia',
			'pamphlet',
			'patent',
			'personalCommunication',
			'report',
			'serial',
			'standard',
			'statute',
			'thesis',
			'unknown',
			'unpublished',
			'web',
		],
	},
	title: {
		type: 'string',
	},
	journal: {
		type: 'string',
	},
	authors: {
		type: 'array<string>',
	},
	date: {
		type: 'string',
	},
	urls: {
		type: 'array<string>',
	},
	pages: {
		type: 'string',
	},
	volume: {
		type: 'string',
	},
	number: {
		type: 'string',
	},
	isbn: {
		type: 'string',
	},
	abstract: {
		type: 'string',
	},
	label: {
		type: 'string',
	},
	caption: {
		type: 'string',
	},
	notes: {
		type: 'string',
	},
	address: {
		type: 'string',
	},
	researchNotes: {
		type: 'string',
	},
	keywords: {
		type: 'array<string>',
	},
	accessDate: {
		type: 'string',
	},
	accession: {
		type: 'string',
	},
	doi: {
		type: 'string',
	},
	section: {
		type: 'string',
	},
	language: {
		type: 'string',
	},
	databaseProvider: {
		type: 'string',
	},
	database: {
		type: 'string',
	},
	workType: {
		type: 'string',
	},
	custom1: {
		type: 'string',
	},
	custom2: {
		type: 'string',
	},
	custom3: {
		type: 'string',
	},
	custom4: {
		type: 'string',
	},
	custom5: {
		type: 'string',
	},
	custom6: {
		type: 'string',
	},
	custom7: {
		type: 'string',
	},
};
package/lib/formats.js ADDED
@@ -0,0 +1,61 @@
1
/**
* Lookup table of various citation file formats
* @type {Object} An object lookup of RefLib supported file formats where each key is the unique ID of the format
* @property {string} id The unique ID of the format (same as the object key)
* @property {string} title The long form title of the format
* @property {string} titleShort Shorter title of the format
* @property {array<string>} ext File extensions of this format, the first entry is generally used as the output default
* @property {boolean} canRead Whether the format is supported when reading a citation library
* @property {boolean} canWrite Whether the format is supported when writing a citation library
*/
export let formats = {
	csv: {
		id: 'csv',
		title: 'Comma Separated Values',
		titleShort: 'CSV',
		ext: ['.csv'],
		canRead: false,
		canWrite: false,
	},
	endnoteXml: {
		id: 'endnoteXml',
		title: 'EndNoteXML',
		titleShort: 'EndNoteXML',
		ext: ['.xml'],
		canRead: true,
		canWrite: true,
	},
	json: {
		id: 'json',
		title: 'JSON',
		titleShort: 'JSON',
		ext: ['.json'],
		canRead: true,
		canWrite: true,
	},
	medline: {
		id: 'medline',
		title: 'MEDLINE / PubMed',
		titleShort: 'MEDLINE',
		ext: ['.nbib'],
		canRead: false,
		canWrite: false,
	},
	ris: {
		id: 'ris',
		title: 'RIS',
		titleShort: 'RIS',
		ext: ['.ris', '.txt'],
		canRead: true,
		canWrite: true,
	},
	tsv: {
		id: 'tsv',
		title: 'Tab Separated Values',
		titleShort: 'TSV',
		ext: ['.tsv'],
		canRead: false,
		canWrite: false,
	},
};
@@ -0,0 +1,37 @@
1
+ import * as modules from '../modules/default.js';
2
+
3
const hasSetup = new Set(); // IDs of modules whose `setup()` has already been run

/**
* Simple wrapper which loads the named module as a keyed library of functions
* @param {string} module The module ID as per `lib/formats.js`
* @param {Object} [options] Additional options to use when fetching the module
* @param {boolean} [options.setup=true] Call the `setup()` function on any module requested before use
* @return {Object} The loaded module as an object of standardised functionality
* @throws {Error} If no module ID is given or the ID does not match a known module
*/
export function getModule(module, options) {
	// Sanity checking
	if (!module) throw new Error('No module provided');

	// Argument mangling - caller supplied options override the defaults
	const settings = {
		setup: true,
		...options,
	};

	// Try to find the module
	const mod = modules[module];
	if (!mod) throw new Error(`Unknown module "${module}"`);

	// Should setup and module exposes a setup function?
	if (
		settings.setup // We should set up...
		&& mod.setup // AND the module has a function to do so...
		&& !hasSetup.has(module) // AND we've not setup before
	) {
		// Mark before calling so a nested getModule() for the same ID during setup does not run it twice
		hasSetup.add(module);
		try {
			mod.setup();
		} catch (e) {
			// Setup failed - forget the marker so a later call can try again
			hasSetup.delete(module);
			throw e;
		}
	}

	return mod;
}
@@ -0,0 +1,13 @@
1
+ import {formats} from '../lib/formats.js';
2
+
3
/**
* Identify and return the Reflib format (from ./formats) to use for the given file name / path
* Matching is done purely on the text after the last dot in the path, compared case-insensitively
* @param {string} path The input path to identify
* @returns {Object} A matching entry from ./formats or null if no matching format was found
*/
export function identifyFormat(path) {
	// Greedy prefix + lazy named group captures only the final ".ext" portion of the path
	const ext = /^.*(?<ext>\..+?)$/.exec(path)?.groups.ext.toLowerCase();
	if (!ext) return null;

	// `find()` yields undefined on a miss - normalise to null to honour the documented contract
	return Object.values(formats).find(format => format.ext.includes(ext)) ?? null;
}
@@ -0,0 +1,34 @@
1
+ import {createReadStream} from 'node:fs';
2
+ import {stat} from 'node:fs/promises';
3
+ import {identifyFormat} from './identifyFormat.js';
4
+ import {readStream} from './readStream.js';
5
+
6
/**
* Parse a file directly from a path
* This function is a wrapper around the readStream handler + some Promise magic
* @param {string} path The file path to parse
* @param {Object} [options] Additional options to pass to the parser
* @param {string} [options.module] The module to use if overriding from the file path
* @returns {Promise<Array>} An eventual array of all references parsed from the file
* @throws {Error} Synchronously, if no module is given and none can be identified from the path
*/
export function readFile(path, options) {
	const settings = {...options};
	const module = settings.module || identifyFormat(path)?.id;
	if (!module) throw new Error(`Unable to identify reference library format for file path "${path}"`);

	// Stat first so the parser can be told the total input size
	return stat(path)
		.then(stats => new Promise((resolve, reject) => {
			const refs = [];

			// A successful stat() does not guarantee the file can be opened or read (e.g. the path is a directory),
			// so route stream level failures into the Promise rather than leaving the 'error' event unhandled
			const input = createReadStream(path);
			input.on('error', reject);

			readStream(
				module,
				input,
				{
					...settings,
					size: stats.size,
				},
			)
				.on('end', () => resolve(refs))
				.on('error', reject)
				.on('ref', ref => refs.push(ref));
		}));
}
@@ -0,0 +1,21 @@
1
+ import {getModule} from './getModule.js';
2
+
3
/**
* Read references from a stream using the parser of the named module
* All real work is delegated to the `readStream` export of the module resolved via `getModule()`
* @param {string} module The module ID as per `lib/formats.js`
* @param {Stream.Readable} stream Input stream to parse
* @param {Object} [options] Additional options to pass to the parser
* @param {number} [options.size] Size of the input stream, if omitted `progress` events are not emitted
* @returns {EventEmitter} An Event-Emitter compatible object which will fire various events while parsing
*
* @emits ref Emitted with an extracted reference object during parse
* @emits end Emitted when the parsing has completed
* @emits error Emitted with an Error object if any occurred
*/
export function readStream(module, stream, options) {
	// Guard clauses - both a module ID and a stream are mandatory
	if (!module) {
		throw new Error('No module provided to parse with');
	}
	if (!stream) {
		throw new Error('No stream provided to parse');
	}

	const parser = getModule(module);
	return parser.readStream(stream, options);
}