@iebh/reflib 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/modules/ris.js ADDED
@@ -0,0 +1,345 @@
1
+ import Emitter from '../shared/emitter.js';
2
+
3
/**
 * Read a stream of RIS formatted text, emitting each parsed reference as it completes
 * @see modules/interface.js
 * @param {stream.Readable|Emitter} stream Input stream supplying raw RIS text chunks
 * @param {Object} [options] Additional options to use when parsing
 * @param {string} [options.defaultType='journalArticle'] Default citation type to assume when no other type is specified
 * @param {string} [options.delimeter='\r'] How to split multi-line items
 * @returns {Emitter} Event emitter firing `ref` (per parsed reference), `error` (on stream errors) + `end` (when the stream closes)
 */
export function readStream(stream, options) {
	let settings = {
		defaultType: 'journalArticle',
		delimeter: '\r',
		...options,
	};

	let emitter = Emitter();

	let buffer = ''; // Incoming text buffer - holds partial lines if the chunk we're given isn't enough to parse a reference yet

	// Queue up the parser in the next tick (so we can return the emitter first)
	setTimeout(()=> {
		stream
			.on('data', chunkBuffer => {
				buffer += chunkBuffer.toString(); // Append incoming data to the partial-buffer we're holding in memory

				let bufferCrop = 0; // Byte offset of the start of the next (unfinished) ref - ends up at the last offset of buffer that is valid to shift-truncate to
				let bufferSplitter = /(\r\n|\n)ER\s+-\s*(\r\n|\n)/g; // RegExp matching "ER" end-of-reference markers (fresh instance per chunk as /g regexes keep .lastIndex state between .exec() calls)

				let bufferSegment;
				while (bufferSegment = bufferSplitter.exec(buffer)) {
					let parsedRef = parseRef(buffer.substring(bufferCrop, bufferSegment.index), settings); // Parse the ref from the start+end points

					emitter.emit('ref', parsedRef);

					bufferCrop = bufferSegment.index + bufferSegment[0].length; // Set start of next ref + cropping index to last seen offset + match
				}

				buffer = buffer.substring(bufferCrop); // Shift-truncate the buffer so we're ready to input more data on the next call
			})
			.on('error', e => emitter.emit('error', e))
			.on('end', ()=> {
				if (buffer.trim()) { // Anything non-whitespace left in the to-drain buffer?
					// Drain remaining buffer into parser before exiting
					emitter.emit('ref', parseRef(buffer, settings));
				}

				// Signal that we're done
				emitter.emit('end');
			})
	})

	return emitter;
}
54
+
55
+
56
/**
 * Create a writer interface which serializes references into RIS format on the given stream
 * @see modules/interface.js
 * @param {stream.Writable} stream Writable stream to receive the RIS output
 * @param {Object} [options] Additional options to use when formatting
 * @param {string} [options.defaultType='journalArticle'] Default citation type to assume when no other type is specified
 * @param {string} [options.delimeter='\r'] How to join multi-line items when flattening arrays
 * @returns {Object} Writer interface exposing Promise-returning `start()`, `write(ref)` + `end()` methods
 */
export function writeStream(stream, options) {
	let settings = {
		defaultType: 'journalArticle',
		delimeter: '\r',
		...options,
	};


	return {
		start() {
			return Promise.resolve();
		},
		write: xRef => {
			let ref = { // Assign defaults if not already present
				type: settings.defaultType,
				title: '<NO TITLE>',
				...xRef,
			};

			// Parse `pages` back into `_pageStart` + `_pageEnd` meta keys
			if (xRef.pages) {
				let pageRanges = /^(?<_pageStart>.+?)(-(?<_pageEnd>.+))?$/.exec(xRef.pages)?.groups;
				Object.assign(ref, pageRanges);
				delete ref.pages;
			}

			stream.write(
				translations.fields.collectionOutput
					.filter(f => ref[f.rl]) // Has field?
					.flatMap(f =>
						f.rl == 'type' // Translate type field
							// FIX: rlMap is a Map - bracket access always returned undefined, forcing every ref to 'JOUR'
							? 'TY - ' + (translations.types.rlMap.get(ref.type)?.raw || 'JOUR')
							: f.outputRepeat && Array.isArray(ref[f.rl]) // Repeat array types
								? ref[f.rl].map(item => `${f.raw} - ${item}`)
								: Array.isArray(ref[f.rl]) // Flatten arrays into text
									? `${f.raw} - ${ref[f.rl].join(settings.delimeter)}`
									: `${f.raw} - ${ref[f.rl]}` // Regular field output
					)
					.concat(['ER - \n\n'])
					.join('\n')
			);

			return Promise.resolve();
		},
		end() {
			return new Promise((resolve, reject) =>
				stream.end(err => err ? reject(err) : resolve())
			);
		},
	};
}
114
+
115
+
116
/**
 * Parse a single RIS format reference from a block of text
 * This function is used internally by parseStream() for each individual reference
 * @param {string} refString Raw RIS string composing the start -> end of the ref
 * @param {Object} settings Additional settings to pass, this should be initialized + parsed by the calling function for efficiency, see readStream() for full spec
 * @returns {Object} The parsed reference in RefLib format
 */
export function parseRef(refString, settings) {
	let ref = {}; // Reference under construction
	let lastField; // Last field object we saw, used to append values if they don't match the default RIS key=val one-liner

	refString
		// FIX: split on actual line endings - the previous character-class form /[\r\n|\n]/ also split on literal '|' characters within field values
		.split(/\r\n|\n|\r/)
		.forEach(line => {
			let parsedLine = /^\s*(?<key>[A-Z0-9]+?)\s+-\s+(?<value>.*)$/s.exec(line)?.groups;
			if (!parsedLine) { // Doesn't match key=val spec
				if (line.trim() && lastField) { // Line isn't just whitespace + We have a field to append to
					if (lastField.inputArray) { // Treat each line feed like an array entry
						ref[lastField.rl].push(line);
					} else { // Assume we append each line entry as a string with settings.delimeter
						ref[lastField.rl] += settings.delimeter + line;
					}
				}
				return; // Stop processing this line
			}

			if (parsedLine.key == 'ER') return; // Skip 'ER' definition lines - this is probably due to the buffer draining
			let fieldLookup = translations.fields.rawMap.get(parsedLine.key);

			if (!fieldLookup) { // Skip unknown field translations
				lastField = null;
				return;
			} else if (fieldLookup.rl == 'type') { // Special handling for ref types
				ref[fieldLookup.rl] = translations.types.rawMap.get(parsedLine.value)?.rl || settings.defaultType;
				lastField = fieldLookup; // Track last key so we can append to it on the next cycle
			} else if (fieldLookup.inputArray) { // Should this `rl` key be treated like an appendable array?
				if (!ref[fieldLookup.rl]) { // Array doesn't exist yet
					ref[fieldLookup.rl] = [parsedLine.value];
				} else {
					ref[fieldLookup.rl].push(parsedLine.value);
				}
				lastField = fieldLookup;
			} else { // Simple key=val
				ref[fieldLookup.rl] = parsedLine.value;
				lastField = fieldLookup;
			}
		})

	// Post processing
	// Page mangling {{{
	if (ref._pageStart || ref._pageEnd) {
		ref.pages = [ref._pageStart, ref._pageEnd]
			.filter(Boolean) // Remove duds
			.join('-');
		delete ref._pageStart;
		delete ref._pageEnd;
	}
	// }}}

	return ref;
}
176
+
177
+
178
/**
 * Lookup tables for this module
 * Order matters within each collection - duplicate `rl` / `raw` keys rely on position for priority (see setup())
 * @type {Object}
 * @property {array<Object>} fields Field translations between RefLib (`rl`) and the raw format (`raw`)
 * @property {array<Object>} types Citation type translations between RefLib (`rl`) and the raw format tag (`raw`)
 * @property {boolean} isArray Whether the field should append to any existing `rl` field and be treated like an array of data
 * @property {number|boolean} [sort] Sort order when outputting, use boolean `false` to disable the field on output
 * @property {boolean} [outputRepeat=false] Whether to repeat the output field if multiple values are present, if disabled arrays are flattened into a string with newlines instead
 * @property {boolean} [inputArray=false] Forcibly cast the field as an array when reading, even if there is only one value
 */
export let translations = {
	// Field translations {{{
	fields: {
		// Master list of raw RIS tag <-> RefLib key translations
		collection: [
			{rl: 'authors', raw: 'A1', sort: false, inputArray: true},
			{rl: 'authors', raw: 'A2', sort: false, inputArray: true},
			{rl: 'authors', raw: 'A3', sort: false, inputArray: true},
			{rl: 'authors', raw: 'A4', sort: false, inputArray: true},
			{rl: 'abstract', raw: 'AB'},
			{rl: 'address', raw: 'AD'},
			{rl: 'accession', raw: 'AN'},
			{rl: 'authors', raw: 'AU', sort: 4, outputRepeat: true, inputArray: true},
			{rl: 'custom1', raw: 'C1'},
			{rl: 'custom2', raw: 'C2'},
			{rl: 'custom3', raw: 'C3'},
			{rl: 'custom4', raw: 'C4'},
			{rl: 'custom5', raw: 'C5'},
			{rl: 'custom6', raw: 'C6'},
			{rl: 'custom7', raw: 'C7'},
			{rl: 'custom8', raw: 'C8'},
			{rl: 'caption', raw: 'CA'},
			{rl: 'fallbackCity', raw: 'CY'},
			{rl: 'date', raw: 'DA'},
			{rl: 'database', raw: 'DB'},
			{rl: 'doi', raw: 'DO'},
			{rl: 'databaseProvider', raw: 'DP'},
			{rl: '_pageEnd', raw: 'EP', sort: 6},
			{rl: 'edition', raw: 'ET', sort: 7},
			{rl: 'number', raw: 'IS', sort: 8},
			{rl: 'journal', raw: 'J1', sort: false},
			{rl: 'journal', raw: 'JF', sort: 3},
			{rl: 'keywords', raw: 'KW', outputRepeat: true, inputArray: true},
			{rl: 'urls', raw: 'L1', sort: false, inputArray: true},
			{rl: 'urls', raw: 'L2', sort: false, inputArray: true},
			{rl: 'urls', raw: 'L3', sort: false, inputArray: true},
			{rl: 'urls', raw: 'L4', sort: false, inputArray: true},
			{rl: 'language', raw: 'LA'},
			{rl: 'label', raw: 'LB'},
			{rl: 'urls', raw: 'LK'},
			{rl: 'notes', raw: 'N1'},
			{rl: 'fallbackAbstract', raw: 'N2'},
			{rl: 'publisher', raw: 'PB'},
			{rl: 'year', raw: 'PY'},
			{rl: 'isbn', raw: 'SN'},
			{rl: '_pageStart', raw: 'SP', sort: 5},
			{rl: 'title', raw: 'T1', sort: false},
			{rl: 'journal', raw: 'T2', sort: false},
			{rl: 'title', raw: 'TI', sort: 1},
			{rl: 'type', raw: 'TY', sort: 0}, // TY must be the lowest number
			{rl: 'urls', raw: 'UR', outputRepeat: true, inputArray: true},
			{rl: 'volume', raw: 'VL'},
			{rl: 'date', raw: 'Y1'},
			{rl: 'accessDate', raw: 'Y2'},

			// These are non-standard fields but we keep these here anyway to prevent data loss
			{rl: 'RISID', raw: 'ID'},
			{rl: 'RISShortTitle', raw: 'ST'},
			{rl: 'RISOriginalPublication', raw: 'OP'},
		],
		collectionOutput: [], // Sorted + filtered version of the above to use when outputting (populated by setup())
		rawMap: new Map(), // raw RIS tag -> field object; calculated by setup() for quicker lookup
		rlMap: new Map(), // RefLib key -> field object; calculated by setup() for quicker lookup
	},
	// }}}
	// Ref type translations {{{
	types: {
		collection: [
			// Place high-priority translations at the top (when we translate BACK we need to know which of multiple keys to prioritize)
			{rl: 'audioVisualMaterial', raw: 'ADVS'},
			{rl: 'journalArticle', raw: 'JOUR'},
			{rl: 'personalCommunication', raw: 'PCOMM'},
			{rl: 'filmOrBroadcast', raw: 'VIDEO'},

			// Low priority below this line
			{rl: 'unknown', raw: 'ABST'},
			{rl: 'aggregatedDatabase', raw: 'AGGR'},
			{rl: 'ancientText', raw: 'ANCIENT'},
			{rl: 'artwork', raw: 'ART'},
			{rl: 'bill', raw: 'BILL'},
			{rl: 'blog', raw: 'BLOG'},
			{rl: 'book', raw: 'BOOK'},
			{rl: 'case', raw: 'CASE'},
			{rl: 'bookSection', raw: 'CHAP'},
			{rl: 'chartOrTable', raw: 'CHART'},
			{rl: 'classicalWork', raw: 'CLSWK'},
			{rl: 'computerProgram', raw: 'COMP'},
			{rl: 'conferenceProceedings', raw: 'CONF'},
			{rl: 'conferencePaper', raw: 'CPAPER'},
			{rl: 'catalog', raw: 'CTLG'},
			{rl: 'dataset', raw: 'DATA'},
			{rl: 'onlineDatabase', raw: 'DBASE'},
			{rl: 'dictionary', raw: 'DICT'},
			{rl: 'electronicBook', raw: 'EBOOK'},
			{rl: 'electronicBookSection', raw: 'ECHAP'},
			{rl: 'editedBook', raw: 'EDBOOK'},
			{rl: 'electronicArticle', raw: 'EJOUR'},
			{rl: 'web', raw: 'ELEC'},
			{rl: 'encyclopedia', raw: 'ENCYC'},
			{rl: 'equation', raw: 'EQUA'},
			{rl: 'figure', raw: 'FIGURE'},
			{rl: 'generic', raw: 'GEN'},
			{rl: 'governmentDocument', raw: 'GOVDOC'},
			{rl: 'grant', raw: 'GRANT'},
			{rl: 'hearing', raw: 'HEARING'},
			{rl: 'personalCommunication', raw: 'ICOMM'},
			{rl: 'newspaperArticle', raw: 'INPR'},
			{rl: 'journalArticle', raw: 'JFULL'},
			{rl: 'legalRuleOrRegulation', raw: 'LEGAL'},
			{rl: 'manuscript', raw: 'MANSCPT'},
			{rl: 'map', raw: 'MAP'},
			{rl: 'magazineArticle', raw: 'MGZN'},
			{rl: 'filmOrBroadcast', raw: 'MPCT'},
			{rl: 'onlineMultimedia', raw: 'MULTI'},
			{rl: 'music', raw: 'MUSIC'},
			{rl: 'newspaperArticle', raw: 'NEWS'},
			{rl: 'pamphlet', raw: 'PAMP'},
			{rl: 'patent', raw: 'PAT'},
			{rl: 'report', raw: 'RPRT'},
			{rl: 'serial', raw: 'SER'},
			{rl: 'audioVisualMaterial', raw: 'SLIDE'},
			{rl: 'audioVisualMaterial', raw: 'SOUND'},
			{rl: 'standard', raw: 'STAND'},
			{rl: 'statute', raw: 'STAT'},
			{rl: 'thesis', raw: 'THES'},
			{rl: 'unpublished', raw: 'UNPB'},
		],
		rawMap: new Map(), // raw RIS type tag -> type object; calculated by setup() for quicker lookup
		rlMap: new Map(), // RefLib type -> type object; calculated by setup() for quicker lookup
	},
	// }}}
};
319
+
320
+
321
/**
 * Initialize the module lookup structures - must run before reading / writing
 * Populates `translations.fields.collectionOutput` + the various rawMap/rlMap lookup Maps
 * @see modules/interface.js
 */
export function setup() {
	// Sort the output field set by sort order, dropping fields disabled on output (sort === false)
	translations.fields.collectionOutput = translations.fields.collection
		.filter(f => f.sort !== false)
		.sort((a, b) => (a.sort ?? 1000) - (b.sort ?? 1000)); // Simple numeric comparator - unsorted fields sink to the end

	// Create lookup Maps of translations.fields with key as .rl / .raw + val as the full object
	// NOTE(review): duplicate `rl` keys (e.g. multiple 'authors' entries) resolve to the LAST collection entry here - confirm no caller relies on field rlMap priority
	translations.fields.collection.forEach(c => {
		translations.fields.rlMap.set(c.rl, c);
		translations.fields.rawMap.set(c.raw, c);
	});

	// Create lookup Maps of ref.types with key as .rl / .raw + val as the full object
	// FIX: the types collection documents that high-priority entries are listed FIRST, so the
	// first entry per `rl` must win when translating back (previously Map.set let the last win,
	// e.g. 'journalArticle' resolved to 'JFULL' instead of 'JOUR')
	translations.types.collection.forEach(c => {
		if (!translations.types.rlMap.has(c.rl)) translations.types.rlMap.set(c.rl, c);
		translations.types.rawMap.set(c.raw, c);
	});

}
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@iebh/reflib",
3
+ "version": "2.0.0",
4
+ "description": "Reference / Citation reference library utilities",
5
+ "scripts": {
6
+ "lint": "eslint lib modules shared test",
7
+ "test": "mocha",
8
+ "test:browser": "cd test/browser && npm run dev"
9
+ },
10
+ "repository": {
11
+ "type": "git",
12
+ "url": "https://github.com/IEBH/RefLib"
13
+ },
14
+ "keywords": [
15
+ "reflib",
16
+ "references",
17
+ "citations",
18
+ "library"
19
+ ],
20
+ "author": "Matt Carter <m@ttcarter.com>",
21
+ "license": "MIT",
22
+ "bugs": {
23
+ "url": "https://github.com/IEBH/RefLib/issues"
24
+ },
25
+ "homepage": "https://github.com/IEBH/RefLib",
26
+ "enginesStrict": true,
27
+ "engines": {
28
+ "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
29
+ },
30
+ "type": "module",
31
+ "exports": {
32
+ ".": {
33
+ "browser": "./lib/browser.js",
34
+ "default": "./lib/default.js"
35
+ },
36
+ "./*": "./lib/*.js"
37
+ },
38
+ "devDependencies": {
39
+ "chai": "^4.3.4",
40
+ "eslint": "^8.7.0",
41
+ "mocha": "^9.1.3",
42
+ "mocha-logger": "^1.0.7",
43
+ "temp": "^0.9.4"
44
+ },
45
+ "dependencies": {
46
+ "htmlparser2": "^7.2.0",
47
+ "JSONStream": "^1.3.5",
48
+ "mitt": "^3.0.0",
49
+ "vite-plugin-replace": "^0.1.1"
50
+ }
51
+ }
@@ -0,0 +1,16 @@
1
/**
 * Camel case any input string
 * This is a simplified version of Lodash's camelCase() - words are split on whitespace,
 * hyphens + underscores (existing intra-word capitalization boundaries are NOT detected)
 * @param {string} input The input string to camelize
 * @return {string} The input string in camelCase format
 * @url https://github.com/MomsFriendlyDevCo/Nodash
 */
export default function camelCase(input) {
	return input
		.split(/[\s_-]+/) // Split on runs of whitespace / hyphens / underscores (underscores previously slipped through, contradicting the Lodash claim)
		.map((word, offset) => offset == 0
			? word.toLowerCase()
			: word.slice(0, 1).toUpperCase() + word.slice(1).toLowerCase() // slice() replaces deprecated substr()
		)
		.join('')
}
@@ -0,0 +1,21 @@
1
+ import Mitt from 'mitt';
2
+
3
/**
 * Generic wrapper for an event emitter
 * This module returns a wrapped version of the `mitt` stand-alone event emitter (+ support for method chaining)
 */
export default function emitter() {
	let wrapped = Mitt();

	// Add method chaining: stash each original mitt function under `.mitt.${method}`,
	// then replace it with a version that returns the emitter itself
	wrapped.mitt = {};
	for (let method of ['on', 'off', 'emit']) {
		wrapped.mitt[method] = wrapped[method]; // Backup the original function
		wrapped[method] = (...args) => {
			wrapped.mitt[method](...args);
			return wrapped; // Enable chaining
		};
	}

	return wrapped;
}
@@ -0,0 +1,37 @@
1
+ import Emitter from '../shared/emitter.js';
2
+
3
/**
 * Wrapper for streams which transforms a given input into an emitter pattern
 * This is designed to let regular `node:stream.Readable` objects pass through without alteration but browser based stream objects get wrapped
 * @param {stream.Readable|ReadableStreamDefaultReader} inStream The input stream to wrap
 * @returns {stream.Readable|Emitter} Either the unedited node compatible stream or an event emitter with the same behaviour
 *
 * @emits data Emitted as `(chunk)` on each data chunk
 * @emits end Emitted as `()` when the input stream has closed
 * @emits error Emitted as `(Error)` on any read error
 */
export default function streamEmitter(inStream) {
	if (inStream.on) return inStream; // inStream already supports event emitters - do nothing

	let emitter = Emitter();
	let utf8Decoder = new TextDecoder('utf-8');
	let readCycle = ()=> {
		inStream
			.read()
			.then(({value, done}) => {
				if (done) {
					// FIX: flush the decoder before ending - with {stream: true} a multi-byte
					// UTF-8 sequence split across the final chunk stays buffered inside the
					// decoder and was previously dropped
					let tail = utf8Decoder.decode();
					if (tail) emitter.emit('data', tail);
					emitter.emit('end');
				} else {
					emitter.emit('data', utf8Decoder.decode(value, {stream: true}));
					setTimeout(readCycle); // Loop into next read if not already finished
				}
			})
			.catch(e => emitter.emit('error', e))
	};

	// Keep downstream libraries happy by stubbing stream-like functions
	emitter.setEncoding = ()=> {};

	setTimeout(readCycle); // Queue up initial read cycle on next tick
	return emitter;
}