@iebh/reflib 2.7.2 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.ignore +1 -1
- package/LICENSE +20 -20
- package/README.md +297 -274
- package/app.js +87 -0
- package/lib/browser.js +30 -30
- package/lib/default.js +30 -30
- package/lib/downloadFile.js +94 -94
- package/lib/fields.js +158 -158
- package/lib/formats.js +85 -85
- package/lib/getModule.js +39 -39
- package/lib/getRefDoi.js +16 -16
- package/lib/identifyFormat.js +13 -13
- package/lib/readFile.js +63 -63
- package/lib/readStream.js +21 -21
- package/lib/uploadFile.js +71 -71
- package/lib/writeFile.js +32 -32
- package/lib/writeStream.js +16 -16
- package/modules/bibtex.js +401 -401
- package/modules/default.js +7 -7
- package/modules/endnoteEnl.js +237 -237
- package/modules/endnoteEnlX.js +85 -85
- package/modules/endnoteXml.js +410 -474
- package/modules/interface.js +47 -47
- package/modules/json.js +109 -79
- package/modules/medline.js +638 -638
- package/modules/ris.js +383 -383
- package/modules/shims/JSONStream-browser.js +43 -43
- package/modules/shims/WritableStream-browser.js +52 -52
- package/package.json +68 -66
- package/shared/camelCase.js +17 -17
- package/shared/emitter.js +23 -23
- package/shared/parseArgs.js +104 -0
- package/shared/streamEmitter.js +61 -61
package/modules/bibtex.js
CHANGED
|
@@ -1,401 +1,401 @@
|
|
|
1
|
-
import Emitter from '../shared/emitter.js';
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* Lookup enum for the current parser mode we are in
|
|
5
|
-
*
|
|
6
|
-
* @type {Object<Number>}
|
|
7
|
-
*/
|
|
8
|
-
const MODES = {
|
|
9
|
-
REF: 0,
|
|
10
|
-
FIELDS: 1,
|
|
11
|
-
FIELD_START: 2,
|
|
12
|
-
FIELD_VALUE: 3,
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
/**
|
|
17
|
-
* Parse a BibTeX file from a readable stream
|
|
18
|
-
*
|
|
19
|
-
* @see modules/interface.js
|
|
20
|
-
*
|
|
21
|
-
* @param {Stream} stream The readable stream to accept data from
|
|
22
|
-
* @param {Object} [options] Additional options to use when parsing
|
|
23
|
-
* @param {Boolean} [options.recNumberNumeric=true] Only process the BibTeX ID into a recNumber if its a finite numeric, otherwise disguard
|
|
24
|
-
* @param {Boolean} [options.recNumberRNPrefix=true] Accept `RN${NUMBER}` as recNumber if present
|
|
25
|
-
* @param {Boolean} [options.recNumberKey=true] If the reference key cannot be otherwise parsed store it in `key<String>` instead
|
|
26
|
-
* @param {String} [options.fallbackType='unkown'] Reflib fallback type if the incoming type is unrecognised or unsupported
|
|
27
|
-
* @param {Set<String>} [options.fieldsOverwrite] Set of field names where the value is clobbered rather than appended if discovered more than once
|
|
28
|
-
* @param {Boolean} [options.preserveUnknownKeys=true] Retain keys we do not have a direct lookup for in the output object
|
|
29
|
-
*
|
|
30
|
-
* @returns {Object} A readable stream analogue defined in `modules/interface.js`
|
|
31
|
-
*/
|
|
32
|
-
export function readStream(stream, options) {
|
|
33
|
-
let settings = {
|
|
34
|
-
recNumberNumeric: true,
|
|
35
|
-
recNumberRNPrefix: true,
|
|
36
|
-
recNumberKey: true,
|
|
37
|
-
fallbackType: 'unknown',
|
|
38
|
-
fieldsOverwrite: new Set(['type']),
|
|
39
|
-
preserveUnkownKeys: true,
|
|
40
|
-
...options,
|
|
41
|
-
};
|
|
42
|
-
|
|
43
|
-
let emitter = Emitter();
|
|
44
|
-
let buffer = '';
|
|
45
|
-
let mode = MODES.REF;
|
|
46
|
-
let state; // Misc state storage when we're digesting ref data
|
|
47
|
-
let ref = {}; // Reference item being constructed
|
|
48
|
-
|
|
49
|
-
// Queue up the parser in the next tick (so we can return the emitter first)
|
|
50
|
-
setTimeout(()=> {
|
|
51
|
-
stream
|
|
52
|
-
.on('error', e => emitter.emit('error', e))
|
|
53
|
-
.on('end', ()=> emitter.emit('end'))
|
|
54
|
-
.on('data', chunkBuffer => {
|
|
55
|
-
emitter.emit('progress', stream.bytesRead);
|
|
56
|
-
buffer += chunkBuffer.toString(); // Append incomming data to the partial-buffer we're holding in memory
|
|
57
|
-
|
|
58
|
-
while (true) {
|
|
59
|
-
let match; // Regex storage for match groups
|
|
60
|
-
if ((mode == MODES.REF) && (match = /^\s*@(?<type>\w+?)\s*\{(?<id>.*?),/s.exec(buffer))) {
|
|
61
|
-
if (settings.recNumberNumeric && isFinite(match.groups.id)) { // Accept numeric recNumber
|
|
62
|
-
ref.recNumber = +match.groups.id;
|
|
63
|
-
} else if (settings.recNumberRNPrefix && /^RN\d+$/.test(match.groups.id)) {
|
|
64
|
-
ref.recNumber = +match.groups.id.slice(2);
|
|
65
|
-
} else if (!settings.recNumberNumeric && match.groups.id) { // Non numeric / finite ID - but we're allowed to accept it anyway
|
|
66
|
-
ref.recNumber = +match.groups.id;
|
|
67
|
-
} else if (settings.recNumberKey) { // Non numeric, custom looking key, stash in 'key' instead
|
|
68
|
-
ref.key = match.groups.id;
|
|
69
|
-
} // Implied else - No ID, ignore
|
|
70
|
-
|
|
71
|
-
ref.type = match.groups.type;
|
|
72
|
-
mode = MODES.FIELDS;
|
|
73
|
-
state = null;
|
|
74
|
-
} else if (mode == MODES.FIELDS && (match = /^\s*(?<field>\w+?)\s*=\s*/s.exec(buffer))) {
|
|
75
|
-
mode = MODES.FIELD_START;
|
|
76
|
-
state = {field: match.groups.field};
|
|
77
|
-
} else if (mode == MODES.FIELDS && (match = /^\s*\}\s*/s.exec(buffer))) { // End of ref
|
|
78
|
-
emitter.emit('ref', tidyRef(ref, settings));
|
|
79
|
-
mode = MODES.REF;
|
|
80
|
-
ref = {};
|
|
81
|
-
state = null;
|
|
82
|
-
} else if (mode == MODES.FIELD_START && (match = /^\s*(?<fieldWrapper>"|{)\s*/.exec(buffer))) {
|
|
83
|
-
mode = MODES.FIELD_VALUE;
|
|
84
|
-
state.fieldWrapper = match.groups.fieldWrapper;
|
|
85
|
-
} else if (
|
|
86
|
-
// TODO: Note that we use `\r?\n` as delimiters for field values, this is a cheat to avoid having to implement a full AST parser
|
|
87
|
-
// This is a hack but since most BibTeX files use properly formatted BibTeX this should work in the majority of cases
|
|
88
|
-
// This WILL break if given one continuous line of BibTeX though
|
|
89
|
-
// - MC 2026-01-02
|
|
90
|
-
mode == MODES.FIELD_VALUE
|
|
91
|
-
&& (
|
|
92
|
-
(
|
|
93
|
-
state.fieldWrapper == '{'
|
|
94
|
-
&& (match = /^(?<value>.+?)(?<!\\%)\}\s*,?\s*$/sm.exec(buffer))
|
|
95
|
-
)
|
|
96
|
-
|| (
|
|
97
|
-
state.fieldWrapper == '"'
|
|
98
|
-
&& (match = /^(?<value>.+?)"\s*,?\s*$/sm.exec(buffer))
|
|
99
|
-
)
|
|
100
|
-
)
|
|
101
|
-
) {
|
|
102
|
-
mode = MODES.FIELDS;
|
|
103
|
-
if (// Already have content - and we should overwrite
|
|
104
|
-
ref[state.field] !== undefined
|
|
105
|
-
&& (
|
|
106
|
-
settings.preserveUnkownKeys
|
|
107
|
-
|| settings.fieldsOverwrite.has(state.field)
|
|
108
|
-
)
|
|
109
|
-
) {
|
|
110
|
-
ref[state.field] = unescape(match.groups.value);
|
|
111
|
-
} else if (ref[state.field] !== undefined) { // Already have content - append
|
|
112
|
-
ref[state.field] += '\n' + unescape(match.groups.value);
|
|
113
|
-
} else { // Populate initial value
|
|
114
|
-
ref[state.field] = unescape(match.groups.value);
|
|
115
|
-
}
|
|
116
|
-
state = null;
|
|
117
|
-
} else { // Implied else - No match to buffer, let it fill and process next data block
|
|
118
|
-
break;
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
// Crop start of buffer to last match
|
|
122
|
-
buffer = buffer.slice(match[0].length);
|
|
123
|
-
}
|
|
124
|
-
})
|
|
125
|
-
})
|
|
126
|
-
|
|
127
|
-
return emitter;
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
/**
|
|
132
|
-
* Tidy up a raw BibTeX reference before emitting
|
|
133
|
-
*
|
|
134
|
-
* @param {Object} ref The input raw ref to tidy
|
|
135
|
-
*
|
|
136
|
-
* @param {Object} settings Optimized settings object for fast access
|
|
137
|
-
*
|
|
138
|
-
* @returns {Object} The tidied ref
|
|
139
|
-
*/
|
|
140
|
-
export function tidyRef(ref, settings) {
|
|
141
|
-
return Object.fromEntries(
|
|
142
|
-
Object.entries(ref)
|
|
143
|
-
.map(([key, val]) => {
|
|
144
|
-
let rlField = translations.fields.btMap.get(key.toLowerCase());
|
|
145
|
-
|
|
146
|
-
if (key == 'type') { // Special conversion for type
|
|
147
|
-
let rlType = ref.type && translations.types.btMap.get(val.toLowerCase());
|
|
148
|
-
return rlType
|
|
149
|
-
? [key, rlType.rl] // Can translate incoming type to Reflib type
|
|
150
|
-
: [key, settings.fallbackType] // Unknown Reflib type varient
|
|
151
|
-
} else if (!settings.preserveUnkownKeys && !rlField) { // Omit unknown fields
|
|
152
|
-
return;
|
|
153
|
-
} else if (rlField && rlField.array) { // Field needs array casting
|
|
154
|
-
return [rlField.rl, val.split(/\n*\s+and\s+/)];
|
|
155
|
-
} else if (rlField && rlField.rl) { // Known BT field but different RL field
|
|
156
|
-
return [rlField.rl, val];
|
|
157
|
-
} else if (settings.preserveUnkownKeys) { // Everything else - add field
|
|
158
|
-
return [key, val];
|
|
159
|
-
}
|
|
160
|
-
})
|
|
161
|
-
.filter(Boolean) // Remove duds
|
|
162
|
-
);
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
/**
|
|
167
|
-
* Translate a BibTeX encoded string into a regular JS String
|
|
168
|
-
*
|
|
169
|
-
* @param {String} str Input BibTeX encoded string
|
|
170
|
-
* @returns {String} Regular JS output string
|
|
171
|
-
*/
|
|
172
|
-
export function unescape(str) {
|
|
173
|
-
return str
|
|
174
|
-
.replace(/\/\*/g, '\n')
|
|
175
|
-
.replace(/\{\\\&\}/g, '&')
|
|
176
|
-
.replace(/\{\\\%\}/g, '%')
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
/**
|
|
181
|
-
* Translate a JS string into a BibTeX encoded string
|
|
182
|
-
*
|
|
183
|
-
* @param {String} str Input regular JS String
|
|
184
|
-
* @returns {String} BibTeX encoded string
|
|
185
|
-
*/
|
|
186
|
-
export function escape(str) {
|
|
187
|
-
return (''+str)
|
|
188
|
-
.replace(/\&/g, '{\\&}')
|
|
189
|
-
.replace(/%/g, '{\\%}')
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
/**
|
|
194
|
-
* Write a RIS file to a writable stream
|
|
195
|
-
*
|
|
196
|
-
* @see modules/interface.js
|
|
197
|
-
*
|
|
198
|
-
* @param {Stream} stream The writable stream to write to
|
|
199
|
-
*
|
|
200
|
-
* @param {Object} [options] Additional options to use when parsing
|
|
201
|
-
* @param {string} [options.defaultType='Misc'] Default citation type to assume when no other type is specified
|
|
202
|
-
* @param {string} [options.delimeter='\r'] How to split multi-line items
|
|
203
|
-
* @param {Set} [options.omitFields] Set of special fields to always omit, either because we are ignoring or because we have special treatment for them
|
|
204
|
-
* @param {Boolean} [options.keyForce=true] Force a unique ID to exist if we don't already have one for each reference
|
|
205
|
-
* @param {Boolean} [options.recNumberRNPrefix=true] Rewrite recNumber fields as `RN${NUMBER}`
|
|
206
|
-
* @param {Boolean} [options.recNumberKey=true] If the reference `recNumber` is empty use `key<String>` instead
|
|
207
|
-
* @param {Boolean} [options.preserveUnknownKeys=true] Output keys we do not have a direct lookup for in the output object
|
|
208
|
-
*
|
|
209
|
-
* @returns {Object} A writable stream analogue defined in `modules/interface.js`
|
|
210
|
-
*/
|
|
211
|
-
export function writeStream(stream, options) {
|
|
212
|
-
let settings = {
|
|
213
|
-
defaultType: 'Misc',
|
|
214
|
-
delimeter: '\n',
|
|
215
|
-
omitFields: new Set(['key', 'recNumber', 'type']),
|
|
216
|
-
keyForce: true,
|
|
217
|
-
recNumberRNPrefix: true,
|
|
218
|
-
recNumberKey: true,
|
|
219
|
-
preserveUnkownKeys: true,
|
|
220
|
-
...options,
|
|
221
|
-
};
|
|
222
|
-
|
|
223
|
-
return {
|
|
224
|
-
start() {
|
|
225
|
-
return Promise.resolve();
|
|
226
|
-
},
|
|
227
|
-
write: ref => {
|
|
228
|
-
// Fetch Reflib type definition
|
|
229
|
-
ref.type ||= settings.defaultType;
|
|
230
|
-
let rlType = translations.types.rlMap.get(ref.type.toLowerCase());
|
|
231
|
-
let btType = rlType?.bt || settings.defaultType;
|
|
232
|
-
|
|
233
|
-
stream.write(
|
|
234
|
-
'@' + btType + '{'
|
|
235
|
-
+ (
|
|
236
|
-
ref.recNumber && settings.recNumberRNPrefix ? `RN${ref.recNumber},`
|
|
237
|
-
: ref.recNumber ? `${ref.recNumber},`
|
|
238
|
-
: ref.key ? `${ref.key},`
|
|
239
|
-
: settings.keyForce ? `${generateCitationKey(ref)},`
|
|
240
|
-
: ''
|
|
241
|
-
) + '\n'
|
|
242
|
-
+ Object.entries(ref)
|
|
243
|
-
.filter(([key, val]) =>
|
|
244
|
-
val // We have a non-nullish val
|
|
245
|
-
&& !settings.omitFields.has(key)
|
|
246
|
-
)
|
|
247
|
-
.reduce((buf, [rawKey, rawVal], keyIndex, keys) => {
|
|
248
|
-
// Fetch Reflib field definition
|
|
249
|
-
let rlField = translations.fields.rlMap.get(rawKey)
|
|
250
|
-
if (!rlField && !settings.preserveUnkownKeys) return buf; // Unknown field mapping - skip if were omitting unknown fields
|
|
251
|
-
|
|
252
|
-
let key = rlField ? rlField.bt : rawKey; // Use Reflib->BibTeX field mapping if we have one, otherwise use raw key
|
|
253
|
-
let val = escape( // Escape input value, either as an Array via join or as a flat string
|
|
254
|
-
rawKey == 'authors' && Array.isArray(rawVal) ? rawVal.join('
|
|
255
|
-
: Array.isArray(rawVal) ? rawVal.join(', ') // Treat other arrays as a CSV
|
|
256
|
-
: rawVal // Splat everything else as a string
|
|
257
|
-
);
|
|
258
|
-
|
|
259
|
-
return buf + // Return string buffer of ref under construction
|
|
260
|
-
`${key}={${val}}` // Append ref key=val pair to buffer
|
|
261
|
-
+ (keyIndex < keys.length-1 ? ',' : '') // Append comma (if non-last)
|
|
262
|
-
+ '\n' // Finish each field with a newline
|
|
263
|
-
}, '')
|
|
264
|
-
+ '}\n'
|
|
265
|
-
);
|
|
266
|
-
|
|
267
|
-
return Promise.resolve();
|
|
268
|
-
},
|
|
269
|
-
middle() {
|
|
270
|
-
stream.write('\n');
|
|
271
|
-
},
|
|
272
|
-
end() {
|
|
273
|
-
return new Promise((resolve, reject) =>
|
|
274
|
-
stream.end(err => err ? reject(err) : resolve())
|
|
275
|
-
);
|
|
276
|
-
},
|
|
277
|
-
};
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
/**
|
|
282
|
-
* Generate a citation key from first author + year
|
|
283
|
-
* Example: "Roomruangwong2020"
|
|
284
|
-
*/
|
|
285
|
-
function generateCitationKey(ref) {
|
|
286
|
-
let author = 'Anon';
|
|
287
|
-
if (ref.authors && ref.authors.length > 0) {
|
|
288
|
-
author = ref.authors[0].split(',')[0];
|
|
289
|
-
}
|
|
290
|
-
|
|
291
|
-
let year = 'n.d.';
|
|
292
|
-
if (ref.year) {
|
|
293
|
-
year = ref.year;
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
return `${author}${year}`;
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
/**
|
|
301
|
-
* Lookup tables for this module
|
|
302
|
-
* @type {Object}
|
|
303
|
-
* @property {Array<Object>} fields Field translations between Reflib (`rl`) and BibTeX format (`bt`)
|
|
304
|
-
*/
|
|
305
|
-
export let translations = {
|
|
306
|
-
// Field translations {{{
|
|
307
|
-
fields: {
|
|
308
|
-
collection: [
|
|
309
|
-
// Order by priority (highest at top)
|
|
310
|
-
{rl: 'address', bt: 'address'},
|
|
311
|
-
{rl: 'authors', bt: 'author', array: true},
|
|
312
|
-
{rl: 'doi', bt: 'doi'},
|
|
313
|
-
{rl: 'edition', bt: 'edition'},
|
|
314
|
-
{rl: 'editor', bt: 'editor'},
|
|
315
|
-
{rl: 'journal', bt: 'journal'},
|
|
316
|
-
{rl: 'notes', bt: 'note'},
|
|
317
|
-
{rl: 'number', bt: 'number'},
|
|
318
|
-
{rl: 'pages', bt: 'pages'},
|
|
319
|
-
{rl: 'title', bt: 'booktitle'},
|
|
320
|
-
{rl: 'title', bt: 'title'},
|
|
321
|
-
{rl: 'volume', bt: 'volume'},
|
|
322
|
-
{rl: 'isbn', bt: 'issn'},
|
|
323
|
-
|
|
324
|
-
// Misc
|
|
325
|
-
{bt: 'month'}, // Combined into {rl:'date'}
|
|
326
|
-
{bt: 'type'}, // Ignored
|
|
327
|
-
{bt: 'year'}, // Combined into {rl:'date'}
|
|
328
|
-
|
|
329
|
-
// Nonestandard but used anyway
|
|
330
|
-
{rl: 'abstract', bt: 'abstract'},
|
|
331
|
-
{rl: 'language', bt: 'language'},
|
|
332
|
-
{rl: 'keywords', bt: 'keywords', array: true},
|
|
333
|
-
{rl: 'urls', bt: 'url', array: true},
|
|
334
|
-
|
|
335
|
-
// Unknown how to translate these
|
|
336
|
-
// {bt: 'annote'},
|
|
337
|
-
// {bt: 'email'},
|
|
338
|
-
// {bt: 'chapter'},
|
|
339
|
-
// {bt: 'crossref'},
|
|
340
|
-
// {bt: 'howpublished'},
|
|
341
|
-
// {bt: 'institution'},
|
|
342
|
-
// {bt: 'key'},
|
|
343
|
-
// {bt: 'organization'},
|
|
344
|
-
// {bt: 'publisher'},
|
|
345
|
-
// {bt: 'school'},
|
|
346
|
-
// {bt: 'series'},
|
|
347
|
-
],
|
|
348
|
-
rlMap: new Map(),
|
|
349
|
-
btMap: new Map(),
|
|
350
|
-
},
|
|
351
|
-
// }}}
|
|
352
|
-
// Ref type translations {{{
|
|
353
|
-
types: {
|
|
354
|
-
collection: [
|
|
355
|
-
// Order by priority (highest at top)
|
|
356
|
-
{rl: 'journalArticle', bt: 'Article'},
|
|
357
|
-
{rl: 'book', bt: 'Book'},
|
|
358
|
-
{rl: 'bookSection', bt: 'InBook'},
|
|
359
|
-
{rl: 'conferencePaper', bt: 'Conference'},
|
|
360
|
-
{rl: 'conferenceProceedings', bt: 'InProceedings'},
|
|
361
|
-
{rl: 'report', bt: 'TechReport'},
|
|
362
|
-
{rl: 'thesis', bt: 'PHDThesis'},
|
|
363
|
-
{rl: 'unknown', bt: 'Misc'},
|
|
364
|
-
{rl: 'unpublished', bt: 'Unpublished'},
|
|
365
|
-
|
|
366
|
-
// Type aliases
|
|
367
|
-
{rl: 'journalArticle', bt: 'Journal Article'},
|
|
368
|
-
|
|
369
|
-
// Unknown how to translate these
|
|
370
|
-
{rl: 'Misc', bt: 'Booklet'},
|
|
371
|
-
{rl: 'Misc', bt: 'InCollection'},
|
|
372
|
-
{rl: 'Misc', bt: 'Manual'},
|
|
373
|
-
{rl: 'Misc', bt: 'MastersThesis'},
|
|
374
|
-
{rl: 'Misc', bt: 'Proceedings'},
|
|
375
|
-
],
|
|
376
|
-
rlMap: new Map(),
|
|
377
|
-
btMap: new Map(),
|
|
378
|
-
},
|
|
379
|
-
// }}}
|
|
380
|
-
};
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
/**
|
|
384
|
-
* @see modules/interface.js
|
|
385
|
-
*/
|
|
386
|
-
export function setup() {
|
|
387
|
-
// Create lookup object of translations.fields with key as .rl / val as the full object
|
|
388
|
-
translations.fields.collection.forEach(c => {
|
|
389
|
-
if (c.rl) translations.fields.rlMap.set(c.rl.toLowerCase(), c);
|
|
390
|
-
if (c.bt) translations.fields.btMap.set(c.bt, c);
|
|
391
|
-
});
|
|
392
|
-
|
|
393
|
-
// Create lookup object of ref.types with key as .rl / val as the full object
|
|
394
|
-
translations.types.collection.forEach(c => {
|
|
395
|
-
// Append each type to the set, accepting the first in each case as the priority
|
|
396
|
-
let rlLc = c.rl.toLowerCase();
|
|
397
|
-
let btLc = c.bt.toLowerCase();
|
|
398
|
-
if (c.rl && !translations.types.rlMap.has(rlLc)) translations.types.rlMap.set(rlLc, c);
|
|
399
|
-
if (c.bt && !translations.types.btMap.has(btLc)) translations.types.btMap.set(btLc, c);
|
|
400
|
-
});
|
|
401
|
-
}
|
|
1
|
+
import Emitter from '../shared/emitter.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Lookup enum for the current parser mode we are in
 *
 * Frozen so that parser states cannot be added, removed or reassigned at runtime
 *
 * @type {Object<String, Number>}
 */
const MODES = Object.freeze({
	REF: 0, // Expecting the `@type{id,` header of the next reference
	FIELDS: 1, // Inside a reference - expecting `field =` or the closing `}`
	FIELD_START: 2, // Seen `field =` - expecting the opening `"` or `{` of the value
	FIELD_VALUE: 3, // Inside a field value - expecting the closing wrapper
});
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
/**
 * Parse a BibTeX file from a readable stream
 *
 * Incoming chunks are appended to an in-memory buffer which is consumed from the front by a small
 * regular-expression driven state machine (see `MODES`). Each completed reference is passed through
 * `tidyRef()` and emitted as a `ref` event.
 *
 * @see modules/interface.js
 *
 * @param {Stream} stream The readable stream to accept data from
 * @param {Object} [options] Additional options to use when parsing
 * @param {Boolean} [options.recNumberNumeric=true] Only process the BibTeX ID into a recNumber if it is a finite numeric, otherwise discard
 * @param {Boolean} [options.recNumberRNPrefix=true] Accept `RN${NUMBER}` as recNumber if present
 * @param {Boolean} [options.recNumberKey=true] If the reference key cannot be otherwise parsed store it in `key<String>` instead
 * @param {String} [options.fallbackType='unknown'] Reflib fallback type if the incoming type is unrecognised or unsupported
 * @param {Set<String>} [options.fieldsOverwrite] Set of field names where the value is clobbered rather than appended if discovered more than once
 * @param {Boolean} [options.preserveUnknownKeys=true] Retain keys we do not have a direct lookup for in the output object. The historical misspelling `preserveUnkownKeys` is still accepted
 *
 * @returns {Object} A readable stream analogue defined in `modules/interface.js`
 */
export function readStream(stream, options) {
	// Resolve the unknown-key flag once - the documented spelling wins, the legacy misspelling is still honoured
	const preserveUnknownKeys = options?.preserveUnknownKeys ?? options?.preserveUnkownKeys ?? true;

	let settings = {
		recNumberNumeric: true,
		recNumberRNPrefix: true,
		recNumberKey: true,
		fallbackType: 'unknown',
		fieldsOverwrite: new Set(['type']),
		...options,
		preserveUnknownKeys,
		preserveUnkownKeys: preserveUnknownKeys, // Legacy alias - kept in sync for code which reads the old spelling
	};

	// Parser patterns - all are anchored to the start of the buffer
	const RE_REF_HEADER = /^\s*@(?<type>\w+?)\s*\{(?<id>.*?),/s;
	const RE_FIELD_NAME = /^\s*(?<field>\w+?)\s*=\s*/s;
	const RE_REF_END = /^\s*\}\s*/s;
	// NOTE(review): only `"` / `{` wrapped values are recognised - a bare value such as `year = 2020` never matches and stalls the parser
	const RE_FIELD_WRAPPER = /^\s*(?<fieldWrapper>"|{)\s*/;
	// TODO: Note that we use `\r?\n` as delimiters for field values, this is a cheat to avoid having to implement a full AST parser
	//       This is a hack but since most BibTeX files use properly formatted BibTeX this should work in the majority of cases
	//       This WILL break if given one continuous line of BibTeX though
	//       - MC 2026-01-02
	// `.*?` (rather than `.+?`) so that an empty value - `{}` or `""` - closes immediately instead of swallowing the next field
	const RE_VALUE_BRACE = /^(?<value>.*?)(?<!\\%)\}\s*,?\s*$/sm;
	const RE_VALUE_QUOTE = /^(?<value>.*?)"\s*,?\s*$/sm;

	let emitter = Emitter();
	let buffer = '';
	let mode = MODES.REF;
	let state; // Misc state storage when we're digesting ref data
	let ref = {}; // Reference item being constructed

	// Queue up the parser in the next tick (so we can return the emitter first)
	setTimeout(()=> {
		stream
			.on('error', e => emitter.emit('error', e))
			.on('end', ()=> emitter.emit('end'))
			.on('data', chunkBuffer => {
				emitter.emit('progress', stream.bytesRead);
				buffer += chunkBuffer.toString(); // Append incoming data to the partial-buffer we're holding in memory

				while (true) {
					let match; // Regex storage for match groups
					if (mode === MODES.REF && (match = RE_REF_HEADER.exec(buffer))) {
						const id = match.groups.id;
						const hasId = id.trim() !== ''; // Guard - `Number('')` is 0 so blank IDs must be rejected explicitly
						const idIsNumeric = hasId && Number.isFinite(Number(id));

						if (settings.recNumberNumeric && idIsNumeric) { // Accept numeric recNumber
							ref.recNumber = Number(id);
						} else if (settings.recNumberRNPrefix && /^RN\d+$/.test(id)) {
							ref.recNumber = Number(id.slice(2));
						} else if (!settings.recNumberNumeric && hasId) { // Non numeric / finite ID - but we're allowed to accept it anyway
							ref.recNumber = idIsNumeric ? Number(id) : id; // Keep the raw ID rather than coercing it to NaN
						} else if (settings.recNumberKey && hasId) { // Non numeric, custom looking key, stash in 'key' instead
							ref.key = id;
						} // Implied else - No ID, ignore

						ref.type = match.groups.type;
						mode = MODES.FIELDS;
						state = null;
					} else if (mode === MODES.FIELDS && (match = RE_FIELD_NAME.exec(buffer))) {
						mode = MODES.FIELD_START;
						state = {field: match.groups.field};
					} else if (mode === MODES.FIELDS && (match = RE_REF_END.exec(buffer))) { // End of ref
						emitter.emit('ref', tidyRef(ref, settings));
						mode = MODES.REF;
						ref = {};
						state = null;
					} else if (mode === MODES.FIELD_START && (match = RE_FIELD_WRAPPER.exec(buffer))) {
						mode = MODES.FIELD_VALUE;
						state.fieldWrapper = match.groups.fieldWrapper;
					} else if (
						mode === MODES.FIELD_VALUE
						&& (match = (state.fieldWrapper === '{' ? RE_VALUE_BRACE : RE_VALUE_QUOTE).exec(buffer))
					) {
						mode = MODES.FIELDS;

						const value = unescape(match.groups.value);
						const existing = ref[state.field];
						// NOTE(review): the unknown-key flag also forces overwriting here, so with default settings
						// repeated fields always clobber and the append path is never taken - confirm this is intended
						const overwrite = settings.preserveUnkownKeys || settings.fieldsOverwrite.has(state.field);

						ref[state.field] = existing !== undefined && !overwrite
							? existing + '\n' + value // Already have content - append
							: value; // Populate initial value / clobber existing
						state = null;
					} else { // Implied else - No match to buffer, let it fill and process next data block
						break;
					}

					// Crop start of buffer to last match
					buffer = buffer.slice(match[0].length);
				}
			})
	})

	return emitter;
}
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
/**
 * Tidy up a raw BibTeX reference before emitting
 *
 * Translates the BibTeX type and field names into their Reflib equivalents using the `translations`
 * lookup maps (which must have been populated via `setup()`), casting array-style fields along the way.
 * When several BibTeX fields translate to the same Reflib field the one appearing last in `ref` wins.
 *
 * @param {Object} ref The input raw ref to tidy
 *
 * @param {Object} settings Optimized settings object for fast access
 * @param {String} [settings.fallbackType] Reflib type to use when the BibTeX type has no translation
 * @param {Boolean} [settings.preserveUnknownKeys] Retain fields with no translation; the legacy spelling `preserveUnkownKeys` is also read
 *
 * @returns {Object} The tidied ref
 */
export function tidyRef(ref, settings) {
	// Honour the documented option name, falling back to the historical misspelling
	const preserveUnknownKeys = settings.preserveUnknownKeys ?? settings.preserveUnkownKeys;

	const tidied = [];
	for (const [key, val] of Object.entries(ref)) {
		if (key === 'type') { // Special conversion for type
			const rlType = ref.type && translations.types.btMap.get(val.toLowerCase());
			tidied.push(rlType
				? [key, rlType.rl] // Can translate incoming type to Reflib type
				: [key, settings.fallbackType] // Unknown Reflib type variant
			);
			continue;
		}

		if (key === 'recNumber' || key === 'key') { // Identifiers set by the parser - never subject to unknown-field filtering
			tidied.push([key, val]);
			continue;
		}

		const rlField = translations.fields.btMap.get(key.toLowerCase());

		if (!preserveUnknownKeys && !rlField) continue; // Omit unknown fields

		if (rlField && rlField.array) { // Field needs array casting
			// NOTE(review): every array field (not only authors) is split on " and " - confirm this suits keywords / urls
			tidied.push([rlField.rl, val.split(/\n*\s+and\s+/)]);
		} else if (rlField && rlField.rl) { // Known BT field but different RL field
			tidied.push([rlField.rl, val]);
		} else if (preserveUnknownKeys) { // Everything else - add field
			tidied.push([key, val]);
		} // Implied else - known BT field with no Reflib equivalent and we are not preserving - drop
	}

	return Object.fromEntries(tidied);
}
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
/**
 * Decode a BibTeX encoded string into a plain JS String
 *
 * Applies, in order: `/*` -> newline, `{\&}` -> `&`, `{\%}` -> `%`
 *
 * @param {String} str Input BibTeX encoded string
 * @returns {String} Regular JS output string
 */
export function unescape(str) {
	// Ordered list of [pattern, replacement] decoders
	const decoders = [
		[/\/\*/g, '\n'],
		[/\{\\\&\}/g, '&'],
		[/\{\\\%\}/g, '%'],
	];

	let decoded = str;
	for (const [pattern, replacement] of decoders) {
		decoded = decoded.replace(pattern, replacement);
	}
	return decoded;
}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
/**
 * Encode a JS value as a BibTeX safe string
 *
 * The input is first coerced to a string, then `&` becomes `{\&}` and `%` becomes `{\%}`
 *
 * @param {String} str Input regular JS String
 * @returns {String} BibTeX encoded string
 */
export function escape(str) {
	// Ordered list of [pattern, replacement] encoders
	const encoders = [
		[/\&/g, '{\\&}'],
		[/%/g, '{\\%}'],
	];

	return encoders.reduce(
		(encoded, [pattern, replacement]) => encoded.replace(pattern, replacement),
		'' + str, // Coerce non-string input (numbers etc.) before encoding
	);
}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
/**
 * Write a BibTeX file to a writable stream
 *
 * @see modules/interface.js
 *
 * @param {Stream} stream The writable stream to write to
 *
 * @param {Object} [options] Additional options to use when writing
 * @param {string} [options.defaultType='Misc'] Default citation type to assume when no other type is specified
 * @param {string} [options.delimeter='\n'] How to split multi-line items (accepted as a setting but not read by this writer)
 * @param {Set} [options.omitFields] Set of special fields to always omit, either because we are ignoring or because we have special treatment for them
 * @param {Boolean} [options.keyForce=true] Force a unique ID to exist if we don't already have one for each reference
 * @param {Boolean} [options.recNumberRNPrefix=true] Rewrite recNumber fields as `RN${NUMBER}`
 * @param {Boolean} [options.recNumberKey=true] If the reference `recNumber` is empty use `key<String>` instead (accepted as a setting; the fallback to `key` is currently unconditional)
 * @param {Boolean} [options.preserveUnknownKeys=true] Output keys we do not have a direct lookup for. The historical misspelling `preserveUnkownKeys` is still accepted
 *
 * @returns {Object} A writable stream analogue defined in `modules/interface.js`
 */
export function writeStream(stream, options) {
	// Resolve the unknown-key flag once - the documented spelling wins, the legacy misspelling is still honoured
	const preserveUnknownKeys = options?.preserveUnknownKeys ?? options?.preserveUnkownKeys ?? true;

	let settings = {
		defaultType: 'Misc',
		delimeter: '\n',
		omitFields: new Set(['key', 'recNumber', 'type']),
		keyForce: true,
		recNumberRNPrefix: true,
		recNumberKey: true,
		...options,
		preserveUnknownKeys,
		preserveUnkownKeys: preserveUnknownKeys, // Legacy alias kept in sync
	};

	return {
		start() {
			return Promise.resolve();
		},
		write: ref => {
			// Work on a shallow copy so the caller's reference is never mutated when the type is defaulted
			const entry = {...ref, type: ref.type || settings.defaultType};

			// Fetch Reflib type definition
			// Only look up types the caller actually supplied - `defaultType` is already a BibTeX type, not a Reflib one
			const rlType = ref.type ? translations.types.rlMap.get(ref.type.toLowerCase()) : undefined;
			const btType = rlType?.bt || settings.defaultType;

			// Pick the citation key: recNumber (optionally `RN` prefixed) -> key -> generated key -> blank
			const citationKey =
				ref.recNumber && settings.recNumberRNPrefix ? `RN${ref.recNumber}`
				: ref.recNumber ? `${ref.recNumber}`
				: ref.key ? `${ref.key}`
				: settings.keyForce ? `${generateCitationKey(ref)}`
				: '';

			const fieldLines = [];
			for (const [rawKey, rawVal] of Object.entries(entry)) {
				if (!rawVal || settings.omitFields.has(rawKey)) continue; // Skip falsy values and specially handled fields

				// Fetch Reflib field definition
				const rlField = translations.fields.rlMap.get(rawKey);
				if (!rlField && !settings.preserveUnkownKeys) continue; // Unknown field mapping - skip if we're omitting unknown fields

				const key = rlField ? rlField.bt : rawKey; // Use Reflib->BibTeX field mapping if we have one, otherwise use raw key
				const val = escape( // Escape input value, either as an Array via join or as a flat string
					rawKey === 'authors' && Array.isArray(rawVal) ? rawVal.join(' and ') // Special joining conditions for author field
					: Array.isArray(rawVal) ? rawVal.join(', ') // Treat other arrays as a CSV
					: rawVal // Splat everything else as a string
				);

				fieldLines.push(`${key}={${val}}`);
			}

			stream.write(
				// The comma after the key is always written - without it a reader takes the first field as the key
				`@${btType}{${citationKey},\n`
				+ fieldLines.join(',\n') // Comma between fields, none after the last
				+ (fieldLines.length > 0 ? '\n' : '')
				+ '}\n'
			);

			return Promise.resolve();
		},
		middle() {
			stream.write('\n');
		},
		end() {
			return new Promise((resolve, reject) =>
				stream.end(err => err ? reject(err) : resolve())
			);
		},
	};
}
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
/**
 * Generate a citation key from first author + year
 * Example: "Roomruangwong2020"
 *
 * Falls back to `Anon` when no usable author is present and `n.d.` when no year is present.
 * Characters which cannot appear in a BibTeX key (whitespace, commas, braces, quotes, `#`, `%`, `\`, `=`) are removed.
 *
 * @param {Object} ref The reference to generate a key for
 * @param {Array<String>|String} [ref.authors] Author list, either as an array or a single " and " delimited string
 * @param {String|Number} [ref.year] Publication year
 *
 * @returns {String} The generated citation key
 */
function generateCitationKey(ref) {
	// Strip anything that would terminate or corrupt the key when written as `@type{key,`
	const sanitize = text => String(text).replace(/[\s,{}"#%\\=]/g, '');

	// Authors may arrive as an array or as a flat string - indexing a string would only yield its first character
	const authorList =
		Array.isArray(ref.authors) ? ref.authors
		: typeof ref.authors === 'string' ? ref.authors.split(/\s+and\s+/)
		: [];

	const surname = authorList.length > 0
		? sanitize(String(authorList[0]).split(',')[0]) // "Surname, Given" -> "Surname"
		: '';
	const year = ref.year ? sanitize(ref.year) : '';

	return `${surname || 'Anon'}${year || 'n.d.'}`;
}
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
/**
 * Lookup tables for this module
 *
 * Each of `fields` and `types` holds a `collection` of translation rows between Reflib (`rl`) and
 * BibTeX (`bt`) names, plus `rlMap` / `btMap` lookup Maps which start empty and are filled by `setup()`.
 *
 * @type {Object}
 * @property {Object} fields Field translations between Reflib (`rl`) and BibTeX format (`bt`)
 * @property {Object} types Reference type translations between Reflib (`rl`) and BibTeX format (`bt`)
 */
export let translations = {
	// Field translations {{{
	fields: {
		collection: [
			// Order by priority (highest at top)
			{rl: 'address', bt: 'address'},
			{rl: 'authors', bt: 'author', array: true},
			{rl: 'doi', bt: 'doi'},
			{rl: 'edition', bt: 'edition'},
			{rl: 'editor', bt: 'editor'},
			{rl: 'journal', bt: 'journal'},
			{rl: 'notes', bt: 'note'},
			{rl: 'number', bt: 'number'},
			{rl: 'pages', bt: 'pages'},
			{rl: 'title', bt: 'booktitle'},
			{rl: 'title', bt: 'title'},
			{rl: 'volume', bt: 'volume'},
			{rl: 'isbn', bt: 'issn'},

			// Misc - rows without an `rl` have no Reflib equivalent in this table
			{bt: 'month'}, // Intended to be combined into {rl:'date'} - NOTE(review): no such merge is visible in this module
			{bt: 'type'}, // Ignored
			{bt: 'year'}, // Intended to be combined into {rl:'date'} - NOTE(review): no such merge is visible in this module

			// Nonstandard but used anyway
			{rl: 'abstract', bt: 'abstract'},
			{rl: 'language', bt: 'language'},
			{rl: 'keywords', bt: 'keywords', array: true},
			{rl: 'urls', bt: 'url', array: true},

			// Unknown how to translate these
			// {bt: 'annote'},
			// {bt: 'email'},
			// {bt: 'chapter'},
			// {bt: 'crossref'},
			// {bt: 'howpublished'},
			// {bt: 'institution'},
			// {bt: 'key'},
			// {bt: 'organization'},
			// {bt: 'publisher'},
			// {bt: 'school'},
			// {bt: 'series'},
		],
		rlMap: new Map(),
		btMap: new Map(),
	},
	// }}}
	// Ref type translations {{{
	types: {
		collection: [
			// Order by priority (highest at top)
			{rl: 'journalArticle', bt: 'Article'},
			{rl: 'book', bt: 'Book'},
			{rl: 'bookSection', bt: 'InBook'},
			{rl: 'conferencePaper', bt: 'Conference'},
			{rl: 'conferenceProceedings', bt: 'InProceedings'},
			{rl: 'report', bt: 'TechReport'},
			{rl: 'thesis', bt: 'PHDThesis'},
			{rl: 'unknown', bt: 'Misc'},
			{rl: 'unpublished', bt: 'Unpublished'},

			// Type aliases
			{rl: 'journalArticle', bt: 'Journal Article'},

			// Unknown how to translate these - read them as the Reflib `unknown` type
			// (`Misc` is a BibTeX type, not a Reflib one; using it as `rl` made a Reflib type of `misc` resolve to `Booklet`)
			{rl: 'unknown', bt: 'Booklet'},
			{rl: 'unknown', bt: 'InCollection'},
			{rl: 'unknown', bt: 'Manual'},
			{rl: 'unknown', bt: 'MastersThesis'},
			{rl: 'unknown', bt: 'Proceedings'},
		],
		rlMap: new Map(),
		btMap: new Map(),
	},
	// }}}
};
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
/**
 * Populate the `rlMap` / `btMap` lookup Maps within `translations` from their `collection` arrays
 *
 * All Map keys are lowercased. For fields a later row replaces an earlier one sharing the same key;
 * for types the first row seen for a key is kept.
 *
 * @see modules/interface.js
 */
export function setup() {
	// Create lookup object of translations.fields with key as .rl / val as the full object
	for (const c of translations.fields.collection) {
		if (c.rl) translations.fields.rlMap.set(c.rl.toLowerCase(), c);
		if (c.bt) translations.fields.btMap.set(c.bt.toLowerCase(), c); // Lowercased to agree with the lowercased lookups made when reading
	}

	// Create lookup object of ref.types with key as .rl / val as the full object
	for (const c of translations.types.collection) {
		// Append each type to the set, accepting the first in each case as the priority
		// Optional chaining so a row missing `rl` or `bt` is skipped rather than throwing
		const rlLc = c.rl?.toLowerCase();
		const btLc = c.bt?.toLowerCase();
		if (rlLc && !translations.types.rlMap.has(rlLc)) translations.types.rlMap.set(rlLc, c);
		if (btLc && !translations.types.btMap.has(btLc)) translations.types.btMap.set(btLc, c);
	}
}
|