@natlibfi/marc-record-validators-melinda 9.0.5-alpha.1 → 9.0.7-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/isbn-issn.js CHANGED
@@ -36,77 +36,80 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
36
36
  description: 'Validates ISBN and ISSN values'
37
37
  };
38
38
 
39
+ function stringHasSpace(str) {
40
+ return str.indexOf(' ') > -1;
41
+ }
42
+
43
+ function trimSpaces(value) {
44
+ return value.replace(/^\s+/u, '').replace(/\s+$/u, '').replace(/\s+/gu, ' ');
45
+ }
46
+
47
+ function isMultiWord(inputString) {
48
+ const trimmedString = trimSpaces(inputString);
49
+ return stringHasSpace(trimmedString);
50
+ }
51
+
52
+ function getFirstWord(inputString) {
53
+ const trimmedString = trimSpaces(inputString);
54
+ const arr = trimmedString.split(' ');
55
+ return arr[0];
56
+ }
57
+
39
58
  function invalidISBN(isbn) {
40
- // If value contains space, it's not necessarily crap. (It's typically something like "1234567890 (nid.)".)
41
- // Check the first word in string:
42
- if (isbn.indexOf(' ') > -1) {
43
- // const arr = isbn.split(' ');
44
- // console.info(`invalidISBN(): Check '${arr[0]}' instead of '${isbn}'`); // eslint-disable-line no-console
45
- // return invalidISBN(arr[0]);
46
- return true;
47
- }
59
+ const isbnOnly = getFirstWord(isbn);
60
+ const auditedIsbn = ISBN.audit(isbnOnly);
61
+ return !auditedIsbn.validIsbn;
62
+ }
48
63
 
49
- const auditedIsbn = ISBN.audit(isbn);
50
- if (!auditedIsbn.validIsbn) {
51
- console.info(`Invalid ISBN detected: ${isbn}`); // eslint-disable-line no-console
52
- return true;
64
+ function invalidSubfield(subfield) {
65
+ if (subfield.code !== 'a') {
66
+ return false;
53
67
  }
54
- return false;
68
+ return invalidISBN(subfield.value) || isMultiWord(subfield.value);
55
69
  }
56
70
 
57
71
 
58
72
  function invalidField020(field) {
59
- if (field.subfields && field.subfields.some(sf => invalidField020a(sf) || invalidField020z(sf))) {
73
+ if (field.subfields && field.subfields.some(sf => invalidSubfield(sf))) {
60
74
  return true;
61
75
  }
62
76
  return false;
77
+ }
63
78
 
64
- function invalidField020a(subfield) {
65
- if (subfield.code !== 'a') {
66
- return false;
67
- }
68
- if (invalidISBN(subfield.value)) {
69
- return true;
70
- }
79
+ function subfieldsIsbnRequiresHyphenation(subfield) {
80
+ if (!hyphenateISBN || !['a', 'z'].includes(subfield.code)) {
71
81
  return false;
72
82
  }
73
83
 
74
- function invalidField020z(subfield) {
75
- if (subfield.code !== 'z' || !hyphenateISBN || invalidISBN(subfield.value)) {
76
- return false;
77
- }
78
- // We are only interested in $z field if it is valid ISBN that requires hyphenation:
79
- return subfield.value.indexOf('-') === -1;
84
+ const isbn = getFirstWord(subfield.value);
85
+ if (subfield.code === 'a') {
86
+ return requiresHyphenation(isbn);
80
87
  }
81
- }
82
88
 
83
- function subfieldRequiresHyphenation(subfield) {
84
- if (subfield.code !== 'a' && subfield.code !== 'z') {
89
+ // $z is a bit hacky: hyphenation is required if and only if the ISBN is valid and contains no '-' characters
90
+ if (isbn.indexOf('-') > -1) {
85
91
  return false;
86
92
  }
87
- return requiresHyphenation(subfield.value);
93
+ return !invalidISBN(isbn);
88
94
 
89
95
  function requiresHyphenation(isbn) {
90
- if (isbn.indexOf(' ') > -1) {
91
- const arr = isbn.split(' ');
92
- console.info(`requiresHyphenation(): Check '${arr[0]}' instead of '${isbn}'`); // eslint-disable-line no-console
93
- return requiresHyphenation(arr[0]);
96
+ if (!hyphenateISBN) {
97
+ return false;
94
98
  }
99
+ // Handle old notation such as "978-952-396-001-5 (nid.)"
100
+ const isbn2 = getFirstWord(isbn);
95
101
 
96
- if (invalidISBN(isbn)) {
102
+ if (invalidISBN(isbn2)) {
97
103
  return false;
98
104
  }
99
- console.info(`sfRH ${isbn}`); // eslint-disable-line no-console
100
- const parsedIsbn = ISBN.parse(isbn);
101
- if (hyphenateISBN) {
102
- return !(isbn === parsedIsbn.isbn10h || isbn === parsedIsbn.isbn13h);
103
- }
104
- return false;
105
- //return !(isbn === parsedIsbn.isbn10 || isbn === parsedIsbn.isbn13);
105
+
106
+ const parsedIsbn = ISBN.parse(isbn2);
107
+ // Return true only if the (already validated) ISBN is not in its canonical hyphenated 10 or 13 digit form:
108
+ return !(isbn2 === parsedIsbn.isbn10h || isbn2 === parsedIsbn.isbn13h);
106
109
  }
107
110
  }
108
111
 
109
- function getInvalidFields(record) {
112
+ function getRelevantFields(record) {
110
113
  //return record.get(/^(?:020|022)$/u).filter(field => {
111
114
  return record.fields.filter(field => {
112
115
  if (!field.subfields) {
@@ -114,10 +117,10 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
114
117
  }
115
118
  // Check ISBN:
116
119
  if (field.tag === '020') {
117
- if (invalidField020(field)) {
120
+ if (invalidField020(field)) { // checks multiwordness
118
121
  return true;
119
122
  }
120
- return fieldRequiresHyphenation(field);
123
+ return fieldsIsbnRequiresHyphenation(field);
121
124
  }
122
125
 
123
126
  // Check ISSN:
@@ -133,8 +136,8 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
133
136
  return false;
134
137
  });
135
138
 
136
- function fieldRequiresHyphenation(field) {
137
- return field.subfields && field.subfields.some(sf => subfieldRequiresHyphenation(sf));
139
+ function fieldsIsbnRequiresHyphenation(field) {
140
+ return field.subfields && field.subfields.some(sf => subfieldsIsbnRequiresHyphenation(sf));
138
141
  }
139
142
 
140
143
  function invalidField022(field) {
@@ -153,7 +156,7 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
153
156
  }
154
157
 
155
158
  function validate(record) {
156
- const fields = getInvalidFields(record);
159
+ const fields = getRelevantFields(record);
157
160
 
158
161
  if (fields.length === 0) {
159
162
  return {valid: true};
@@ -166,6 +169,10 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
166
169
  if (subfieldA) {
167
170
  return {name: 'ISBN', value: subfieldA.value};
168
171
  }
172
+ const subfieldZ = field.subfields.find(sf => sf.code === 'z');
173
+ if (subfieldZ) {
174
+ return {name: 'ISBN (subfield Z)', value: subfieldZ.value};
175
+ }
169
176
 
170
177
  return {name: 'ISBN', value: undefined};
171
178
  }
@@ -192,7 +199,7 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
192
199
 
193
200
 
194
201
  function fix(record) {
195
- getInvalidFields(record).forEach(field => {
202
+ getRelevantFields(record).forEach(field => {
196
203
  if (field.tag === '020') {
197
204
  field.subfields.forEach(subfield => fixField020Subfield(field, subfield));
198
205
  return;
@@ -206,50 +213,69 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
206
213
  }
207
214
  });
208
215
 
209
- /*
210
- function trimISBN(value) {
211
- return trimSpaces(value.replace(/\s$/gu, '')); // handle "1234567890 (nid.)" => "1234567890" as well as spaces
212
- }
213
- */
214
216
 
215
217
  function fixField020Subfield(field, subfield) {
216
- console.info(`fixField020Subfield ${subfield.code} '${subfield.value}'`); // eslint-disable-line no-console
217
- if (invalidISBN(subfield.value) || subfieldRequiresHyphenation(subfield)) {
218
- console.info(` fixField020Subfield ${subfield.code} '${subfield.value}'`); // eslint-disable-line no-console
218
+ split020A(); // subfield and field are in the scope
219
+ addHyphens(subfield);
220
+ handleInvalidIsbn(field, subfield); // remove 020$a, add 020$z, Do this last, as it uses deletion
221
+ return;
222
+
223
+ function addHyphens(subfield) {
224
+ if (!subfieldsIsbnRequiresHyphenation(subfield)) {
225
+ return;
226
+ }
219
227
  // ISBN is valid but is missing hyphens
220
228
  const normalizedValue = normalizeIsbnValue(subfield.value);
221
229
  if (normalizedValue !== undefined) { // eslint-disable-line functional/no-conditional-statement
222
230
  subfield.value = normalizedValue; // eslint-disable-line functional/immutable-data
223
- } else if (subfield.code === 'a' && handleInvalid) { // eslint-disable-line functional/no-conditional-statement
224
- // $a => $z (bit overkill to add $z and remove $a instead of just renaming, but too lazy to fix/test thorougly)
225
- field.subfields.push({code: 'z', value: subfield.value}); // eslint-disable-line functional/immutable-data
226
- record.removeSubfield(subfield, field);
227
231
  }
228
232
  }
229
- return;
230
233
 
231
- function normalizeIsbnValue2(trimmedValue) {
232
- //const trimmedValue = trimISBN(value); // NB! This might lose information that should be stored in $q...
233
- const auditResult = ISBN.audit(trimmedValue);
234
- if (auditResult.validIsbn) {
235
- const parsedIsbn = ISBN.parse(trimmedValue);
236
- if (hyphenateISBN) { // eslint-disable-line functional/no-conditional-statement
237
- return trimmedValue.length === 10 ? parsedIsbn.isbn10h : parsedIsbn.isbn13h; // eslint-disable-line functional/immutable-data
238
- }
239
- // Just trim
240
- return trimmedValue.length === 10 ? parsedIsbn.isbn10 : parsedIsbn.isbn13; // eslint-disable-line functional/immutable-data
234
+ function handleInvalidIsbn(field, subfield) {
235
+ if (subfield.code !== 'a' || !handleInvalid) {
236
+ return;
241
237
  }
242
- return undefined;
238
+ const head = getFirstWord(subfield.value);
239
+ if (!invalidISBN(head)) {
240
+ return;
241
+ }
242
+ // $a => $z (bit overkill to add $z and remove $a instead of just renaming, but too lazy to fix/test thoroughly)
243
+ field.subfields.push({code: 'z', value: subfield.value}); // eslint-disable-line functional/immutable-data
244
+ record.removeSubfield(subfield, field);
245
+ }
246
+
247
+ function split020A() {
248
+ // Move non-initial words from $a to $q:
249
+ if (subfield.code !== 'a') {
250
+ return;
251
+ }
252
+ const value = trimSpaces(subfield.value);
253
+ const position = value.indexOf(' ');
254
+ if (position === -1) {
255
+ return;
256
+ }
257
+ const head = getFirstWord(value);
258
+ if (invalidISBN(head)) { // Don't split, if first word ain't ISBN
259
+ return;
260
+ }
261
+ const tail = value.substring(position + 1);
262
+ subfield.value = head; // eslint-disable-line functional/immutable-data
263
+ field.subfields.push({code: 'q', value: tail}); // eslint-disable-line functional/immutable-data
243
264
  }
244
265
 
245
266
  function normalizeIsbnValue(value) {
246
- const trimmedValue = value.replace(/^\s+/gu, '');
247
- if (trimmedValue.indexOf(' ') === -1) {
248
- return normalizeIsbnValue2(trimmedValue);
267
+ const trimmedValue = getFirstWord(value);
268
+ //const trimmedValue = trimISBN(value); // NB! This might lose information that should be stored in $q...
269
+ const auditResult = ISBN.audit(trimmedValue);
270
+ if (!auditResult.validIsbn) {
271
+ return undefined;
272
+ }
273
+ const numbersOnly = trimmedValue.replace(/[^0-9Xx]+/ug, '');
274
+ const parsedIsbn = ISBN.parse(trimmedValue);
275
+ if (hyphenateISBN) { // eslint-disable-line functional/no-conditional-statement
276
+ return numbersOnly.length === 10 ? parsedIsbn.isbn10h : parsedIsbn.isbn13h; // eslint-disable-line functional/immutable-data
249
277
  }
250
- const [head] = trimmedValue.split(' ');
251
- // NB! We currently drop the tail part, as it prevents us from pairing doubles. Parametrize?
252
- return normalizeIsbnValue2(head);
278
+ return numbersOnly.length === 10 ? parsedIsbn.isbn10 : parsedIsbn.isbn13; // eslint-disable-line functional/immutable-data
253
279
  }
254
280
  }
255
281
  }
@@ -4,7 +4,7 @@
4
4
  *
5
5
  * MARC record validators used in Melinda
6
6
  *
7
- * Copyright (c) 2014-2020 University Of Helsinki (The National Library Of Finland)
7
+ * Copyright (c) 2014-2022 University Of Helsinki (The National Library Of Finland)
8
8
  *
9
9
  * This file is part of marc-record-validators-melinda
10
10
  *
@@ -84,6 +84,12 @@ describe('isbn-issn', () => {
84
84
  ind2: ' ',
85
85
  subfields: [{code: 'a', value: 'foo'}]
86
86
  },
87
+ {
88
+ tag: '020',
89
+ ind1: ' ',
90
+ ind2: ' ',
91
+ subfields: [{code: 'a', value: '90-68-31-372-X'}] // contains an extra hyphen
92
+ },
87
93
  {
88
94
  tag: '022',
89
95
  ind1: ' ',
@@ -140,16 +146,10 @@ describe('isbn-issn', () => {
140
146
  });
141
147
  });
142
148
 
143
- it('Finds the record invalid (Spaces in ISBN)', async () => {
149
+ it('Finds the record invalid (reason: multiword)', async () => {
144
150
  const validator = await validatorFactory();
145
151
  const record = new MarcRecord({
146
152
  fields: [
147
- {
148
- tag: '020',
149
- ind1: ' ',
150
- ind2: ' ',
151
- subfields: [{code: 'a', value: ' 9789519155470'}]
152
- },
153
153
  {
154
154
  tag: '020',
155
155
  ind1: ' ',
@@ -160,12 +160,7 @@ describe('isbn-issn', () => {
160
160
  });
161
161
  const result = await validator.validate(record);
162
162
 
163
- expect(result).to.eql({
164
- valid: false, messages: [
165
- 'ISBN ( 9789519155470) is not valid',
166
- 'ISBN (978-600-377-017-1 (nid.)) is not valid'
167
- ]
168
- });
163
+ expect(result).to.eql({valid: false, messages: ['ISBN (978-600-377-017-1 (nid.)) is not valid']});
169
164
  });
170
165
 
171
166
  it('Finds the record invalid (ISSN in \'l\'-subfield)', async () => {
@@ -206,6 +201,12 @@ describe('isbn-issn', () => {
206
201
  ind2: ' ',
207
202
  subfields: [{code: 'a', value: '9789519155470'}]
208
203
  },
204
+ {
205
+ tag: '020',
206
+ ind1: ' ',
207
+ ind2: ' ',
208
+ subfields: [{code: 'a', value: '9068-31-372-X'}] // legal digits, but bad hyphenation
209
+ },
209
210
  {
210
211
  tag: '020',
211
212
  ind1: ' ',
@@ -218,6 +219,7 @@ describe('isbn-issn', () => {
218
219
 
219
220
  expect(result).to.eql({valid: false, messages: [
220
221
  'ISBN (9789519155470) is not valid',
222
+ 'ISBN (9068-31-372-X) is not valid',
221
223
  'ISBN (386006004X) is not valid'
222
224
  ]});
223
225
  });
@@ -309,15 +311,16 @@ describe('isbn-issn', () => {
309
311
  tag: '020',
310
312
  ind1: ' ',
311
313
  ind2: ' ',
312
- subfields: [{code: 'a', value: '9786003770171 (nid.)'}]
314
+ subfields: [{code: 'a', value: '9786003770171 (nidottu)'}]
313
315
  }
314
316
  ]
315
317
  });
316
318
  await validator.fix(record);
317
319
 
318
320
  expect(record.fields).to.eql([
319
- {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '9786003770171'}]},
320
- {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '9786003770171'}]}
321
+ // NB! Initial space does not need to be removed. It's crap, but not this fixer's crap.
322
+ {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: ' 9786003770171'}]},
323
+ {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '9786003770171'}, {code: 'q', value: '(nidottu)'}]}
321
324
  ]);
322
325
  });
323
326
 
@@ -347,7 +350,9 @@ describe('isbn-issn', () => {
347
350
  subfields: [
348
351
  {code: 'a', value: '978-600-377-017-1'},
349
352
  {code: 'z', value: '978-600-377-017-1'},
350
- {code: 'z', value: 'foo bar'}
353
+ {code: 'z', value: 'foo bar'},
354
+ // NB! Technically $q should come before $z subfields, but this is good enough.
355
+ {code: 'q', value: '(nid.)'}
351
356
  ]
352
357
  }
353
358
  ]);
@@ -361,6 +366,12 @@ describe('isbn-issn', () => {
361
366
  tag: '005',
362
367
  value: 'whatever'
363
368
  },
369
+ {
370
+ tag: '020',
371
+ ind1: ' ',
372
+ ind2: ' ',
373
+ subfields: [{code: 'q', value: 'sidottu'}]
374
+ },
364
375
  {
365
376
  tag: '024',
366
377
  ind1: ' ',
@@ -373,6 +384,7 @@ describe('isbn-issn', () => {
373
384
 
374
385
  expect(record.fields).to.eql([
375
386
  {tag: '005', value: 'whatever'},
387
+ {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'q', value: 'sidottu'}]},
376
388
  {tag: '024', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: ' 9786003770171'}]}
377
389
  ]);
378
390
  });
@@ -389,6 +401,12 @@ describe('isbn-issn', () => {
389
401
  tag: '020', ind1: ' ', ind2: ' ',
390
402
  subfields: [{code: 'a', value: '917153086X'}]
391
403
  },
404
+ {
405
+ tag: '020',
406
+ ind1: ' ',
407
+ ind2: ' ',
408
+ subfields: [{code: 'a', value: '9068-31-372-X'}] // legal digits, but bad hyphenation
409
+ },
392
410
  {
393
411
  tag: '020', ind1: ' ', ind2: ' ',
394
412
  subfields: [{code: 'a', value: '386006004X (nid.)'}]
@@ -406,7 +424,8 @@ describe('isbn-issn', () => {
406
424
  expect(record.fields).to.eql([
407
425
  {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '978-9916-605-32-5'}]},
408
426
  {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '91-7153-086-X'}]},
409
- {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '3-86006-004-X'}]},
427
+ {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '90-6831-372-X'}]}, // corrected hyphens
428
+ {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'a', value: '3-86006-004-X'}, {code: 'q', value: '(nid.)'}]},
410
429
  {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'z', value: '978-9916-605-32-5'}]},
411
430
  {tag: '020', ind1: ' ', ind2: ' ', subfields: [{code: 'z', value: '978-9916-605-32-5'}]}
412
431
  ]);
@@ -86,6 +86,10 @@ export default function () {
86
86
  debug(`Handling subfield ${subfield.code}`);
87
87
  let portion = getPortion(subfield, rulesForField); // eslint-disable-line functional/no-let
88
88
 
89
+ if (portion === false) {
90
+ return;
91
+ }
92
+
89
93
  if (portion === 'CF' || portion === 'NC') {
90
94
  return;
91
95
  }
@@ -140,7 +144,7 @@ export default function () {
140
144
  const [portion] = rules.filter(rule => rule.namePortion === subfield.code).map(rule => rule.portion);
141
145
 
142
146
  if (portion === undefined) {
143
- throw new Error(`Unknown subfield code ${subfield.code}`);
147
+ return false;
144
148
  }
145
149
 
146
150
  return portion.toUpperCase();
@@ -63,7 +63,8 @@ describe('resolvable-ext-references-melinda', () => {
63
63
 
64
64
  it('Throws an error when prefixPattern not provided', async () => {
65
65
  const validator = await testContext.default({endpoint, prefixPattern, fields});
66
- await expect(validator.validate()).to.be.rejectedWith(Error, 'Cannot read property \'fields\' of undefined');
66
+ // Matches both "Cannot read property 'fields' of undefined" and "Cannot read properties of undefined (reading 'fields')"
67
+ await expect(validator.validate()).to.be.rejectedWith(Error, /^Cannot read propert/u);
67
68
  });
68
69
 
69
70
  describe('#validate', () => {