@natlibfi/marc-record-validators-melinda 12.0.0-alpha.7 → 12.0.0-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -65,12 +65,6 @@ export default function () {
65
65
 
66
66
  // Field validation with punctuation rules for normal and special cases in subfunction (to reduce complexity to please travisci)
67
67
  function validateField(field, linkedTag, fix, message) {
68
- // This is used to find last subfield that should have punctuation
69
- function findLastSubfield(field) {
70
- const subfields = field.subfields.filter(sf => isNaN(sf.code) && 'value' in sf);
71
- return subfields.slice(-1).shift();
72
- }
73
-
74
68
  function getDefaultPuncMarks(tag) {
75
69
  if (tag.match(/^[1678](?:00|10|11|30)/u) || tag === '740') { // As defined in Loppupisteohje
76
70
  return `${validPuncMarks})`;
@@ -98,7 +92,7 @@ function validateField(field, linkedTag, fix, message) {
98
92
 
99
93
  // Last char should be punc, but it's not one of listed punctuation marks nor dot
100
94
  if (punc && !(lastPuncMark || lastPuncDot)) {
101
- console.log(puncMarks)
95
+ //console.log(puncMarks)
102
96
  if (penultimateCharacter && validQuoteChars.includes(lastChar) && puncMarks.includes(penultimateCharacter)) {
103
97
  // Exception: do nothing! Ending in punc+quote combo is all right, and does not imply a missing punc
104
98
  }
@@ -189,12 +183,17 @@ function validateField(field, linkedTag, fix, message) {
189
183
  normalPuncRules(lastSubField, res.punc, tag, false, false);
190
184
 
191
185
  // Search for Finnish terms
192
- } else if (res.special.termField) {
186
+ } else if (res.special.termSubfieldCode) {
193
187
  lastSubField = findLastSubfield(field);
194
188
 
195
189
  if (lastSubField) {
196
- const languageField = field.subfields.find(({code}) => code === res.special.termField);
197
- if (languageField && languageField.value && finnishTerms.some(p => p.test(languageField.value))) {
190
+ const lexicon = getLexicon(field, res.special.termSubfieldCode);
191
+ const proceed = !finnishException(field, res.special.termSubfieldCode, false);
192
+
193
+
194
+ //const languageField = field.subfields.find(({code}) => code === res.special.termSubfieldCode);
195
+ //if (languageField && languageField.value && finnishTerms.some(p => p.test(languageField.value))) {
196
+ if (lexicon && finnishTerms.some(p => p.test(lexicon)) && proceed) {
198
197
  // If (languageField && languageField.value && finnishTerms.indexOf(languageField.value) > -1) {
199
198
  normalPuncRules(lastSubField, res.punc, tag, true, false);
200
199
  } else {
@@ -272,8 +271,12 @@ function validateField(field, linkedTag, fix, message) {
272
271
  return;
273
272
  }
274
273
 
274
+ const forceNormal = res.special ? finnishException(field, res.special.termSubfieldCode, true) : false;
275
275
  // Normal rules
276
- if (typeof res.special === 'undefined' || res.special === null) {
276
+ if (typeof res.special === 'undefined' || res.special === null || forceNormal) {
277
+ if (forceNormal) {
278
+ console.info("EXCEPTION. SKIP FINNISH RULES");
279
+ }
277
280
  lastSubField = findLastSubfield(field);
278
281
 
279
282
  if (lastSubField) {
@@ -300,3 +303,51 @@ export function validateSingleField(field, linkedTag, fix) {
300
303
  return message;
301
304
  }
302
305
 
306
/**
 * Read the lexicon identifier stored in one of the field's subfields.
 *
 * @param {{subfields: Array<{code: string, value?: string}>}} field - data field to inspect
 * @param {string} subfieldCode - code of the subfield that holds the lexicon identifier
 * @returns {string|undefined} the value of the first subfield with the given code, provided
 *   it is non-empty and matches at least one pattern in finnishTerms; otherwise undefined
 */
function getLexicon(field, subfieldCode) {
  const lexiconSubfield = field.subfields.find(subfield => subfield.code === subfieldCode);
  const lexiconValue = lexiconSubfield ? lexiconSubfield.value : undefined;

  // A missing subfield and a missing/empty value are treated alike: no lexicon
  if (!lexiconValue) {
    return undefined;
  }

  const isListedLexicon = finnishTerms.some(pattern => pattern.test(lexiconValue));
  return isListedLexicon ? lexiconValue : undefined;
}
316
+
317
/**
 * Tell whether the field's last punctuation-bearing subfield ends in an abbreviation
 * whose final dot is part of the term itself (e.g. 'eaa.'), so that the usual
 * "no ending punctuation" rule for Finnish lexicons must not be applied to it.
 *
 * Only field 648 with lexicon 'yso/fin' or 'yso/swe' can produce an exception.
 *
 * @param {{tag: string, subfields: Array<{code: string, value?: string}>}} field - field to check
 * @param {string} termSubfieldCode - code of the subfield that holds the lexicon identifier
 * @param {boolean} [hasDot=true] - true: match the abbreviation WITH its final dot (the dot
 *   must be kept); false: match the abbreviation WITHOUT the dot (the dot is missing)
 * @returns {boolean} true when the value ends in one of the listed abbreviations
 */
function finnishException(field, termSubfieldCode, hasDot = true) {
  const lexicon = getLexicon(field, termSubfieldCode);
  if (!lexicon) {
    return false;
  }

  const lastSubfield = findLastSubfield(field);
  if (!lastSubfield || !lastSubfield.value) {
    return false;
  }

  // Some terms can end in '.' that we want to keep
  if (field.tag !== '648') { // Only yso-aika (648) is checked
    // yso has 'MODEL.LA.' and 'Corel R.A.V.E.' but these are so rare I'm not listing them
    return false;
  }

  if (lexicon === 'yso/fin') { // 'eaa.' appears in prefLabels and 'eKr.' in altLabels
    // Both variants are anchored to the end of the value as a whole. (The no-dot pattern
    // used to be / (?:eaa|[ej]Kr)|jaa$/ where '$' applied to 'jaa' only, so ' eaa' or
    // ' eKr' matched anywhere in the value and 'jaa' matched without a preceding space.)
    const finnishPattern = hasDot
      ? / (?:eaa|[ej]Kr|jaa)\.$/u // Finnish term from which the dot is not to be removed
      : / (?:eaa|[ej]Kr|jaa)$/u; // Finnish word that needs a dot
    return finnishPattern.test(lastSubfield.value);
  }

  if (lexicon === 'yso/swe') {
    const swedishPattern = hasDot
      ? / (?:[ef]\.Kr|f\.v\.t)\.$/u
      : / (?:[ef]\.Kr|f\.v\.t)$/u;
    return swedishPattern.test(lastSubfield.value);
  }

  return false;
}
348
+
349
/**
 * Find the last subfield that should carry the field's ending punctuation.
 *
 * Subfields whose code is numeric, and subfields that have no 'value' property,
 * are passed over.
 *
 * @param {{subfields: Array<{code: string, value?: string}>}} field - field to search
 * @returns {{code: string, value: string}|undefined} the last eligible subfield, or
 *   undefined when the field has none
 */
function findLastSubfield(field) {
  // Global isNaN() is intentional: it coerces the string code to a number first,
  // so '0'..'9' are rejected. Number.isNaN() would accept every string code.
  const eligible = field.subfields.filter(subfield => isNaN(subfield.code) && 'value' in subfield);
  if (eligible.length === 0) {
    return undefined;
  }
  return eligible[eligible.length - 1];
}
@@ -1585,7 +1585,7 @@ describe('ending-punctuation', () => {
1585
1585
  ]
1586
1586
  });
1587
1587
 
1588
- const recordVali648dFinNo = new MarcRecord({
1588
+ const recordValid648FinNo = new MarcRecord({
1589
1589
  leader: '',
1590
1590
  fields: [
1591
1591
  {
@@ -1596,6 +1596,16 @@ describe('ending-punctuation', () => {
1596
1596
  {code: 'a', value: '1900-luku'},
1597
1597
  {code: '2', value: 'yso/swe'}
1598
1598
  ]
1599
+ },
1600
+ { // Exception: term ending in dot:
1601
+ tag : '648',
1602
+ ind1: ' ',
1603
+ ind2: '7',
1604
+ subfields: [
1605
+ {code: 'a', value: '1200-luku eaa.'},
1606
+ {code: '2', value: 'yso/fin'},
1607
+ {code: '0', value: 'http://www.yso.fi/onto/yso/p1129911200'}
1608
+ ]
1599
1609
  }
1600
1610
  ]
1601
1611
  });
@@ -1668,7 +1678,7 @@ describe('ending-punctuation', () => {
1668
1678
 
1669
1679
  it('Finds record valid - 648 Finnish, without punc', async () => {
1670
1680
  const validator = await validatorFactory();
1671
- const result = await validator.validate(recordVali648dFinNo);
1681
+ const result = await validator.validate(recordValid648FinNo);
1672
1682
  assert.equal(result.valid, true);
1673
1683
  });
1674
1684
 
@@ -1713,7 +1723,7 @@ describe('ending-punctuation', () => {
1713
1723
  ]
1714
1724
  });
1715
1725
 
1716
- const recordInvali648dFinYes = new MarcRecord({
1726
+ const recordInvalid648FinYes = new MarcRecord({
1717
1727
  leader: '',
1718
1728
  fields: [
1719
1729
  {
@@ -1724,6 +1734,15 @@ describe('ending-punctuation', () => {
1724
1734
  {code: 'a', value: '1900-luku.'},
1725
1735
  {code: '2', value: 'yso/swe'}
1726
1736
  ]
1737
+ }, { // Exception:
1738
+ tag: '648',
1739
+ ind1: ' ',
1740
+ ind2: '7',
1741
+ subfields: [
1742
+ {code: 'a', value: '1200-luku eaa'},
1743
+ {code: '2', value: 'yso/fin'},
1744
+ {code: '0', value: 'http://www.yso.fi/onto/yso/p1129911200'}
1745
+ ]
1727
1746
  }
1728
1747
  ]
1729
1748
  });
@@ -1799,9 +1818,9 @@ describe('ending-punctuation', () => {
1799
1818
 
1800
1819
  it('Finds record invalid - 648 Finnish, with punc', async () => {
1801
1820
  const validator = await validatorFactory();
1802
- const result = await validator.validate(recordInvali648dFinYes);
1821
+ const result = await validator.validate(recordInvalid648FinYes);
1803
1822
  assert.deepEqual(result, {
1804
- message: ['Field 648 has unwanted ending punctuation \'.\''],
1823
+ message: ['Field 648 has unwanted ending punctuation \'.\'', "Field 648 requires ending punctuation, ends in 'a'"],
1805
1824
  valid: false
1806
1825
  });
1807
1826
  });
@@ -1857,11 +1876,16 @@ describe('ending-punctuation', () => {
1857
1876
 
1858
1877
  it('Repairs the invalid record - 648 Finnish, removes punc $a', async () => {
1859
1878
  const validator = await validatorFactory();
1860
- const result = await validator.fix(recordInvali648dFinYes);
1861
- assert.equal(recordInvali648dFinYes.equalsTo(recordVali648dFinNo), true);
1879
+ const result = await validator.fix(recordInvalid648FinYes);
1880
+ //console.info("NV-------");
1881
+ //console.info(JSON.stringify(recordInvalid648FinYes));
1882
+ //console.info(JSON.stringify(recordValid648FinNo));
1883
+ //assert.equal(recordInvalid648FinYes.equalsTo(recordValid648FinNo), true);
1884
+ assert.deepEqual(recordInvalid648FinYes, recordValid648FinNo);
1885
+ //console.info(JSON.stringify(result));
1862
1886
  assert.deepEqual(result, {
1863
- message: ['Field 648 has unwanted ending punctuation \'.\''],
1864
- fix: ['Field 648 - Removed punctuation from $a'],
1887
+ message: ['Field 648 has unwanted ending punctuation \'.\'', "Field 648 requires ending punctuation, ends in 'a'"],
1888
+ fix: ['Field 648 - Removed punctuation from $a', 'Field 648 - Added punctuation to $a'],
1865
1889
  valid: false
1866
1890
  });
1867
1891
  });
@@ -171,6 +171,18 @@ function normalize245Indicator1(field, record) {
171
171
  field.ind1 = field1XX.length === 0 ? '0' : '1';
172
172
  }
173
173
 
174
/**
 * Set ind1 of a 520 field to '8' ("no display constant generated") when its only $a
 * is one of a fixed list of generic labels (part of MELKEHITYS-2579).
 *
 * The field is modified in place. Fields with another tag, with no $a, or with more
 * than one $a are left untouched.
 *
 * @param {{tag: string, ind1?: string, subfields?: Array<{code: string, value?: string}>}} field
 *   - field to normalize
 * @returns {void}
 */
function noDisplayConstantGenerated520Indicator1(field) {
  if (field.tag !== '520') {
    return;
  }

  const genericLabels = ['Abstract.', 'Abstrakt.', 'Abstrakti.', 'English Summary.', 'Sammandrag.', 'Tiivistelmä.'];
  const subfieldsA = field.subfields.filter(sf => sf.code === 'a');

  // Compare the $a itself: the first subfield of the field is not necessarily $a
  // (it may be e.g. $3 or $6), so field.subfields[0] must not be used here.
  if (subfieldsA.length === 1 && genericLabels.includes(subfieldsA[0].value)) {
    field.ind1 = '8';
  }
}
185
+
174
186
  function normalize776Indicator2(field) {
175
187
  if (field.tag !== '776') {
176
188
  return;
@@ -242,6 +254,7 @@ function recordNormalizeIndicators(record) {
242
254
  function fieldNormalizeIndicators(field, record, languages) {
243
255
  normalize084Indicator1(field);
244
256
  normalize245Indicator1(field, record);
257
+ noDisplayConstantGenerated520Indicator1(field);
245
258
  normalizeNonFilingIndicator1(field, languages);
246
259
  normalizeNonFilingIndicator2(field, languages);
247
260
  normalize776Indicator2(field);
@@ -2,7 +2,7 @@
2
2
 
3
3
  import {subfieldArraysContainSameData} from "../utils.js";
4
4
 
5
- function tagToDataProvenanceSubfieldCode(tag) {
5
+ export function tagToDataProvenanceSubfieldCode(tag) {
6
6
  if ( ['533', '800', '810', '811', '830'].includes(tag)) {
7
7
  return 'y';
8
8
  }
@@ -10,9 +10,10 @@
10
10
  * (They are jumped over when looking for next (non-controlfield subfield)
11
11
  */
12
12
  import {validateSingleField} from './ending-punctuation.js';
13
+ import {tagToDataProvenanceSubfieldCode} from './merge-fields/dataProvenance.js';
13
14
  import {fieldGetUnambiguousTag} from './subfield6Utils.js';
14
15
  //import createDebugLogger from 'debug';
15
- import {fieldToString, nvdebug} from './utils.js';
16
+ import {fieldToString, isControlSubfieldCode, nvdebug} from './utils.js';
16
17
  import clone from 'clone';
17
18
 
18
19
  //const debug = createDebugLogger('debug/punctuation2');
@@ -49,12 +50,19 @@ export default function () {
49
50
  }
50
51
  }
51
52
 
52
- function isControlSubfield(subfield) {
53
- return ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'].includes(subfield.code);
53
+
54
+
55
/**
 * Tell whether a subfield is to be jumped over when looking for the next relevant subfield.
 *
 * A subfield is irrelevant when its code is the data provenance subfield code of the
 * given tag, or when isControlSubfieldCode() accepts its code (that check currently
 * covers other codes as well: $3, $4, $7, $9...).
 *
 * @param {{code: string}} subfield - subfield to classify
 * @param {string} tag - tag of the field the subfield belongs to
 * @returns {boolean} true when the subfield should be skipped
 */
function isIrrelevantSubfield(subfield, tag) {
  const {code} = subfield;
  return code === tagToDataProvenanceSubfieldCode(tag) || isControlSubfieldCode(code);
}
55
62
 
63
+
56
64
  function getNextRelevantSubfield(field, currSubfieldIndex) {
57
- return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isControlSubfield(subfield));
65
+ return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isIrrelevantSubfield(subfield, field.tag));
58
66
  }
59
67
 
60
68
  export function fieldGetFixedString(field, add = true) {
@@ -155,7 +163,8 @@ const remove490And830Whatever = [{'code': 'axyzv', 'followedBy': 'axyzv', 'remov
155
163
  const linkingEntryRemoveWhatever = [
156
164
  {'code': 'i', 'followedBy': 'at', 'remove': / ?:$/u}, // ':'
157
165
  {'code': 'at', 'remove': /\.$/u},
158
- {'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz', 'remove': /\. -$/u}
166
+ // Only ". -" separator is still used in music. We can strip it, but can only create the non-music punctuation!
167
+ {'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz#', 'remove': /\. -$/u}
159
168
  ];
160
169
 
161
170
 
@@ -1,9 +1,23 @@
1
1
  import assert from 'node:assert';
2
- import {MarcRecord} from '@natlibfi/marc-record';
2
+ import createDebugLogger from 'debug';
3
+ import fetchMock from 'fetch-mock';
4
+
3
5
  import validatorFactory from './translate-terms.js';
6
+
7
+ import {MarcRecord} from '@natlibfi/marc-record';
4
8
  import {READERS} from '@natlibfi/fixura';
5
9
  import generateTests from '@natlibfi/fixugen';
6
- import createDebugLogger from 'debug';
10
+ import {fakeData} from '../test-fixtures/translate-terms-data.js';
11
+
12
+ const uris = [
13
+ 'http://www.yso.fi/onto/yso/p13299',
14
+ 'http://www.yso.fi/onto/yso/p111739',
15
+ 'http://www.yso.fi/onto/yso/p6197061979',
16
+ 'http://www.yso.fi/onto/yso/p6196061969',
17
+ 'http://urn.fi/URN:NBN:fi:au:slm:s161'
18
+ ];
19
+
20
+
7
21
 
8
22
  generateTests({
9
23
  callback,
@@ -15,6 +29,15 @@ generateTests({
15
29
  },
16
30
  hooks: {
17
31
  before: async () => {
32
+
33
+ fetchMock.mockGlobal()
34
+ .get(`https://api.finto.fi/rest/v1/data?uri=${uris[0]}&format=application%2Fjson`, {status: 200, headers: {}, body: fakeData})
35
+ .get(`https://api.finto.fi/rest/v1/data?uri=${uris[1]}&format=application%2Fjson`, {status: 200, headers: {}, body: fakeData})
36
+ .get(`https://api.finto.fi/rest/v1/data?uri=${uris[2]}&format=application%2Fjson`, {status: 200, headers: {}, body: fakeData})
37
+ .get(`https://api.finto.fi/rest/v1/data?uri=${uris[3]}&format=application%2Fjson`, {status: 200, headers: {}, body: fakeData})
38
+ .get(`https://api.finto.fi/rest/v1/data?uri=${uris[4]}&format=application%2Fjson`, {status: 200, headers: {}, body: fakeData});
39
+
40
+
18
41
  testValidatorFactory();
19
42
  }
20
43
  }
package/src/utils.js CHANGED
@@ -103,6 +103,12 @@ export function nvdebugFieldArray(fields, prefix = ' ', func = undefined) {
103
103
  }
104
104
 
105
105
  export function isControlSubfieldCode(subfieldCode) {
106
+ // NB! Only $w, $0, $1, $5, $6 and $8 are really control subfields. In Finland $9 is often a control subfield
107
+ // $3 material (part of the whole thing)
108
+ // $4 means 'relationship' (similar to relator terms at least in X00 and similar)
109
+ // $7 is usually the provenance subfield. However, provenance can be stored in other subfields as well. See merge-fields/dataProvenance.js for details
110
+ // However, change this only if needed. Maybe all provenance subfields should return true?
111
+ // This may become relevant when AI starts to create stuff...
106
112
  if (['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'w'].includes(subfieldCode)) {
107
113
  return true;
108
114
  }
@@ -0,0 +1,11 @@
1
+ {
2
+ "_validationOptions": {},
3
+ "fields": [
4
+ { "tag": "005", "value": "20220202020202.0" },
5
+ { "tag": "520", "ind1": "8", "ind2": " ", "subfields": [ {"code": "a", "value": "Abstrakti."}, {"code": "9", "value": "TESTI<KEEP>"} ]},
6
+ { "tag": "520", "ind1": " ", "ind2": " ", "subfields": [ {"code": "a", "value": "Whatever."}]},
7
+ { "tag": "520", "ind1": "8", "ind2": " ", "subfields": [ {"code": "a", "value": "Tiivistelmä."}]}
8
+
9
+ ],
10
+ "leader": ""
11
+ }
@@ -0,0 +1,4 @@
1
+ {
2
+ "description": "10: certain values have ind1=8 as per MELKEHITYS-2579",
3
+ "fix": true
4
+ }
@@ -0,0 +1,11 @@
1
+ {
2
+ "_validationOptions": {},
3
+ "fields": [
4
+ { "tag": "005", "value": "20220202020202.0" },
5
+ { "tag": "520", "ind1": " ", "ind2": " ", "subfields": [ {"code": "a", "value": "Abstrakti."}, {"code": "9", "value": "TESTI<KEEP>"} ]},
6
+ { "tag": "520", "ind1": " ", "ind2": " ", "subfields": [ {"code": "a", "value": "Whatever."}]},
7
+ { "tag": "520", "ind1": "3", "ind2": " ", "subfields": [ {"code": "a", "value": "Tiivistelmä."}]}
8
+
9
+ ],
10
+ "leader": ""
11
+ }
@@ -0,0 +1,42 @@
1
// Canned concept data: one row per concept, as [uri, Finnish prefLabel, Swedish prefLabel].
// NOTE(review): presumably served as the body of the mocked Finto API responses in the
// translate-terms tests — confirm against the importing test file.
const fakeConcepts = [
  ['http://www.yso.fi/onto/yso/p13299', 'laiturit', 'bryggor'],
  ['http://www.yso.fi/onto/yso/p111739', 'Ivalo (Inari)', 'Ivalo (Enare)'],
  ['http://www.yso.fi/onto/yso/p6197061979', '1970-luku', '1970-talet'],
  ['http://www.yso.fi/onto/yso/p6196061969', '1960-luku', '1960-talet'],
  ['http://urn.fi/URN:NBN:fi:au:slm:s161', 'naistenlehdet', 'damtidningar']
];

// Expand each row into a {uri, prefLabel: [fi, sv]} graph node. Key order is kept
// (uri, prefLabel / lang, value) so that the serialized JSON stays the same.
const fakeTerms = {
  graph: fakeConcepts.map(([uri, finnishLabel, swedishLabel]) => ({
    uri,
    prefLabel: [
      {lang: 'fi', value: finnishLabel},
      {lang: 'sv', value: swedishLabel}
    ]
  }))
};

// The whole graph, serialized once as a JSON string
export const fakeData = JSON.stringify(fakeTerms);