@natlibfi/marc-record-validators-melinda 12.0.0-alpha.1 → 12.0.0-alpha.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/{melinda-node-tests.yml → melinda-node-tests-and-publish.yml} +36 -11
- package/dist/access-rights.test.js +1 -1
- package/dist/access-rights.test.js.map +1 -1
- package/dist/addMissingField337.test.js +1 -1
- package/dist/addMissingField337.test.js.map +1 -1
- package/dist/addMissingField338.test.js +1 -1
- package/dist/addMissingField338.test.js.map +1 -1
- package/dist/cyrillux-usemarcon-replacement.test.js +4 -7
- package/dist/cyrillux-usemarcon-replacement.test.js.map +2 -2
- package/dist/cyrillux.test.js +1 -1
- package/dist/cyrillux.test.js.map +1 -1
- package/dist/double-commas.test.js +1 -1
- package/dist/double-commas.test.js.map +1 -1
- package/dist/empty-fields.test.js +1 -1
- package/dist/empty-fields.test.js.map +1 -1
- package/dist/ending-punctuation-conf.js +6 -4
- package/dist/ending-punctuation-conf.js.map +2 -2
- package/dist/ending-punctuation.js +88 -18
- package/dist/ending-punctuation.js.map +3 -3
- package/dist/ending-punctuation.test.js +198 -103
- package/dist/ending-punctuation.test.js.map +2 -2
- package/dist/field-008-18-34-character-groups.test.js +1 -1
- package/dist/field-008-18-34-character-groups.test.js.map +1 -1
- package/dist/field-structure.test.js +1 -1
- package/dist/field-structure.test.js.map +1 -1
- package/dist/index.js +122 -59
- package/dist/index.js.map +2 -2
- package/dist/indicator-fixes.js +11 -1
- package/dist/indicator-fixes.js.map +2 -2
- package/dist/isbn-issn.js +8 -5
- package/dist/isbn-issn.js.map +2 -2
- package/dist/melindaCustomMergeFields.js +1 -1
- package/dist/melindaCustomMergeFields.js.map +2 -2
- package/dist/merge-fields/counterpartField.js +5 -0
- package/dist/merge-fields/counterpartField.js.map +2 -2
- package/dist/merge-fields/dataProvenance.js +29 -0
- package/dist/merge-fields/dataProvenance.js.map +7 -0
- package/dist/merge-fields/index.js +11 -2
- package/dist/merge-fields/index.js.map +2 -2
- package/dist/merge-fields/mergeField.js +1 -1
- package/dist/merge-fields/mergeField.js.map +2 -2
- package/dist/merge-fields.test.js +4 -2
- package/dist/merge-fields.test.js.map +2 -2
- package/dist/mergeField500Lisapainokset.js +1 -1
- package/dist/mergeField500Lisapainokset.js.map +2 -2
- package/dist/normalizeFieldForComparison.js +24 -0
- package/dist/normalizeFieldForComparison.js.map +2 -2
- package/dist/punctuation2.js +11 -5
- package/dist/punctuation2.js.map +2 -2
- package/dist/removeInferiorDataFields.js +2 -1
- package/dist/removeInferiorDataFields.js.map +2 -2
- package/dist/resolveOrphanedSubfield6s.js +1 -1
- package/dist/resolveOrphanedSubfield6s.js.map +2 -2
- package/dist/sortSubfields.js +5 -5
- package/dist/sortSubfields.js.map +2 -2
- package/dist/translate-terms.test.js +12 -2
- package/dist/translate-terms.test.js.map +2 -2
- package/dist/utils.js +9 -3
- package/dist/utils.js.map +2 -2
- package/package.json +22 -23
- package/src/access-rights.test.js +1 -1
- package/src/addMissingField337.test.js +1 -1
- package/src/addMissingField338.test.js +1 -1
- package/src/cyrillux-usemarcon-replacement.test.js +4 -9
- package/src/cyrillux.test.js +1 -1
- package/src/double-commas.test.js +1 -1
- package/src/empty-fields.test.js +1 -1
- package/src/ending-punctuation-conf.js +6 -5
- package/src/ending-punctuation.js +115 -24
- package/src/ending-punctuation.test.js +187 -104
- package/src/field-008-18-34-character-groups.test.js +1 -1
- package/src/field-structure.test.js +1 -1
- package/src/index.js +132 -59
- package/src/indicator-fixes.js +14 -1
- package/src/isbn-issn.js +11 -6
- package/src/melindaCustomMergeFields.js +1 -1
- package/src/merge-fields/counterpartField.js +6 -0
- package/src/merge-fields/dataProvenance.js +41 -0
- package/src/merge-fields/index.js +11 -2
- package/src/merge-fields/mergeField.js +2 -2
- package/src/merge-fields.test.js +6 -2
- package/src/mergeField500Lisapainokset.js +1 -1
- package/src/normalizeFieldForComparison.js +26 -0
- package/src/punctuation2.js +14 -5
- package/src/removeInferiorDataFields.js +4 -1
- package/src/resolveOrphanedSubfield6s.js +1 -1
- package/src/sortSubfields.js +7 -5
- package/src/translate-terms.test.js +25 -2
- package/src/utils.js +19 -3
- package/test-fixtures/indicator-fixes/10/expectedResult.json +11 -0
- package/test-fixtures/indicator-fixes/10/metadata.json +4 -0
- package/test-fixtures/indicator-fixes/10/record.json +11 -0
- package/test-fixtures/merge-fields/f05/expectedResult.json +24 -0
- package/test-fixtures/merge-fields/f05/metadata.json +6 -0
- package/test-fixtures/merge-fields/f05/record.json +30 -0
- package/test-fixtures/remove-inferior-datafields/f16/expectedResult.json +12 -0
- package/test-fixtures/remove-inferior-datafields/f16/metadata.json +5 -0
- package/test-fixtures/remove-inferior-datafields/f16/record.json +14 -0
- package/test-fixtures/translate-terms-data.js +42 -0
- package/src/melindaCustomMergeFields.json +0 -5120
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
*/
|
|
28
28
|
|
|
29
29
|
// Import {validPuncMarks, finnishTerms, confSpec} from './ending-punctuation-conf.js';
|
|
30
|
-
import {validPuncMarks, finnishTerms, confSpec} from './ending-punctuation-conf.js';
|
|
30
|
+
import {validPuncMarks, validQuoteChars, finnishTerms, confSpec} from './ending-punctuation-conf.js';
|
|
31
31
|
import createDebugLogger from 'debug';
|
|
32
32
|
|
|
33
33
|
const debug = createDebugLogger('@natlibfi/marc-record-validator-melinda/ending-punctuation');
|
|
@@ -65,40 +65,65 @@ export default function () {
|
|
|
65
65
|
|
|
66
66
|
// Field validation with punctuation rules for normal and special cases in subfunction (to reduce complexity to please travisci)
|
|
67
67
|
function validateField(field, linkedTag, fix, message) {
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
68
|
+
/**
 * Select the set of characters accepted as ending punctuation for a given tag.
 *
 * The result is a string of single characters; callers test membership with
 * `String.prototype.includes` against the last character(s) of a subfield value.
 *
 * @param {string} tag - MARC field tag, e.g. '100' or '650'
 * @returns {string} `validPuncMarks`, optionally extended with ')' or ')]'
 */
function getDefaultPuncMarks(tag) {
  // X00/X10/X11/X30 tags in the 1XX, 6XX, 7XX and 8XX ranges, plus 740: as defined in Loppupisteohje
  const isHeadingTag = tag.match(/^[1678](?:00|10|11|30)/u) !== null || tag === '740';

  // We don't want ').' here either. However, Loppupisteohje is a bit iffy here.
  // BUG: Note that our generic rules will remove dot from Finnish terms such as https://finto.fi/yso-aika/fi/page/p1069910600
  const termTags = ['647', '648', '650', '651', '654', '655', '656', '657', '658', '662'];

  if (isHeadingTag || termTags.includes(tag)) {
    return `${validPuncMarks})`;
  }

  // Field 260 additionally accepts a closing square bracket
  if (tag === '260') {
    return `${validPuncMarks})]`;
  }

  return validPuncMarks;
}
|
|
73
82
|
|
|
74
83
|
// Punctuation rule (Boolean), Check no ending dot strict (Boolean)
|
|
75
84
|
function normalPuncRules(subfield, punc, tag, checkEnd, overrideValidPuncMarks) {
|
|
76
|
-
const puncMarks = overrideValidPuncMarks ||
|
|
77
|
-
const
|
|
78
|
-
const
|
|
85
|
+
const puncMarks = overrideValidPuncMarks || getDefaultPuncMarks(tag);
|
|
86
|
+
const lastChar = subfield.value.slice(-1);
|
|
87
|
+
const lastPuncMark = puncMarks.includes(lastChar); // If string ends to punctuation char
|
|
88
|
+
const lastPuncDot = '.'.includes(lastChar); // If string ends to dot
|
|
89
|
+
const penultimateCharacter = subfield.value.length >= 2 ? subfield.value.charAt(subfield.value.length - 2) : undefined;
|
|
90
|
+
const antePenultimateCharacter = subfield.value.length >= 3 ? subfield.value.charAt(subfield.value.length - 3) : undefined;
|
|
91
|
+
|
|
79
92
|
|
|
80
|
-
// Last char should be punc, but
|
|
93
|
+
// Last char should be punc, but it's not one of listed punctuation marks nor dot
|
|
81
94
|
if (punc && !(lastPuncMark || lastPuncDot)) {
|
|
82
|
-
//
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
95
|
+
//console.log(puncMarks)
|
|
96
|
+
if (penultimateCharacter && validQuoteChars.includes(lastChar) && puncMarks.includes(penultimateCharacter)) {
|
|
97
|
+
// Exception: do nothing! Ending in punc+quote combo is all right, and does not imply a missing punc
|
|
98
|
+
}
|
|
99
|
+
else {
|
|
100
|
+
// Console.log("1. Invalid punctuation - missing")
|
|
101
|
+
message.message.push(`Field ${tag} requires ending punctuation, ends in '${lastChar}'`);
|
|
102
|
+
if (fix) {
|
|
103
|
+
subfield.value = subfield.value.concat('.');
|
|
104
|
+
message.fix.push(`Field ${tag} - Added punctuation to $${subfield.code}`);
|
|
105
|
+
}
|
|
87
106
|
}
|
|
88
107
|
|
|
89
108
|
// Last char is dot, but previous char is one of punc marks, like 'Question?.'
|
|
90
|
-
} else if (lastPuncDot &&
|
|
109
|
+
} else if (lastPuncDot && penultimateCharacter && puncMarks.includes(penultimateCharacter)) {
|
|
91
110
|
// Console.log("2. Invalid punctuation - duplicate, like '?.'")
|
|
92
|
-
message.message.push(`Field ${tag} has
|
|
111
|
+
message.message.push(`Field ${tag} has an extra dot after '${penultimateCharacter}'`);
|
|
93
112
|
if (fix) {
|
|
94
113
|
subfield.value = subfield.value.slice(0, -1);
|
|
95
|
-
message.fix.push(`Field ${tag} - Removed
|
|
114
|
+
message.fix.push(`Field ${tag} - Removed dot after punctuation from $${subfield.code}`);
|
|
115
|
+
}
|
|
116
|
+
// Last char is dot, but previous two chars are punc+quote, like 'Lorem "Ipsum.".'
|
|
117
|
+
} else if (lastPuncDot && antePenultimateCharacter && validQuoteChars.includes(penultimateCharacter) && puncMarks.includes(antePenultimateCharacter)) {
|
|
118
|
+
message.message.push(`Field ${tag} has an extra dot in '${antePenultimateCharacter}${penultimateCharacter}${lastChar}'`);
|
|
119
|
+
if (fix) {
|
|
120
|
+
subfield.value = subfield.value.slice(0, -1);
|
|
121
|
+
message.fix.push(`Field ${tag} - Removed '${lastChar}' after '${antePenultimateCharacter}${penultimateCharacter}'`);
|
|
96
122
|
}
|
|
97
|
-
|
|
98
123
|
// Last char shouldn't be dot !! This is behind checkEnd boolean, because of dots at end of abbreviations, so this is checked only in special cases !!//
|
|
99
124
|
} else if (checkEnd && (!punc && lastPuncDot)) {
|
|
100
125
|
// Console.log("3. Invalid punctuation - Shouldn't be dot, is")
|
|
101
|
-
message.message.push(`Field ${tag} has
|
|
126
|
+
message.message.push(`Field ${tag} has unwanted ending punctuation '${lastChar}'`);
|
|
102
127
|
if (fix) {
|
|
103
128
|
subfield.value = subfield.value.slice(0, -1);
|
|
104
129
|
message.fix.push(`Field ${tag} - Removed punctuation from $${subfield.code}`);
|
|
@@ -136,7 +161,7 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
136
161
|
if (res.special.ifInd2 && res.special.ifInd2.includes(field.ind2)) {
|
|
137
162
|
normalPuncRules(lastSubField, res.special.ifBoth, tag, true, res.special.ifLastCharNot);
|
|
138
163
|
|
|
139
|
-
// Matches
|
|
164
|
+
// Matches exception to special rule, noPuncIfInd2 (likely with value 4, that indicates copyright mark)
|
|
140
165
|
} else if (res.special.noPuncIfInd2 && field.ind2 && res.special.noPuncIfInd2.includes(field.ind2)) {
|
|
141
166
|
normalPuncRules(lastSubField, !res.special.ifBoth, tag, true, res.special.ifLastCharNot);
|
|
142
167
|
|
|
@@ -158,12 +183,17 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
158
183
|
normalPuncRules(lastSubField, res.punc, tag, false, false);
|
|
159
184
|
|
|
160
185
|
// Search for Finnish terms
|
|
161
|
-
} else if (res.special.
|
|
186
|
+
} else if (res.special.termSubfieldCode) {
|
|
162
187
|
lastSubField = findLastSubfield(field);
|
|
163
188
|
|
|
164
189
|
if (lastSubField) {
|
|
165
|
-
const
|
|
166
|
-
|
|
190
|
+
const lexicon = getLexicon(field, res.special.termSubfieldCode);
|
|
191
|
+
const proceed = !finnishException(field, res.special.termSubfieldCode, false);
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
//const languageField = field.subfields.find(({code}) => code === res.special.termSubfieldCode);
|
|
195
|
+
//if (languageField && languageField.value && finnishTerms.some(p => p.test(languageField.value))) {
|
|
196
|
+
if (lexicon && finnishTerms.some(p => p.test(lexicon)) && proceed) {
|
|
167
197
|
// If (languageField && languageField.value && finnishTerms.indexOf(languageField.value) > -1) {
|
|
168
198
|
normalPuncRules(lastSubField, res.punc, tag, true, false);
|
|
169
199
|
} else {
|
|
@@ -199,6 +229,15 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
199
229
|
|
|
200
230
|
validateField(field, linkedTag, fix, message);
|
|
201
231
|
}
|
|
232
|
+
// fallback
|
|
233
|
+
else {
|
|
234
|
+
debug(`special is definedm but no rule applies`);
|
|
235
|
+
const lastSubField = findLastSubfield(field);
|
|
236
|
+
|
|
237
|
+
if (lastSubField) {
|
|
238
|
+
normalPuncRules(lastSubField, res.punc, field.tag, false, false, fix, message);
|
|
239
|
+
}
|
|
240
|
+
}
|
|
202
241
|
}
|
|
203
242
|
|
|
204
243
|
let res = null;
|
|
@@ -232,8 +271,12 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
232
271
|
return;
|
|
233
272
|
}
|
|
234
273
|
|
|
274
|
+
const forceNormal = res.special ? finnishException(field, res.special.termSubfieldCode, true) : false;
|
|
235
275
|
// Normal rules
|
|
236
|
-
if (typeof res.special === 'undefined' || res.special === null) {
|
|
276
|
+
if (typeof res.special === 'undefined' || res.special === null || forceNormal) {
|
|
277
|
+
if (forceNormal) {
|
|
278
|
+
console.info("EXCEPTION. SKIP FINNISH RULES");
|
|
279
|
+
}
|
|
237
280
|
lastSubField = findLastSubfield(field);
|
|
238
281
|
|
|
239
282
|
if (lastSubField) {
|
|
@@ -260,3 +303,51 @@ export function validateSingleField(field, linkedTag, fix) {
|
|
|
260
303
|
return message;
|
|
261
304
|
}
|
|
262
305
|
|
|
306
|
+
/**
 * Read the lexicon identifier from a field, but only when it is one of the
 * lexicons recognised by the `finnishTerms` patterns.
 *
 * @param {{subfields: Array<{code: string, value?: string}>}} field - field to inspect
 * @param {string} subfieldCode - code of the subfield that carries the lexicon identifier
 * @returns {string|undefined} the subfield value when it matches any `finnishTerms`
 *   pattern; `undefined` when the subfield is missing, empty, or not recognised
 */
function getLexicon(field, subfieldCode) {
  const lexiconSubfield = field.subfields.find(sf => sf.code === subfieldCode);
  const candidate = lexiconSubfield ? lexiconSubfield.value : undefined;

  // Missing subfield or empty/absent value: nothing to report
  if (!candidate) {
    return undefined;
  }

  const isRecognised = finnishTerms.some(pattern => pattern.test(candidate));
  return isRecognised ? candidate : undefined;
}
|
|
316
|
+
|
|
317
|
+
/**
 * Tell whether a field's last punctuation-bearing subfield ends in an era
 * abbreviation that the generic Finnish-term dot rules must not touch.
 *
 * Only tag 648 (yso-aika terms) with lexicon 'yso/fin' or 'yso/swe' is checked;
 * every other combination yields `false`.
 *
 * @param {{tag: string, subfields: Array<{code: string, value?: string}>}} field - field to inspect
 * @param {string} termSubfieldCode - code of the subfield that carries the lexicon identifier
 * @param {boolean} [hasDot=true] - when true, look for the abbreviation followed by a
 *   final '.', i.e. a dot that should be kept; when false, look for the bare
 *   abbreviation at the very end, i.e. a term that still needs its dot
 * @returns {boolean} true when the value ends in one of the listed abbreviations
 */
function finnishException(field, termSubfieldCode, hasDot = true) {
  const lexicon = getLexicon(field, termSubfieldCode);
  if (!lexicon) {
    return false;
  }

  const lastSubfield = findLastSubfield(field);
  if (!lastSubfield || !lastSubfield.value) {
    return false;
  }

  // Some terms can end in '.' that we want to keep. Currently only yso-aika (648) is checked.
  // yso has 'MODEL.LA.' and 'Corel R.A.V.E.' but these are so rare they are not listed.
  if (field.tag !== '648') {
    return false;
  }

  const {value} = lastSubfield;

  if (lexicon === 'yso/fin') { // 'eaa.' appears in prefLabels and 'eKr.' in altLabels
    // Every alternative sits inside one group, so the leading space and the end
    // anchor apply to all of them ('jaa' included) in both variants.
    const finnishEra = hasDot ? / (?:eaa|[ej]Kr|jaa)\.$/u : / (?:eaa|[ej]Kr|jaa)$/u;
    return finnishEra.test(value);
  }

  if (lexicon === 'yso/swe') {
    const swedishEra = hasDot ? / (?:[ef]\.Kr|f\.v\.t)\.$/u : / (?:[ef]\.Kr|f\.v\.t)$/u;
    return swedishEra.test(value);
  }

  return false;
}
|
|
348
|
+
|
|
349
|
+
// This is used to find last subfield that should have punctuation
|
|
350
|
+
/**
 * Locate the last subfield that is a candidate for ending punctuation.
 *
 * @param {{subfields: Array<{code: string, value?: string}>}} field - field to inspect
 * @returns {{code: string, value: string}|undefined} the last subfield whose code is
 *   not numeric and which has a `value` property; `undefined` when there is none
 */
function findLastSubfield(field) {
  // Global isNaN is intentional: it coerces the code string, so numeric codes
  // such as '0' or '6' are treated as numbers and excluded.
  const eligible = field.subfields.filter(({code, ...rest}) => isNaN(code) && 'value' in rest);
  return eligible[eligible.length - 1];
}
|