@natlibfi/marc-record-validators-melinda 12.0.7 → 12.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/addMissingField041.js +6 -3
- package/dist/addMissingField041.js.map +2 -2
- package/dist/addMissingField336.js +7 -4
- package/dist/addMissingField336.js.map +2 -2
- package/dist/addMissingField337.js +6 -3
- package/dist/addMissingField337.js.map +2 -2
- package/dist/addMissingField338.js +8 -5
- package/dist/addMissingField338.js.map +2 -2
- package/dist/cyrillux-usemarcon-replacement.js +5 -2
- package/dist/cyrillux-usemarcon-replacement.js.map +2 -2
- package/dist/cyrillux.js +10 -7
- package/dist/cyrillux.js.map +2 -2
- package/dist/disambiguateSeriesStatements.js +2 -1
- package/dist/disambiguateSeriesStatements.js.map +2 -2
- package/dist/drop-terms.js +5 -4
- package/dist/drop-terms.js.map +2 -2
- package/dist/fix-33X.js +7 -4
- package/dist/fix-33X.js.map +2 -2
- package/dist/fix-country-codes.js +5 -0
- package/dist/fix-country-codes.js.map +2 -2
- package/dist/fix-language-codes.js +5 -1
- package/dist/fix-language-codes.js.map +2 -2
- package/dist/fix-sami-041.js +11 -10
- package/dist/fix-sami-041.js.map +2 -2
- package/dist/indicator-fixes.js +5 -1
- package/dist/indicator-fixes.js.map +2 -2
- package/dist/merge-fields/counterpartField.js +6 -6
- package/dist/merge-fields/counterpartField.js.map +2 -2
- package/dist/merge-fields/mergableIndicator.js +0 -3
- package/dist/merge-fields/mergableIndicator.js.map +2 -2
- package/dist/merge-fields/worldKnowledge.js.map +2 -2
- package/dist/mergeRelatorTermFields.js +9 -6
- package/dist/mergeRelatorTermFields.js.map +2 -2
- package/dist/normalize-dashes.js +7 -4
- package/dist/normalize-dashes.js.map +2 -2
- package/dist/normalize-identifiers.js.map +2 -2
- package/dist/normalize-utf8-diacritics.js.map +2 -2
- package/dist/normalizeFieldForComparison.js.map +1 -1
- package/dist/normalizeSubfieldValueForComparison.js.map +1 -1
- package/dist/punctuation2.js +5 -2
- package/dist/punctuation2.js.map +2 -2
- package/dist/reindexSubfield6OccurenceNumbers.js +11 -10
- package/dist/reindexSubfield6OccurenceNumbers.js.map +2 -2
- package/dist/removeDuplicateDataFields.js +3 -2
- package/dist/removeDuplicateDataFields.js.map +2 -2
- package/dist/removeInferiorDataFields.js.map +2 -2
- package/dist/resolveOrphanedSubfield6s.js +3 -2
- package/dist/resolveOrphanedSubfield6s.js.map +2 -2
- package/dist/sortSubfields.js +1 -1
- package/dist/sortSubfields.js.map +2 -2
- package/dist/stripPunctuation.js +4 -3
- package/dist/stripPunctuation.js.map +2 -2
- package/dist/subfield6Utils.js +4 -1
- package/dist/subfield6Utils.js.map +2 -2
- package/dist/subfield8Utils.js.map +2 -2
- package/dist/translate-terms.js +4 -3
- package/dist/translate-terms.js.map +2 -2
- package/dist/typeOfDate-008.js +3 -1
- package/dist/typeOfDate-008.js.map +2 -2
- package/dist/update-field-540.js.map +2 -2
- package/dist/urn.js +13 -12
- package/dist/urn.js.map +2 -2
- package/package.json +7 -7
- package/src/addMissingField041.js +8 -4
- package/src/addMissingField336.js +10 -5
- package/src/addMissingField337.js +9 -5
- package/src/addMissingField338.js +11 -6
- package/src/cyrillux-usemarcon-replacement.js +9 -5
- package/src/cyrillux.js +18 -12
- package/src/disambiguateSeriesStatements.js +4 -1
- package/src/drop-terms.js +8 -6
- package/src/fix-33X.js +10 -6
- package/src/fix-country-codes.js +7 -3
- package/src/fix-language-codes.js +8 -4
- package/src/fix-sami-041.js +13 -11
- package/src/indicator-fixes.js +10 -7
- package/src/merge-fields/counterpartField.js +10 -10
- package/src/merge-fields/mergableIndicator.js +3 -3
- package/src/merge-fields/worldKnowledge.js +11 -6
- package/src/mergeRelatorTermFields.js +12 -11
- package/src/normalize-dashes.js +11 -5
- package/src/normalize-identifiers.js +12 -19
- package/src/normalize-utf8-diacritics.js +6 -3
- package/src/normalizeFieldForComparison.js +2 -2
- package/src/normalizeSubfieldValueForComparison.js +2 -2
- package/src/punctuation2.js +34 -30
- package/src/reindexSubfield6OccurenceNumbers.js +13 -11
- package/src/removeDuplicateDataFields.js +29 -27
- package/src/removeInferiorDataFields.js +28 -24
- package/src/resolveOrphanedSubfield6s.js +6 -4
- package/src/sortSubfields.js +5 -5
- package/src/stripPunctuation.js +5 -3
- package/src/subfield6Utils.js +33 -35
- package/src/subfield8Utils.js +10 -7
- package/src/translate-terms.js +13 -9
- package/src/typeOfDate-008.js +4 -1
- package/src/update-field-540.js +7 -5
- package/src/urn.js +17 -13
- package/test-fixtures/drop-terms/02/metadata.json +1 -1
- package/test-fixtures/drop-terms/03/metadata.json +1 -1
- package/test-fixtures/drop-terms/04/metadata.json +1 -1
|
@@ -217,7 +217,7 @@ function getUnbalancedPairedSubfieldCode(field1, field2) {
|
|
|
217
217
|
// (I'm not saying that 100$b/c/d/q are in a 'paired' constraint, I'm just illustrating the issue here)
|
|
218
218
|
const pairable = pairableIdentifier(field1, field2, '(FIN11)');
|
|
219
219
|
const subfieldString = pairable ? removeNameRelatedSubfieldCodes(fullSubfieldString, field1.tag) : fullSubfieldString;
|
|
220
|
-
|
|
220
|
+
nvdebug(`CHECK ${pairable ? 'PAIRABLE ' : ''}${field1.tag} PAIRS: '${fullSubfieldString}' => '${subfieldString}'`, debugDev);
|
|
221
221
|
|
|
222
222
|
if (subfieldString === '') {
|
|
223
223
|
return false;
|
|
@@ -229,7 +229,7 @@ function getUnbalancedPairedSubfieldCode(field1, field2) {
|
|
|
229
229
|
|
|
230
230
|
function syntacticallyMergablePair(baseField, sourceField, config) {
|
|
231
231
|
// Indicators must typically be equal (there are exceptions such as non-filing characters though):
|
|
232
|
-
nvdebug("CHECK SYNTAX");
|
|
232
|
+
nvdebug("CHECK SYNTAX", debugDev);
|
|
233
233
|
if (!mergableIndicator1(baseField, sourceField, config)) {
|
|
234
234
|
nvdebug(`non-mergable (reason: indicator1): ${JSON.stringify(config)}`, debugDev);
|
|
235
235
|
return false;
|
|
@@ -310,7 +310,7 @@ function removeNameRelatedSubfieldCodes(codestring, tag) {
|
|
|
310
310
|
|
|
311
311
|
function pairableIdentifier(field1, field2, prefix) {
|
|
312
312
|
const normalizedPrefix = prefix;
|
|
313
|
-
nvdebug(`PREF '${prefix}' => '${normalizedPrefix}'
|
|
313
|
+
nvdebug(`PREF '${prefix}' => '${normalizedPrefix}'`, debugDev);
|
|
314
314
|
|
|
315
315
|
const prefixLength = normalizedPrefix.length;
|
|
316
316
|
const identifiers1 = getIdentifiers(field1);
|
|
@@ -378,8 +378,8 @@ function pairableValueInArray(tag, subfieldCode, val, arr) {
|
|
|
378
378
|
|
|
379
379
|
|
|
380
380
|
function tightSubfieldMatch(field1, field2, subfieldCode, mustHave = false) {
|
|
381
|
-
nvdebug(`${subfieldCode} F1: ${fieldToString(field1)}
|
|
382
|
-
nvdebug(`${subfieldCode} F2: ${fieldToString(field2)}
|
|
381
|
+
nvdebug(`${subfieldCode} F1: ${fieldToString(field1)}`, debugDev);
|
|
382
|
+
nvdebug(`${subfieldCode} F2: ${fieldToString(field2)}`, debugDev);
|
|
383
383
|
const values1 = getRelevantSubfieldValues(field1, subfieldCode);
|
|
384
384
|
const values2 = getRelevantSubfieldValues(field2, subfieldCode);
|
|
385
385
|
|
|
@@ -393,7 +393,7 @@ function tightSubfieldMatch(field1, field2, subfieldCode, mustHave = false) {
|
|
|
393
393
|
return false;
|
|
394
394
|
}
|
|
395
395
|
|
|
396
|
-
nvdebug(`Compare $${subfieldCode} contents:\n '${values1.join("'\n '")}' vs\n '${values2.join("'\n '")}'
|
|
396
|
+
nvdebug(`Compare $${subfieldCode} contents:\n '${values1.join("'\n '")}' vs\n '${values2.join("'\n '")}'`, debugDev);
|
|
397
397
|
return values1.every(v => pairableValueInArray(field1.tag, subfieldCode, v, values2)) && values2.every(v => pairableValueInArray(field1.tag, subfieldCode, v, values1));
|
|
398
398
|
}
|
|
399
399
|
|
|
@@ -443,25 +443,25 @@ function semanticallyMergablePair(baseField, sourceField) {
|
|
|
443
443
|
const allRequired = mergeConstraints[0].required || ''; // getMergeConstraintsForTag(field1.tag, 'required') || '';
|
|
444
444
|
const reallyRequired = asteriMatch ? removeNameRelatedSubfieldCodes(allRequired, field1.tag) : allRequired;
|
|
445
445
|
|
|
446
|
-
//nvdebug(`WP1: '${allRequired}' => ${reallyRequired}
|
|
446
|
+
//nvdebug(`WP1: '${allRequired}' => ${reallyRequired}`, debugDev);
|
|
447
447
|
if (!reallyRequired.split('').every(c => tightSubfieldMatch(field1, field2, c, true))) {
|
|
448
448
|
return false;
|
|
449
449
|
}
|
|
450
450
|
|
|
451
451
|
const allPaired = mergeConstraints[0].paired || ''; // getMergeConstraintsForTag(field1.tag, 'paired') || '';
|
|
452
452
|
const reallyPaired = asteriMatch ? removeNameRelatedSubfieldCodes(allPaired, field1.tag) : allPaired;
|
|
453
|
-
//nvdebug(`WP2: '${allPaired}' => ${reallyPaired}
|
|
453
|
+
//nvdebug(`WP2: '${allPaired}' => ${reallyPaired}`, debugDev);
|
|
454
454
|
if (!reallyPaired.split('').every(c => tightSubfieldMatch(field1, field2, c, false))) {
|
|
455
455
|
return false;
|
|
456
456
|
}
|
|
457
457
|
|
|
458
458
|
const allKeys = mergeConstraints[0].key || ''; // getMergeConstraintsForTag(field1.tag, 'key') || '';
|
|
459
459
|
const relevantKeys = asteriMatch ? removeNameRelatedSubfieldCodes(allKeys, field1.tag) : allKeys
|
|
460
|
-
//nvdebug(`WP3: keys='${allKeys}' => ${relevantKeys}
|
|
460
|
+
//nvdebug(`WP3: keys='${allKeys}' => ${relevantKeys}`, debugDev);
|
|
461
461
|
if (!relevantKeys.split('').every(c => looseSubfieldMatch(field1, field2, c))) {
|
|
462
462
|
return false;
|
|
463
463
|
}
|
|
464
|
-
//nvdebug('WP4');
|
|
464
|
+
//nvdebug('WP4', debugDev);
|
|
465
465
|
|
|
466
466
|
// required/paired/keys checks did not fail. Now check whether they really succeeded
|
|
467
467
|
if (allRequired.length > 0) { // I think we should use all here
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
import createDebugLogger from 'debug';
|
|
1
|
+
//import createDebugLogger from 'debug';
|
|
2
2
|
import {marc21GetTagsLegalInd1Value, marc21GetTagsLegalInd2Value} from '../utils.js';
|
|
3
3
|
|
|
4
4
|
// Specs: https://workgroups.helsinki.fi/x/K1ohCw (though we occasionally differ from them)...
|
|
5
5
|
|
|
6
|
-
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:merge-fields:mergableIndicator');
|
|
6
|
+
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:merge-fields:mergableIndicator');
|
|
7
7
|
//const debugData = debug.extend('data');
|
|
8
|
-
const debugDev = debug.extend('dev');
|
|
8
|
+
//const debugDev = debug.extend('dev');
|
|
9
9
|
|
|
10
10
|
const ind1NonFilingChars = ['130', '630', '730', '740'];
|
|
11
11
|
const ind2NonFilingChars = ['222', '240', '242', '243', '245', '830'];
|
|
@@ -1,7 +1,12 @@
|
|
|
1
1
|
//import {nvdebug} from '../utils.js';
|
|
2
|
+
//import createDebugLogger from 'debug';
|
|
2
3
|
|
|
3
4
|
// NB! This file (or at least synonyms) should eventually be moved away from merge to '..'.
|
|
4
5
|
|
|
6
|
+
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:worldKnowledge');
|
|
7
|
+
//const debugData = debug.extend('data');
|
|
8
|
+
//const debugDev = debug.extend('dev');
|
|
9
|
+
|
|
5
10
|
|
|
6
11
|
export function valueCarriesMeaning(tag, subfieldCode, value) {
|
|
7
12
|
// Some data is pretty meaningless, and as meaningless is pretty close to nothing, this meaningless data should not prevent merge.
|
|
@@ -36,20 +41,20 @@ export function getSynonyms(term, tag = undefined, subfieldCode = undefined, pre
|
|
|
36
41
|
if (!term) {
|
|
37
42
|
return [];
|
|
38
43
|
}
|
|
39
|
-
//nvdebug(`WP1 CANDS: ${synonyms.length} FOR '${term}'
|
|
44
|
+
//nvdebug(`WP1 CANDS: ${synonyms.length} FOR '${term}'`, debugDev);
|
|
40
45
|
const relevantLanguges = relevantLanguagesString.split(/\s+/u);
|
|
41
46
|
const normalizedTerm = ignoreCase ? term.toLowerCase() : term;
|
|
42
47
|
const synonymsWithTag = tag ? synonyms.filter(s => s.tags.includes(tag)) : synonyms;
|
|
43
48
|
if (synonymsWithTag.length === 0) {
|
|
44
49
|
return [];
|
|
45
50
|
}
|
|
46
|
-
//nvdebug(`WP2 (FILTER ${tag}) CANDS: ${synonymsWithTag.length}
|
|
51
|
+
//nvdebug(`WP2 (FILTER ${tag}) CANDS: ${synonymsWithTag.length}`, debugDev);
|
|
47
52
|
const synonymsWithTagAndCode = subfieldCode ? synonymsWithTag.filter(s => s.code === subfieldCode) : synonymsWithTag;
|
|
48
|
-
//nvdebug(`WP3 (FILTER $${subfieldCode}) CANDS: ${synonymsWithTagAndCode.length}:\n${JSON.stringify(synonymsWithTagAndCode)}
|
|
53
|
+
//nvdebug(`WP3 (FILTER $${subfieldCode}) CANDS: ${synonymsWithTagAndCode.length}:\n${JSON.stringify(synonymsWithTagAndCode)}`, debugDev);
|
|
49
54
|
const matchingSynonyms = synonymsWithTagAndCode.filter(s => termAndLangMatch(s));
|
|
50
55
|
|
|
51
56
|
if (preferredLanguage && matchingSynonyms.length > 0) {
|
|
52
|
-
//console.log(`USING PREFERRED LANG '${preferredLanguage}' for TERM '${term}':\n${JSON.stringify(matchingSynonyms)}
|
|
57
|
+
//console.log(`USING PREFERRED LANG '${preferredLanguage}' for TERM '${term}':\n${JSON.stringify(matchingSynonyms)}`, debugDev);
|
|
53
58
|
return matchingSynonyms.map(s => s[preferredLanguage]);
|
|
54
59
|
}
|
|
55
60
|
return matchingSynonyms;
|
|
@@ -79,7 +84,7 @@ export function getSynonyms(term, tag = undefined, subfieldCode = undefined, pre
|
|
|
79
84
|
export function getSynonym(tag, subfieldCode, originalValue) {
|
|
80
85
|
const finnishForm = getSynonyms(originalValue, tag, subfieldCode, 'fin');
|
|
81
86
|
if (finnishForm.length === 1) {
|
|
82
|
-
//nvdebug(`FINNISH FORM FOR ${tag}$${subfieldCode}: '${finnishForm[0]}'
|
|
87
|
+
//nvdebug(`FINNISH FORM FOR ${tag}$${subfieldCode}: '${finnishForm[0]}'`, debugDev);
|
|
83
88
|
return finnishForm[0];
|
|
84
89
|
}
|
|
85
90
|
return originalValue;
|
|
@@ -90,7 +95,7 @@ export function normalizeForSamenessCheck(tag, subfieldCode, originalValue) {
|
|
|
90
95
|
// Repeatable subfields are currently handled in mergeSubfields.js. Only non-repeatable subfields block field merge,
|
|
91
96
|
// (This split is suboptimal... Minimum fix: make this distinction cleaner...)
|
|
92
97
|
|
|
93
|
-
//nvdebug(`TRYING TO DO ${tag}$${subfieldCode} '${originalValue}'
|
|
98
|
+
//nvdebug(`TRYING TO DO ${tag}$${subfieldCode} '${originalValue}'`, debugDev);
|
|
94
99
|
originalValue = getSynonym(tag, subfieldCode, originalValue);
|
|
95
100
|
|
|
96
101
|
if (subfieldCode === 'a' && ['100', '600', '700', '800'].includes(tag)) { // "Etunimi Sukunimi"...
|
|
@@ -11,11 +11,12 @@ import {fieldFixPunctuation, fieldStripPunctuation} from './punctuation2.js';
|
|
|
11
11
|
import {fieldToString, nvdebug} from './utils.js';
|
|
12
12
|
import {sortAdjacentSubfields} from './sortSubfields.js';
|
|
13
13
|
import {sortAdjacentRelatorTerms, tagToRelatorTermSubfieldCode} from './sortRelatorTerms.js';
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
import createDebugLogger from 'debug';
|
|
15
|
+
|
|
16
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:mergeRelatorTermFields');
|
|
17
17
|
//const debugData = debug.extend('data');
|
|
18
|
-
|
|
18
|
+
const debugDev = debug.extend('dev');
|
|
19
|
+
|
|
19
20
|
|
|
20
21
|
export default function () {
|
|
21
22
|
|
|
@@ -80,31 +81,31 @@ function copyRelatorSubfields(fromField, toField) {
|
|
|
80
81
|
|
|
81
82
|
function mergeRelatorTermFields(record, fix = false) {
|
|
82
83
|
// NV: 111/711, 751 and 752 were so rare that I did not add them here. Can't remember why I skipped 6XX and 8XX...
|
|
83
|
-
let fields = record.get('(?:[17][01]0|720)');
|
|
84
|
+
let fields = record.get('(?:[17][01]0|720)');
|
|
84
85
|
let result = [];
|
|
85
86
|
const comparisonFieldsAsString = fields.map(f => fieldToString(createNormalizedCloneWithoutRelatorTerms(f)));
|
|
86
87
|
|
|
87
|
-
nvdebug(`mergeRelatorTermFields(): ${fields.length} cand field(s) found
|
|
88
|
+
nvdebug(`mergeRelatorTermFields(): ${fields.length} cand field(s) found`, debugDev);
|
|
88
89
|
for(let i=0; i < fields.length-1; i++) {
|
|
89
90
|
let currField = fields[i];
|
|
90
91
|
if (currField.deleted) {
|
|
91
92
|
continue;
|
|
92
93
|
}
|
|
93
|
-
nvdebug(`RT: Trying to pair ${comparisonFieldsAsString[i]}/${i}
|
|
94
|
+
nvdebug(`RT: Trying to pair ${comparisonFieldsAsString[i]}/${i}`, debugDev);
|
|
94
95
|
for (let j=i+1; j < fields.length; j++) {
|
|
95
|
-
nvdebug(` Compare with ${comparisonFieldsAsString[j]}/${j}
|
|
96
|
+
nvdebug(` Compare with ${comparisonFieldsAsString[j]}/${j}`, debugDev);
|
|
96
97
|
let mergableField = fields[j];
|
|
97
98
|
// Skip 1/7 from 1XX/7XX for similarity check:
|
|
98
99
|
if ( comparisonFieldsAsString[i].substring(1) !== comparisonFieldsAsString[j].substring(1)) {
|
|
99
|
-
nvdebug(" NOT PAIR");
|
|
100
|
+
nvdebug(" NOT PAIR", debugDev);
|
|
100
101
|
continue;
|
|
101
102
|
}
|
|
102
103
|
if (mergableField.deleted) {
|
|
103
|
-
nvdebug(" DELETED");
|
|
104
|
+
nvdebug(" DELETED", debugDev);
|
|
104
105
|
continue;
|
|
105
106
|
}
|
|
106
107
|
const str = `MERGE RELATOR TERM FIELD: ${fieldToString(mergableField)}`;
|
|
107
|
-
nvdebug(str);
|
|
108
|
+
nvdebug(str, debugDev);
|
|
108
109
|
|
|
109
110
|
if(!result.includes(str)) {
|
|
110
111
|
result.push(str)
|
package/src/normalize-dashes.js
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
|
-
//import createDebugLogger from 'debug';
|
|
2
1
|
import clone from 'clone';
|
|
3
2
|
import {fieldToString, isContentSubfieldCode, nvdebug} from './utils.js';
|
|
4
3
|
|
|
4
|
+
import createDebugLogger from 'debug';
|
|
5
|
+
|
|
6
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:normaliza-dashes');
|
|
7
|
+
//const debugData = debug.extend('data');
|
|
8
|
+
const debugDev = debug.extend('dev');
|
|
9
|
+
|
|
10
|
+
|
|
5
11
|
// Author(s): Nicholas Volk
|
|
6
12
|
export default function () {
|
|
7
13
|
|
|
@@ -11,7 +17,7 @@ export default function () {
|
|
|
11
17
|
};
|
|
12
18
|
|
|
13
19
|
function fix(record) {
|
|
14
|
-
nvdebug(`FIX ME
|
|
20
|
+
nvdebug(`FIX ME`, debugDev);
|
|
15
21
|
record.fields.forEach(field => {
|
|
16
22
|
fixDashes(field);
|
|
17
23
|
});
|
|
@@ -23,7 +29,7 @@ export default function () {
|
|
|
23
29
|
function validate(record) {
|
|
24
30
|
const res = {message: []};
|
|
25
31
|
|
|
26
|
-
nvdebug(`VALIDATE ME
|
|
32
|
+
nvdebug(`VALIDATE ME`, debugDev);
|
|
27
33
|
record.fields?.forEach(field => {
|
|
28
34
|
validateField(field, res);
|
|
29
35
|
});
|
|
@@ -34,7 +40,7 @@ export default function () {
|
|
|
34
40
|
|
|
35
41
|
function validateField(field, res) {
|
|
36
42
|
const orig = fieldToString(field);
|
|
37
|
-
nvdebug(` VALIDATE FIELD '${orig}'
|
|
43
|
+
nvdebug(` VALIDATE FIELD '${orig}'`, debugDev);
|
|
38
44
|
|
|
39
45
|
const normalizedField = fixDashes(clone(field));
|
|
40
46
|
const mod = fieldToString(normalizedField);
|
|
@@ -52,7 +58,7 @@ function fixDashes(field) {
|
|
|
52
58
|
return field;
|
|
53
59
|
}
|
|
54
60
|
|
|
55
|
-
nvdebug(`Dashing ${fieldToString(field)}
|
|
61
|
+
nvdebug(`Dashing ${fieldToString(field)}`, debugDev);
|
|
56
62
|
|
|
57
63
|
field.subfields.forEach(sf => subfieldFixDashes(sf));
|
|
58
64
|
|
|
@@ -1,17 +1,10 @@
|
|
|
1
1
|
// Relocated from melinda-marc-record-merge-reducers (and renamed)
|
|
2
|
-
//import createDebugLogger from 'debug';
|
|
3
2
|
import clone from 'clone';
|
|
4
|
-
import {fieldToString} from './utils.js';
|
|
5
|
-
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:normalize-identifiers');
|
|
3
|
+
import {fieldToString, /*nvdebug*/} from './utils.js';
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
func(message);
|
|
11
|
-
}
|
|
12
|
-
console.info(message); // eslint-disable-line no-console
|
|
13
|
-
}
|
|
14
|
-
*/
|
|
5
|
+
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:normalize-identifiers');
|
|
6
|
+
//const debugData = debug.extend('data');
|
|
7
|
+
//const debugDev = debug.extend('dev');
|
|
15
8
|
|
|
16
9
|
export default function () {
|
|
17
10
|
|
|
@@ -32,9 +25,9 @@ export default function () {
|
|
|
32
25
|
}
|
|
33
26
|
*/
|
|
34
27
|
|
|
35
|
-
//nvdebug(`NORMALIZE CONTROL NUMBER FIX`,
|
|
28
|
+
//nvdebug(`NORMALIZE CONTROL NUMBER FIX`, debugDev);
|
|
36
29
|
record.fields.forEach(field => {
|
|
37
|
-
//nvdebug(` NORMALIZE CONTROL NUMBER FIX ${fieldToString(field)}`,
|
|
30
|
+
//nvdebug(` NORMALIZE CONTROL NUMBER FIX ${fieldToString(field)}`, debugDev);
|
|
38
31
|
|
|
39
32
|
fieldNormalizeControlNumbers(field);
|
|
40
33
|
//validateField(field, true, message);
|
|
@@ -46,7 +39,7 @@ export default function () {
|
|
|
46
39
|
|
|
47
40
|
function validate(record) {
|
|
48
41
|
const res = {message: []};
|
|
49
|
-
//nvdebug(`NORMALIZE CONTROL NUMBER VALIDATE`,
|
|
42
|
+
//nvdebug(`NORMALIZE CONTROL NUMBER VALIDATE`, debugDev);
|
|
50
43
|
// Actual parsing of all fields
|
|
51
44
|
/*
|
|
52
45
|
if (!record.fields) {
|
|
@@ -55,7 +48,7 @@ export default function () {
|
|
|
55
48
|
*/
|
|
56
49
|
|
|
57
50
|
record.fields.forEach(field => {
|
|
58
|
-
//nvdebug(` NORMALIZE CONTROL NUMBER VALIDATE ${fieldToString(field)}`,
|
|
51
|
+
//nvdebug(` NORMALIZE CONTROL NUMBER VALIDATE ${fieldToString(field)}`, debugDev);
|
|
59
52
|
validateField(field, res);
|
|
60
53
|
});
|
|
61
54
|
|
|
@@ -126,8 +119,8 @@ function normalizeNineDigitIDs(value, targetFormat = 'ALEPH_INTERNAL') {
|
|
|
126
119
|
const currPrefix = value.slice(0, -9);
|
|
127
120
|
|
|
128
121
|
if (currPrefix in mappings) {
|
|
129
|
-
//nvdebug(`${currPrefix}, TF:${targetFormat}
|
|
130
|
-
//nvdebug(`${JSON.stringify(mappings[currPrefix])}
|
|
122
|
+
//nvdebug(`${currPrefix}, TF:${targetFormat}...`, debugDev);
|
|
123
|
+
//nvdebug(`${JSON.stringify(mappings[currPrefix])}`, debugDev);
|
|
131
124
|
return `${mappings[currPrefix][targetFormat]}${nineDigitTail}`;
|
|
132
125
|
}
|
|
133
126
|
return value;
|
|
@@ -167,7 +160,7 @@ export function normalizeControlSubfieldValue(value = '', targetFormat = 'ALEPH_
|
|
|
167
160
|
|
|
168
161
|
//export function normalizableSubfieldPrefix(tag, sf) {
|
|
169
162
|
export function normalizeAs(tag, subfieldCode) {
|
|
170
|
-
//nvdebug(`nAs ${tag}, ${subfieldCode}
|
|
163
|
+
//nvdebug(`nAs ${tag}, ${subfieldCode}`, debugDev);
|
|
171
164
|
if (subfieldCode === '0' || subfieldCode === '1' || subfieldCode === 'w') {
|
|
172
165
|
return 'ALEPH_INTERNAL';
|
|
173
166
|
}
|
|
@@ -189,7 +182,7 @@ export function fieldNormalizeControlNumbers(field) {
|
|
|
189
182
|
field.subfields.forEach(sf => {
|
|
190
183
|
const targetFormat = normalizeAs(field.tag, sf.code);
|
|
191
184
|
if (targetFormat !== undefined) {
|
|
192
|
-
//nvdebug(`NORMALIZE SUBFIELD $${sf.code} IN FIELD: '${fieldToString(field)}' TO ${targetFormat}
|
|
185
|
+
//nvdebug(`NORMALIZE SUBFIELD $${sf.code} IN FIELD: '${fieldToString(field)}' TO ${targetFormat}`, debugDev);
|
|
193
186
|
sf.value = normalizeControlSubfieldValue(sf.value, targetFormat);
|
|
194
187
|
return;
|
|
195
188
|
}
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
//import createDebugLogger from 'debug';
|
|
2
2
|
import clone from 'clone';
|
|
3
3
|
import {convert as nongenericNormalization} from './unicode-decomposition.js';
|
|
4
|
-
import {fieldToString} from './utils.js';
|
|
4
|
+
import {fieldToString /*, nvdebug */} from './utils.js';
|
|
5
5
|
|
|
6
6
|
// Note that https://github.com/NatLibFi/marc-record-validators-melinda/blob/master/src/unicode-decomposition.js contains
|
|
7
7
|
// similar functionalities. It's less generic and lacks diacritic removal but has its advantages as well.
|
|
8
8
|
|
|
9
9
|
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/normalize-utf-diacritics');
|
|
10
|
+
//const debugData = debug.extend('data');
|
|
11
|
+
//const debugDev = debug.extend('dev');
|
|
12
|
+
|
|
10
13
|
|
|
11
14
|
// See also https://github.com/NatLibFi/marc-record-validators-melinda/blob/master/src/unicode-decomposition.js .
|
|
12
15
|
// It uses a list of convertable characters whilst this uses a generic stuff as well.
|
|
@@ -111,13 +114,13 @@ export function fieldFixComposition(field) {
|
|
|
111
114
|
return field;
|
|
112
115
|
}
|
|
113
116
|
//const originalValue = fieldToString(field);
|
|
114
|
-
//nvdebug(`fFC: '${originalValue}'`,
|
|
117
|
+
//nvdebug(`fFC: '${originalValue}'`, debugDev);
|
|
115
118
|
field.subfields.forEach((subfield, index) => {
|
|
116
119
|
field.subfields[index].value = fixComposition(subfield.value);
|
|
117
120
|
});
|
|
118
121
|
//const newValue = fieldToString(field);
|
|
119
122
|
//if (originalValue !== newValue) {
|
|
120
|
-
//
|
|
123
|
+
// nvdebug(`FIXCOMP: '${originalValue}' => '${newValue}'`, debugDev);
|
|
121
124
|
//}
|
|
122
125
|
return field;
|
|
123
126
|
}
|
|
@@ -185,7 +185,7 @@ function normalizeISBN(field) {
|
|
|
185
185
|
return;
|
|
186
186
|
}
|
|
187
187
|
|
|
188
|
-
//nvdebug(`ISBN-field? ${fieldToString(field)}
|
|
188
|
+
//nvdebug(`ISBN-field? ${fieldToString(field)}`, debugDev);
|
|
189
189
|
const relevantSubfields = field.subfields.filter(sf => tagAndSubfieldCodeReferToIsbn(field.tag, sf.code) && looksLikeIsbn(sf.value));
|
|
190
190
|
relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf));
|
|
191
191
|
|
|
@@ -193,7 +193,7 @@ function normalizeISBN(field) {
|
|
|
193
193
|
if (valuelessSubfield(sf)) {
|
|
194
194
|
return;
|
|
195
195
|
}
|
|
196
|
-
//nvdebug(` ISBN-subfield? ${subfieldToString(sf)}
|
|
196
|
+
//nvdebug(` ISBN-subfield? ${subfieldToString(sf)}`, debugDev);
|
|
197
197
|
sf.value = sf.value.replace(/-/ug, '');
|
|
198
198
|
sf.value = sf.value.replace(/x/u, 'X');
|
|
199
199
|
}
|
|
@@ -105,8 +105,8 @@ export function partsAgree(value1, value2, tag, subfieldCode) {
|
|
|
105
105
|
}
|
|
106
106
|
const [partType1, partYear1, partNumber1] = splitAndNormalizePartData(value1);
|
|
107
107
|
const [partType2, partYear2, partNumber2] = splitAndNormalizePartData(value2);
|
|
108
|
-
//nvdebug(`P1: ${partType1} | ${partYear1} | ${partNumber1}
|
|
109
|
-
//nvdebug(`P2: ${partType2} | ${partYear2} | ${partNumber2}
|
|
108
|
+
//nvdebug(`P1: ${partType1} | ${partYear1} | ${partNumber1}`, debugDev);
|
|
109
|
+
//nvdebug(`P2: ${partType2} | ${partYear2} | ${partNumber2}`, debugDev);
|
|
110
110
|
if (partNumber1 !== partNumber2) {
|
|
111
111
|
return false;
|
|
112
112
|
}
|
package/src/punctuation2.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
/* eslint-disable max-lines */
|
|
1
2
|
/*
|
|
2
3
|
* punctuation.js -- try and fix a marc field punctuation
|
|
3
4
|
*
|
|
@@ -12,11 +13,13 @@
|
|
|
12
13
|
import {validateSingleField} from './ending-punctuation.js';
|
|
13
14
|
import {tagToDataProvenanceSubfieldCode} from './dataProvenanceUtils.js';
|
|
14
15
|
import {fieldGetUnambiguousTag} from './subfield6Utils.js';
|
|
15
|
-
|
|
16
|
+
import createDebugLogger from 'debug';
|
|
16
17
|
import {fieldToString, isContentSubfieldCode, nvdebug} from './utils.js';
|
|
17
18
|
import clone from 'clone';
|
|
18
19
|
|
|
19
|
-
|
|
20
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:punctuation2');
|
|
21
|
+
//const debugData = debug.extend('data');
|
|
22
|
+
const debugDev = debug.extend('dev');
|
|
20
23
|
|
|
21
24
|
const descriptionString = 'Remove invalid and add valid punctuation to data fields';
|
|
22
25
|
export default function () {
|
|
@@ -26,14 +29,14 @@ export default function () {
|
|
|
26
29
|
};
|
|
27
30
|
|
|
28
31
|
function fix(record) {
|
|
29
|
-
nvdebug(`${descriptionString}: fixer
|
|
32
|
+
nvdebug(`${descriptionString}: fixer`, debugDev);
|
|
30
33
|
const res = {message: [], fix: [], valid: true};
|
|
31
34
|
record.fields.forEach(f => fieldFixPunctuation(f));
|
|
32
35
|
return res;
|
|
33
36
|
}
|
|
34
37
|
|
|
35
38
|
function validate(record) {
|
|
36
|
-
nvdebug(`${descriptionString}: validate
|
|
39
|
+
nvdebug(`${descriptionString}: validate`, debugDev);
|
|
37
40
|
|
|
38
41
|
const fieldsNeedingModification = record.fields.filter(f => fieldNeedsModification(f, true));
|
|
39
42
|
|
|
@@ -396,19 +399,19 @@ const addPairedPunctuationRules = {
|
|
|
396
399
|
|
|
397
400
|
/*
|
|
398
401
|
function debugRule(rule) {
|
|
399
|
-
//nvdebug('');
|
|
400
|
-
nvdebug(`NAME ${rule.name ? rule.name : '<unnamed>'}
|
|
401
|
-
nvdebug(`SUBFIELD CODE '${rule.code}' FOLLOWED BY SUBFIELD CODE '${rule.followedBy}'
|
|
402
|
+
//nvdebug('', debugDev);
|
|
403
|
+
nvdebug(`NAME ${rule.name ? rule.name : '<unnamed>'}`, debugDev);
|
|
404
|
+
nvdebug(`SUBFIELD CODE '${rule.code}' FOLLOWED BY SUBFIELD CODE '${rule.followedBy}'`, debugDev);
|
|
402
405
|
if ('add' in rule) {
|
|
403
|
-
nvdebug(`ADD '${rule.add}'
|
|
406
|
+
nvdebug(`ADD '${rule.add}'`, debugDev);
|
|
404
407
|
}
|
|
405
408
|
if ('remove' in rule) {
|
|
406
|
-
nvdebug(`REMOVE '${rule.remove}'
|
|
409
|
+
nvdebug(`REMOVE '${rule.remove}'`, debugDev);
|
|
407
410
|
}
|
|
408
411
|
if ('context' in rule) {
|
|
409
|
-
nvdebug(`CONTEXT '${rule.context.toString()}'
|
|
412
|
+
nvdebug(`CONTEXT '${rule.context.toString()}'`, debugDev);
|
|
410
413
|
}
|
|
411
|
-
//nvdebug('');
|
|
414
|
+
//nvdebug('', debugDev);
|
|
412
415
|
}
|
|
413
416
|
*/
|
|
414
417
|
|
|
@@ -440,19 +443,19 @@ function ruleAppliesToField(rule, field) {
|
|
|
440
443
|
|
|
441
444
|
|
|
442
445
|
function ruleAppliesToCurrentSubfield(rule, subfield) {
|
|
443
|
-
//nvdebug(` Apply rule on LHS
|
|
446
|
+
//nvdebug(` Apply rule on LHS?`, debugDev);
|
|
444
447
|
if (!ruleAppliesToSubfieldCode(rule.code, subfield.code)) {
|
|
445
|
-
//nvdebug(` Reject rule
|
|
448
|
+
//nvdebug(` Reject rule!`, debugDev);
|
|
446
449
|
return false;
|
|
447
450
|
}
|
|
448
451
|
if ('context' in rule) {
|
|
449
|
-
//nvdebug(` Check '${subfield.value}' versus '${rule.context.toString()}'
|
|
452
|
+
//nvdebug(` Check '${subfield.value}' versus '${rule.context.toString()}'`, debugDev);
|
|
450
453
|
if (!subfield.value.match(rule.context)) { // njsscan-ignore: regex_injection_dos
|
|
451
|
-
//nvdebug(` Reject rule
|
|
454
|
+
//nvdebug(` Reject rule!`, debugDev);
|
|
452
455
|
return false;
|
|
453
456
|
}
|
|
454
457
|
}
|
|
455
|
-
//nvdebug(` Apply rule
|
|
458
|
+
//nvdebug(` Apply rule!`, debugDev);
|
|
456
459
|
return true;
|
|
457
460
|
}
|
|
458
461
|
|
|
@@ -480,23 +483,24 @@ function ruleAppliesToNextSubfield(rule, nextSubfield) {
|
|
|
480
483
|
|
|
481
484
|
function checkRule(rule, field, subfield1, subfield2) {
|
|
482
485
|
if (!ruleAppliesToField(rule, field)) {
|
|
483
|
-
//nvdebug(`FAIL ON WHOLE FIELD: '${fieldToString(field)}
|
|
486
|
+
//nvdebug(`FAIL ON WHOLE FIELD: '${fieldToString(field)}`, debugDev);
|
|
484
487
|
return false;
|
|
485
488
|
}
|
|
486
489
|
//const name = rule.name || 'UNNAMED';
|
|
487
490
|
if (!ruleAppliesToCurrentSubfield(rule, subfield1)) {
|
|
488
|
-
//nvdebug(`${name}: FAIL ON LHS SUBFIELD: '$${subfield1.code} ${subfield1.value}', SF=${rule.code}`,
|
|
491
|
+
//nvdebug(`${name}: FAIL ON LHS SUBFIELD: '$${subfield1.code} ${subfield1.value}', SF=${rule.code}`, debugDev);
|
|
489
492
|
return false;
|
|
490
493
|
}
|
|
491
494
|
|
|
492
495
|
// NB! This is not a perfect solution. We might have $e$0$e where $e$0 punctuation should actually be based on $e$e rules
|
|
493
496
|
if (!ruleAppliesToNextSubfield(rule, subfield2)) {
|
|
494
497
|
//const msg = subfield2 ? `${name}: FAIL ON RHS SUBFIELD '${subfield2.code}' not in [${rule.followedBy}]` : `${name}: FAIL ON RHS FIELD`;
|
|
495
|
-
//nvdebug(msg,
|
|
498
|
+
//nvdebug(msg, debugDev);
|
|
496
499
|
return false;
|
|
497
500
|
}
|
|
498
501
|
|
|
499
|
-
|
|
502
|
+
|
|
503
|
+
//nvdebug(`${rule.name ? rule.name : '<unnamed>'}: ACCEPT ${rule.code} (${subfield1.code}), SF2=${rule.followedBy} (${subfield2 ? subfield2.code : '#'})`, debugDev);
|
|
500
504
|
return true;
|
|
501
505
|
}
|
|
502
506
|
|
|
@@ -513,9 +517,9 @@ function applyPunctuationRules(field, subfield1, subfield2, ruleArray = null, op
|
|
|
513
517
|
return;
|
|
514
518
|
}
|
|
515
519
|
|
|
516
|
-
//nvdebug(`PUNCTUATE ${field.tag}/${tag2} '${subfieldToString(subfield1)}' XXX '${subfield2 ? subfieldToString(subfield2) : '#'} }
|
|
520
|
+
//nvdebug(`PUNCTUATE ${field.tag}/${tag2} '${subfieldToString(subfield1)}' XXX '${subfield2 ? subfieldToString(subfield2) : '#'} }`, debugDev);
|
|
517
521
|
|
|
518
|
-
//nvdebug(`OP=${operation} ${tag2}: '${subfield1.code}: ${subfield1.value}' ??? '${subfield2 ? subfield2.code : '#'}'
|
|
522
|
+
//nvdebug(`OP=${operation} ${tag2}: '${subfield1.code}: ${subfield1.value}' ??? '${subfield2 ? subfield2.code : '#'}'`, debugDev);
|
|
519
523
|
const candRules = ruleArray[tag2];
|
|
520
524
|
candRules.every(rule => { // uses "every", not "forEach", so that only one rule is applied to the given subfields
|
|
521
525
|
//debugRule(rule);
|
|
@@ -525,20 +529,20 @@ function applyPunctuationRules(field, subfield1, subfield2, ruleArray = null, op
|
|
|
525
529
|
|
|
526
530
|
//const originalValue = subfield1.value;
|
|
527
531
|
if (rule.remove && [REMOVE, REMOVE_AND_ADD].includes(operation) && subfield1.value.match(rule.remove)) {
|
|
528
|
-
//nvdebug(` PUNC REMOVAL TO BE PERFORMED FOR $${subfield1.code} '${subfield1.value}'`,
|
|
532
|
+
//nvdebug(` PUNC REMOVAL TO BE PERFORMED FOR $${subfield1.code} '${subfield1.value}'`, debugDev);
|
|
529
533
|
subfield1.value = subfield1.value.replace(rule.remove, '');
|
|
530
|
-
//nvdebug(` PUNC REMOVAL PERFORMED FOR '${subfield1.value}'
|
|
534
|
+
//nvdebug(` PUNC REMOVAL PERFORMED FOR '${subfield1.value}'`, debugDev);
|
|
531
535
|
return false;
|
|
532
536
|
}
|
|
533
537
|
if (rule.add && [ADD, REMOVE_AND_ADD].includes(operation)) {
|
|
534
538
|
subfield1.value += rule.add;
|
|
535
|
-
//nvdebug(` ADDED '${rule.add}' TO FORM '${subfield1.value}' USING RULE ${rule.name}
|
|
539
|
+
//nvdebug(` ADDED '${rule.add}' TO FORM '${subfield1.value}' USING RULE ${rule.name}`, debugDev);
|
|
536
540
|
return false;
|
|
537
541
|
}
|
|
538
542
|
|
|
539
543
|
/*
|
|
540
544
|
if (subfield1.value !== originalValue) {
|
|
541
|
-
nvdebug(` PROCESS PUNC: '‡${subfield1.code} ${originalValue}' => '‡${subfield1.code} ${subfield1.value}'`,
|
|
545
|
+
nvdebug(` PROCESS PUNC: '‡${subfield1.code} ${originalValue}' => '‡${subfield1.code} ${subfield1.value}'`, debugDev);
|
|
542
546
|
}
|
|
543
547
|
*/
|
|
544
548
|
|
|
@@ -552,11 +556,11 @@ function subfieldFixPunctuation(field, subfield1, subfield2) {
|
|
|
552
556
|
}
|
|
553
557
|
|
|
554
558
|
function subfieldStripPunctuation(field, subfield1, subfield2) {
|
|
555
|
-
//nvdebug(`FSP1: '${subfield1.value}'
|
|
559
|
+
//nvdebug(`FSP1: '${subfield1.value}'`, debugDev);
|
|
556
560
|
applyPunctuationRules(field, subfield1, subfield2, cleanValidPunctuationRules, REMOVE);
|
|
557
|
-
//nvdebug(`FSP2: '${subfield1.value}'
|
|
561
|
+
//nvdebug(`FSP2: '${subfield1.value}'`, debugDev);
|
|
558
562
|
applyPunctuationRules(field, subfield1, subfield2, cleanCrappyPunctuationRules, REMOVE);
|
|
559
|
-
//nvdebug(`FSP3: '${subfield1.value}'
|
|
563
|
+
//nvdebug(`FSP3: '${subfield1.value}'`, debugDev);
|
|
560
564
|
|
|
561
565
|
}
|
|
562
566
|
|
|
@@ -578,7 +582,7 @@ export function fieldFixPunctuation(field) {
|
|
|
578
582
|
if (!field.subfields) {
|
|
579
583
|
return field;
|
|
580
584
|
}
|
|
581
|
-
//nvdebug(`################### fieldFixPunctuation() TEST ${fieldToString(field)}
|
|
585
|
+
//nvdebug(`################### fieldFixPunctuation() TEST ${fieldToString(field)}`, debugDev);
|
|
582
586
|
|
|
583
587
|
field.subfields.forEach((sf, i) => {
|
|
584
588
|
// NB! instead of next subfield, we should actually get next *non-control-subfield*!!!
|