@natlibfi/marc-record-validators-melinda 11.3.1 → 11.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/dist/melindaCustomMergeFields.json +5120 -0
- package/dist/merge-fields/config.json +83 -0
- package/dist/merge-fields/controlSubfields.js +278 -0
- package/dist/merge-fields/controlSubfields.js.map +1 -0
- package/dist/merge-fields/counterpartField.js +674 -0
- package/dist/merge-fields/counterpartField.js.map +1 -0
- package/dist/merge-fields/index.js +76 -0
- package/dist/merge-fields/index.js.map +1 -0
- package/dist/merge-fields/mergableIndicator.js +95 -0
- package/dist/merge-fields/mergableIndicator.js.map +1 -0
- package/dist/merge-fields/mergableTag.js +33 -0
- package/dist/merge-fields/mergableTag.js.map +1 -0
- package/dist/merge-fields/mergeConstraints.js +1225 -0
- package/dist/merge-fields/mergeConstraints.js.map +1 -0
- package/dist/merge-fields/mergeField.js +190 -0
- package/dist/merge-fields/mergeField.js.map +1 -0
- package/dist/merge-fields/mergeIndicator.js +171 -0
- package/dist/merge-fields/mergeIndicator.js.map +1 -0
- package/dist/merge-fields/mergeOrAddPostprocess.js +57 -0
- package/dist/merge-fields/mergeOrAddPostprocess.js.map +1 -0
- package/dist/merge-fields/mergeOrAddSubfield.js +203 -0
- package/dist/merge-fields/mergeOrAddSubfield.js.map +1 -0
- package/dist/merge-fields/mergeSubfield.js +277 -0
- package/dist/merge-fields/mergeSubfield.js.map +1 -0
- package/dist/merge-fields/removeDuplicateSubfields.js +48 -0
- package/dist/merge-fields/removeDuplicateSubfields.js.map +1 -0
- package/dist/merge-fields/worldKnowledge.js +98 -0
- package/dist/merge-fields/worldKnowledge.js.map +1 -0
- package/dist/merge-fields.spec.js +51 -0
- package/dist/merge-fields.spec.js.map +1 -0
- package/dist/subfield6Utils.js +16 -1
- package/dist/subfield6Utils.js.map +1 -1
- package/dist/utils.js +108 -0
- package/dist/utils.js.map +1 -1
- package/package.json +6 -6
- package/src/index.js +3 -1
- package/src/melindaCustomMergeFields.json +5120 -0
- package/src/merge-fields/config.json +83 -0
- package/src/merge-fields/controlSubfields.js +307 -0
- package/src/merge-fields/counterpartField.js +736 -0
- package/src/merge-fields/index.js +69 -0
- package/src/merge-fields/mergableIndicator.js +90 -0
- package/src/merge-fields/mergableTag.js +89 -0
- package/src/merge-fields/mergeConstraints.js +309 -0
- package/src/merge-fields/mergeField.js +187 -0
- package/src/merge-fields/mergeIndicator.js +185 -0
- package/src/merge-fields/mergeOrAddPostprocess.js +56 -0
- package/src/merge-fields/mergeOrAddSubfield.js +218 -0
- package/src/merge-fields/mergeSubfield.js +306 -0
- package/src/merge-fields/removeDuplicateSubfields.js +50 -0
- package/src/merge-fields/worldKnowledge.js +104 -0
- package/src/merge-fields.spec.js +52 -0
- package/src/subfield6Utils.js +14 -1
- package/src/utils.js +119 -0
- package/test-fixtures/merge-fields/f01/expectedResult.json +11 -0
- package/test-fixtures/merge-fields/f01/metadata.json +5 -0
- package/test-fixtures/merge-fields/f01/record.json +13 -0
- package/test-fixtures/merge-fields/f02/expectedResult.json +14 -0
- package/test-fixtures/merge-fields/f02/metadata.json +6 -0
- package/test-fixtures/merge-fields/f02/record.json +16 -0
- package/test-fixtures/merge-fields/f03/expectedResult.json +17 -0
- package/test-fixtures/merge-fields/f03/metadata.json +7 -0
- package/test-fixtures/merge-fields/f03/record.json +23 -0
- package/test-fixtures/merge-fields/f04/expectedResult.json +14 -0
- package/test-fixtures/merge-fields/f04/metadata.json +5 -0
- package/test-fixtures/merge-fields/f04/record.json +19 -0
- package/test-fixtures/merge-fields/v01/expectedResult.json +6 -0
- package/test-fixtures/merge-fields/v01/metadata.json +5 -0
- package/test-fixtures/merge-fields/v01/record.json +13 -0
- package/test-fixtures/merge-fields/v02/expectedResult.json +4 -0
- package/test-fixtures/merge-fields/v02/metadata.json +5 -0
- package/test-fixtures/merge-fields/v02/record.json +13 -0
- package/test-fixtures/merge-fields/v03/expectedResult.json +6 -0
- package/test-fixtures/merge-fields/v03/metadata.json +6 -0
- package/test-fixtures/merge-fields/v03/record.json +16 -0
- package/test-fixtures/merge-fields/v04/expectedResult.json +4 -0
- package/test-fixtures/merge-fields/v04/metadata.json +6 -0
- package/test-fixtures/merge-fields/v04/record.json +16 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
//import {MarcRecord} from '@natlibfi/marc-record';
|
|
2
|
+
import createDebugLogger from 'debug';
|
|
3
|
+
import {fieldHasSubfield, fieldToString, fieldsToString, fieldsAreIdentical, nvdebug, hasCopyright, removeCopyright, subfieldToString} from '../utils';
|
|
4
|
+
import {fieldGetOccurrenceNumberPairs} from '../subfield6Utils.js';
|
|
5
|
+
import {cloneAndNormalizeFieldForComparison, cloneAndRemovePunctuation} from '../normalizeFieldForComparison';
|
|
6
|
+
import {mergeOrAddSubfield} from './mergeOrAddSubfield';
|
|
7
|
+
import {mergeIndicators} from './mergeIndicator';
|
|
8
|
+
import {mergableTag} from './mergableTag';
|
|
9
|
+
import {getCounterpart} from './counterpartField';
|
|
10
|
+
//import {default as normalizeEncoding} from '@natlibfi/marc-record-validators-melinda/dist/normalize-utf8-diacritics';
|
|
11
|
+
//import {postprocessRecords} from './mergeOrAddPostprocess.js';
|
|
12
|
+
//import {preprocessBeforeAdd} from './processFilter.js';
|
|
13
|
+
|
|
14
|
+
//import fs from 'fs';
|
|
15
|
+
//import path from 'path';
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
//const defaultConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '..', '..', 'src', 'reducers', 'config.json'), 'utf8'));
|
|
19
|
+
|
|
20
|
+
// Specs: https://workgroups.helsinki.fi/x/K1ohCw (though we occasionally differ from them)...
|
|
21
|
+
|
|
22
|
+
// Debug namespace follows the package this file ships in (same scheme as ./mergeIndicator.js),
// so that a filter such as DEBUG=@natlibfi/marc-record-validators-melinda:* covers these messages as well.
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:merge-fields:mergeField');
//const debugData = debug.extend('data');
// The ':dev' sub-namespace is the logger handed to nvdebug() throughout this file:
const debugDev = debug.extend('dev');
|
|
25
|
+
|
|
26
|
+
// NB! Can we do this via config.json?
|
|
27
|
+
/**
 * Drops every subfield ‡g whose value is exactly 'ENNAKKOTIETO.' from the field (in place).
 * If that would leave the field without any subfields, the field is left untouched.
 * @param {Object} field - field object with a `subfields` array
 */
function removeEnnakkotieto(field) {
  const isEnnakkotieto = ({code, value}) => code === 'g' && value === 'ENNAKKOTIETO.';
  const remaining = field.subfields.filter(sf => !isEnnakkotieto(sf));
  // Remove only iff some other subfield remains:
  if (remaining.length === 0) {
    return;
  }
  field.subfields = remaining; // eslint-disable-line functional/immutable-data
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
/**
 * Special handling for merging a source field 260 into a base field 264.
 * For each source subfield ‡c for which hasCopyright() holds:
 *  - a new field 264 (ind1 ' ', ind2 '4') carrying that ‡c value, minus one trailing '.', is inserted into baseRecord
 *  - the source subfield value itself is replaced with the result of removeCopyright().
 * Does nothing for any other tag combination.
 * @param {Object} baseRecord - record providing insertField()
 * @param {Object} baseField - merge target; only its tag is inspected here
 * @param {Object} sourceField - merge source; its ‡c subfields may get modified in place
 */
function copyrightYearHack(baseRecord, baseField, sourceField) {
  const isBase264VsSource260 = baseField.tag === '264' && sourceField.tag === '260';
  if (!isBase264VsSource260) {
    return;
  }

  sourceField.subfields
    .filter(({code, value}) => code === 'c' && hasCopyright(value))
    .forEach(copyrightSubfield => {
      // Add new:
      const value = copyrightSubfield.value.replace(/\.$/u, '');
      baseRecord.insertField({'tag': '264', 'ind1': ' ', 'ind2': '4', 'subfields': [{'code': 'c', value}]});
      // Modify original subfield:
      copyrightSubfield.value = removeCopyright(copyrightSubfield.value); // eslint-disable-line functional/immutable-data
    });
}
|
|
50
|
+
|
|
51
|
+
/**
 * Merges sourceField into baseField (its counterpart), modifying baseField in place.
 * Steps, in this order:
 *  1. fields with identical string representation are left as they are
 *  2. a base-only ‡g 'ENNAKKOTIETO.' is removed (and baseField.merged is set to 1)
 *  3. copyrightYearHack() and mergeIndicators() are applied
 *  4. each source subfield is handed to mergeOrAddSubfield() together with its
 *     original, normalized and punctuationless values.
 * @param {Object} baseRecord - record that owns baseField
 * @param {Object} baseField - merge target
 * @param {Object} sourceField - merge source
 * @param {Object} config - passed on to mergeIndicators()
 * @param {Array} [candFieldPairs880=[]] - passed on to mergeOrAddSubfield()
 * @returns {Object|undefined} baseRecord if the fields were identical, otherwise nothing
 */
// eslint-disable-next-line max-params
function mergeField2(baseRecord, baseField, sourceField, config, candFieldPairs880 = []) {
  // Identical fields: no need to check every subfield separately,
  // and no need to postprocess the resulting field either.
  const fieldsLookIdentical = fieldToString(baseField) === fieldToString(sourceField);
  if (fieldsLookIdentical) {
    return baseRecord;
  }

  // If a base ennakkotieto is merged with real data, remove the ennakkotieto subfield.
  // (If our prepub normalizations are ok, this should not be needed.
  // However, it's simple and works well enough, so let's keep it here.)
  const onlyBaseIsEnnakkotieto = fieldHasSubfield(baseField, 'g', 'ENNAKKOTIETO.') && !fieldHasSubfield(sourceField, 'g', 'ENNAKKOTIETO.');
  if (onlyBaseIsEnnakkotieto) { // eslint-disable-line functional/no-conditional-statements
    removeEnnakkotieto(baseField);
    baseField.merged = 1; // eslint-disable-line functional/immutable-data
  }

  copyrightYearHack(baseRecord, baseField, sourceField);

  mergeIndicators(baseField, sourceField, config);

  // Incoming subfields are added without punctuation; punctuation is added later on.
  // The normalized clone is dragged along as well, as the merge-or-add decision needs it.
  // (Cloning is harmless, but probably not needed.)
  const normalizedSourceField = cloneAndNormalizeFieldForComparison(sourceField); // For comparison
  const strippedSourceField = cloneAndRemovePunctuation(sourceField); // For adding subfields

  nvdebug(` MERGING SUBFIELDS OF '${fieldToString(strippedSourceField)}' (merge/add)`, debugDev);

  sourceField.subfields.forEach((sourceSubfield, position) => {
    // The clones have their subfields at the same positions as the original field:
    const normalizedCounterpart = normalizedSourceField.subfields[position];
    const punctuationlessCounterpart = strippedSourceField.subfields[position];

    const baseBefore = fieldToString(baseField);
    nvdebug(` TRYING TO MERGE SUBFIELD '${subfieldToString(sourceSubfield)}' TO '${baseBefore}'`, debugDev);

    const subfieldData = {
      'tag': sourceField.tag,
      'code': sourceSubfield.code,
      'originalValue': sourceSubfield.value,
      'normalizedValue': normalizedCounterpart.value,
      'punctuationlessValue': punctuationlessCounterpart.value
    };

    mergeOrAddSubfield(baseField, subfieldData, candFieldPairs880);

    // Report only those subfields that actually changed the base field:
    const baseAfter = fieldToString(baseField);
    if (baseAfter !== baseBefore) { // eslint-disable-line functional/no-conditional-statements
      nvdebug(` SUBFIELD MERGE RESULT: '${baseAfter}'`, debugDev);
    }
  });
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
/**
 * Tells whether merging should not even be attempted for sourceField.
 * That is the case when
 *  - mergableTag() rejects the tag, or
 *  - the base record already has an identical field (base fields flagged as
 *    mergeCandidate are ignored). In this case sourceField.deleted is set to 1.
 * @param {Object} baseRecord - record with a `fields` array
 * @param {Object} sourceField - field about to be merged
 * @param {Object} config - passed on to mergableTag()
 * @returns {boolean} true if the merge should be skipped
 */
function skipMergeField(baseRecord, sourceField, config) {
  const tagIsMergable = mergableTag(sourceField.tag, config);
  if (!tagIsMergable) {
    nvdebug(`skipMergeField(): field '${fieldToString(sourceField)}' listed as skippable!`, debugDev);
    return true;
  }

  // Skip duplicate field:
  const isDuplicateOfSource = baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField);
  if (!baseRecord.fields.some(isDuplicateOfSource)) {
    return false;
  }

  nvdebug(`skipMergeField(): field '${fieldToString(sourceField)}' already exists! No merge required!`, debugDev);
  sourceField.deleted = 1; // eslint-disable-line functional/immutable-data
  return true;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Decides whether the source field's data should be preferred over the base field's data.
 * MELINDA-8978: a source field carrying an Asteri identifier in ‡0 beats a base field without one.
 * @param {Object} baseField - counterpart field in the base record
 * @param {Object} sourceField - field from the source record
 * @returns {number|boolean|undefined} 1 if the source is better, false if not,
 *   undefined if baseField has no subfields. Callers should rely on truthiness only.
 */
function sourceRecordIsBetter(baseField, sourceField) {
  if (!baseField.subfields) {
    return;
  }

  // MELINDA-8978: prefer Asteri version
  const sourceWins = hasAsteriIdentifier(sourceField) && !hasAsteriIdentifier(baseField);
  return sourceWins ? 1 : false;

  // True if some ‡0 consists of an Asteri prefix, '(FI-ASTERI-N)', '(FI-ASTERI-W)', '(FIN11)' or '(FIN13)', followed by exactly nine digits
  function hasAsteriIdentifier({subfields}) {
    return subfields.some(({code, value}) => code === '0' && value.match(/^\((?:FI-ASTERI-[NW]|FIN1[13])\)[0-9]{9}$/u));
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Exchanges ind1, ind2 and subfields between the two field objects (in place).
 * Tags and any other properties stay where they are.
 * NB! Does not support controlfields yet! Add support if the need arises.
 * @param {Object} field1 - first field; nothing is done if it has no subfields
 * @param {Object} field2 - second field
 */
function swapDataBetweenFields(field1, field2) {
  // If field1 has subfields, then also field2 has them. No need to check the other field here.
  if (!field1.subfields) {
    return;
  }

  ['ind1', 'ind2', 'subfields'].forEach(name => {
    [field1[name], field2[name]] = [field2[name], field1[name]]; // eslint-disable-line functional/immutable-data
  });
}
|
|
154
|
+
|
|
155
|
+
/**
 * Tries to merge one source field into a counterpart field of the base record.
 * While the attempt is going on, sourceField carries a temporary `mergeCandidate` flag
 * (skipMergeField() ignores base fields that have it); the flag is removed before returning.
 * After a successful merge sourceField.deleted is set to 1.
 * @param {Object} baseRecord - record that receives the data
 * @param {Object} sourceRecord - record that owns sourceField
 * @param {Object} sourceField - field to merge
 * @param {Object} config - merge configuration, passed on to the helpers
 * @returns {boolean} true if the field was merged into a counterpart, false otherwise
 */
export function mergeField(baseRecord, sourceRecord, sourceField, config) {
  nvdebug(`SELF: ${fieldToString(sourceField)}`, debugDev);

  sourceField.mergeCandidate = true; // eslint-disable-line functional/immutable-data

  // Skip duplicates and special cases:
  const skip = skipMergeField(baseRecord, sourceField, config);
  if (skip) {
    nvdebug(`mergeField(): don't merge '${fieldToString(sourceField)}'`, debugDev);
    delete sourceField.mergeCandidate; // eslint-disable-line functional/immutable-data
    return false;
  }

  nvdebug(`mergeField(): Try to merge '${fieldToString(sourceField)}'.`, debugDev);
  const counterpartField = getCounterpart(baseRecord, sourceRecord, sourceField, config);

  if (!counterpartField) {
    // NB! Counterpartless field is inserted to 7XX even if field.tag says 1XX:
    debugDev(`mergeField(): No mergable counterpart found for '${fieldToString(sourceField)}'.`);
    delete sourceField.mergeCandidate; // eslint-disable-line functional/immutable-data
    return false;
  }

  // When the source data is deemed better, the two fields trade contents before merging:
  if (sourceRecordIsBetter(counterpartField, sourceField)) { // eslint-disable-line functional/no-conditional-statements
    swapDataBetweenFields(counterpartField, sourceField);
  }

  // Occurrence number pairs are looked up for non-880 source fields only:
  const candFieldPairs880 = sourceField.tag === '880' ? undefined : fieldGetOccurrenceNumberPairs(sourceField, sourceRecord.fields);
  nvdebug(`mergeField(): Got counterpart: '${fieldToString(counterpartField)}'. Thus try merge...`, debugDev);
  nvdebug(`PAIR: ${candFieldPairs880 ? fieldsToString(candFieldPairs880) : 'NADA'}`, debugDev);
  mergeField2(baseRecord, counterpartField, sourceField, config, candFieldPairs880);

  sourceField.deleted = 1; // eslint-disable-line functional/immutable-data
  delete sourceField.mergeCandidate; // eslint-disable-line functional/immutable-data
  return true;
}
|
|
187
|
+
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import createDebugLogger from 'debug';
|
|
2
|
+
import {fieldToString, marc21GetTagsLegalInd1Value, marc21GetTagsLegalInd2Value, nvdebug} from '../utils';
|
|
3
|
+
|
|
4
|
+
// Specs: https://workgroups.helsinki.fi/x/K1ohCw (though we occasionally differ from them)...
|
|
5
|
+
|
|
6
|
+
// Debug logger of this module. The ':dev' sub-namespace is the one handed to nvdebug().
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:merge-fields:mergeIndicator');
//const debugData = debug.extend('data');
const debugDev = debug.extend('dev');

// Tags whose 1st indicator gets the hard-coded default preference order '9876543210 '
// (bigger is better), unless the config says otherwise:
const ind1NonFilingChars = [
  '130',
  '630',
  '730',
  '740'
];
// The same for the 2nd indicator:
const ind2NonFilingChars = [
  '222',
  '240',
  '242',
  '243',
  '245',
  '830'
];
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
export function mergeIndicators(toField, fromField, config) {
|
|
15
|
+
// NB! For non-filing indicators we deem that bigger is better. This is a bit quick'n'dirty, as usual.
|
|
16
|
+
// We could and should check the relevant article length (using language information whilst doing it).
|
|
17
|
+
// However, this is a task for record internal fixer, not merge.
|
|
18
|
+
//
|
|
19
|
+
// For other indicators the situation is trickier, as we don't know which one is the good value.
|
|
20
|
+
//
|
|
21
|
+
// NB! We could add fixes for various other indicator types as well. However, it gets quickly pretty ad hoc.
|
|
22
|
+
// nvdebug(fieldToString(toField), debugDev);
|
|
23
|
+
// nvdebug(fieldToString(fromField), debugDev);
|
|
24
|
+
|
|
25
|
+
mergeIndicator1(toField, fromField, config);
|
|
26
|
+
mergeIndicator2(toField, fromField, config);
|
|
27
|
+
|
|
28
|
+
/**
 * Returns the preference data used for choosing between two differing indicator values.
 * The result is either an array (most preferred value first) or an object that maps
 * indicator values to priorities, eg. {"0": 1, "1": 1, " ": 2}: here '0' and '1' share
 * the top priority 1, and '#' is of lesser importance, namely 2.
 * @param {string} tag - field tag
 * @param {number} indicatorNumber - 1 or 2
 * @param {Object} config - may contain indicator1PreferredValues / indicator2PreferredValues
 * @returns {Array|Object} preference data; an empty array if nothing is known
 */
function getIndicatorPreferredValues(tag, indicatorNumber, config) {
  const candidates = getIndicatorPreferredValuesForGivenTag(tag, indicatorNumber, config);

  switch (typeof candidates) {
  case 'object': // Arrays and priority objects are returned as they are
    return candidates;
  case 'string': // Preference order as a plain string (seen in json in the past): one character per value
    return candidates.split('');
  default:
    return [];
  }

  // Lookup order: user config, hard-coded non-filing defaults, single legal value of the MARC21 standard.
  function getIndicatorPreferredValuesForGivenTag(tag, indicatorNumber, config) {
    const isFirstIndicator = indicatorNumber === 1;
    const configuredValues = isFirstIndicator ? config.indicator1PreferredValues : config.indicator2PreferredValues;
    nvdebug(`${tag} IND${indicatorNumber}: get preferred values...\nCONFIG: ${JSON.stringify(config)}`, debugDev);
    if (configuredValues && tag in configuredValues) {
      return configuredValues[tag];
    }

    // Easter Egg #1: Use good-ish hard-coded defaults as not defined by user:
    const isNonFilingIndicator = isFirstIndicator ? ind1NonFilingChars.includes(tag) : indicatorNumber === 2 && ind2NonFilingChars.includes(tag);
    if (isNonFilingIndicator) {
      return '9876543210 ';
    }

    // Easter Egg #2: Marc21 standard has just one value for given indicator, so prefer it:
    const legalValues = isFirstIndicator ? marc21GetTagsLegalInd1Value(tag) : marc21GetTagsLegalInd2Value(tag);
    if (legalValues && legalValues.length === 1) {
      if (typeof legalValues === 'string') { // single cand
        return [legalValues];
      }
      if (Array.isArray(legalValues)) {
        return legalValues;
      }
    }

    return [];
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Picks the better one of two indicator values.
 * @param {Array|Object} preferences - array of values (most preferred first), or an
 *   object mapping values to scores (the smaller the score, the better)
 * @param {string} val1 - first value
 * @param {string} val2 - second value; wins if the scores are equal
 * @returns {string|undefined} the winner, or undefined if neither value is listed in preferences
 */
function getPreferredValue(preferences, val1, val2) {
  const [score1, score2] = [val1, val2].map(val => rank(val));
  const isListed1 = score1 !== -1;
  const isListed2 = score2 !== -1;

  if (!isListed1 && !isListed2) {
    return undefined;
  }
  if (!isListed1) {
    return val2;
  }
  if (!isListed2) {
    return val1;
  }
  // The sooner, the better:
  return score1 < score2 ? val1 : val2;

  // Score of a value: array index or mapped score; -1 means "not listed"
  function rank(val) {
    if (Array.isArray(preferences)) {
      return preferences.indexOf(val);
    }
    // preferences may be an object, since different values can return the same score
    // (eg. 506 ind1 values '0' and '1' are equal but better than '#')
    return val in preferences ? preferences[val] : -1;
  }
}
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
/**
 * Tells whether the field contains subfield ‡9 with the exact value 'FENNI<KEEP>'.
 * @param {Object} field - field to inspect
 * @returns {boolean|undefined} result of the check; a falsy `field.subfields` is returned as such
 */
function fieldIsFenniKept(field) {
  const {subfields} = field;
  if (!subfields) {
    return subfields;
  }
  const isFenniKeep = ({code, value}) => code === '9' && value === 'FENNI<KEEP>';
  return subfields.some(isFenniKeep);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Settles the first indicator of toField when it differs from that of fromField.
 * toField.ind1 is modified in place; if no rule gives a winner, it keeps its value.
 * @param {Object} toField - merge target
 * @param {Object} fromField - merge source
 * @param {Object} config - passed on to getIndicatorPreferredValues()
 */
function mergeIndicator1(toField, fromField, config) {
  if (toField.ind1 === fromField.ind1) {
    return; // Do nothing
  }

  // MRA-300: If source contains the (un)holy $9 FENNI<KEEP>, we prefer that value regardless of whatever...
  const onlySourceIsFenniKept = !fieldIsFenniKept(toField) && fieldIsFenniKept(fromField);
  if (onlySourceIsFenniKept) {
    toField.ind1 = fromField.ind1; // eslint-disable-line functional/immutable-data
    return;
  }

  const preferredValues = getIndicatorPreferredValues(toField.tag, 1, config);
  if (!preferredValues) {
    return; // No rule for this tag: keep toField.ind1
  }

  const winner = getPreferredValue(preferredValues, fromField.ind1, toField.ind1);
  if (typeof winner === 'undefined') {
    return; // No winner found: keep toField.ind1
  }
  toField.ind1 = winner; // eslint-disable-line functional/immutable-data
}
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
/**
 * Hack for base field 260 vs source field 264: field 264 IND2 contains information
 * that can not be coded into field 260. Thus the base field is converted into a 264
 * that uses the source's ind2, so that no information is lost.
 * The conversion is not done if the base field has $e, $f or $g, as that data
 * can not be converted to 264; only a warning is logged then.
 * @param {Object} toField - base field; tag and ind2 may get modified in place
 * @param {Object} fromField - source field
 */
function publisherTagSwapHack(toField, fromField) {
  // NB! Note that field 264.ind2==3 maps to $efg in field 260, so it is not relevant *here*:
  // (Not sure whether our ind2 sanity check list should contain '4' (copyright year) as well:)
  const isBase260VsSource264 = toField.tag === '260' && fromField.tag === '264';
  if (!isBase260VsSource264 || !['0', '1', '2'].includes(fromField.ind2)) {
    return;
  }

  // However, 260 contains data that cannot be converted to 264 as well
  const hasUnconvertibleSubfields = toField.subfields.some(({code}) => ['e', 'f', 'g'].includes(code));
  if (hasUnconvertibleSubfields) {
    nvdebug(`WARNING: can not change base 260 to 264 as it contains $e, $f and/or $g. Source IND2 info lost.`, debugDev);
    nvdebug(` ${fieldToString(toField)}\n ${fieldToString(fromField)}`, debugDev);
    return;
  }

  // Convert 260 to 264 so that no information is lost:
  nvdebug(`Apply base 260->264 tag swap hack`, debugDev);
  nvdebug(` ${fieldToString(toField)}\n ${fieldToString(fromField)}`, debugDev);

  toField.tag = '264'; // eslint-disable-line functional/immutable-data
  toField.ind2 = fromField.ind2; // eslint-disable-line functional/immutable-data
}
|
|
157
|
+
|
|
158
|
+
/**
 * Settles the second indicator of toField when it differs from that of fromField.
 * publisherTagSwapHack() gets its chance first (it may change toField's tag and ind2),
 * then the preferred values of toField's (current) tag decide.
 * toField.ind2 is modified in place; if no rule gives a winner, it is left as it is.
 * @param {Object} toField - merge target
 * @param {Object} fromField - merge source
 * @param {Object} config - passed on to getIndicatorPreferredValues()
 */
function mergeIndicator2(toField, fromField, config) {
  if (toField.ind2 === fromField.ind2) {
    return; // Do nothing
  }

  publisherTagSwapHack(toField, fromField); // Easter egg/hack for base-260 vs source-264

  // If source contains $9 FENNI<KEEP>, we might prefer it?

  const preferredValues = getIndicatorPreferredValues(toField.tag, 2, config);
  if (!preferredValues) {
    return;
  }

  const winner = getPreferredValue(preferredValues, fromField.ind2, toField.ind2);
  if (typeof winner === 'undefined') {
    return;
  }
  toField.ind2 = winner; // eslint-disable-line functional/immutable-data
}
|
|
184
|
+
|
|
185
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// This field should be renamed, as it is also called from outside merge.
|
|
2
|
+
|
|
3
|
+
//import {MarcRecord} from '@natlibfi/marc-record';
|
|
4
|
+
import {fieldFixPunctuation} from '../punctuation2';
|
|
5
|
+
import {fieldRemoveDuplicateSubfields} from './removeDuplicateSubfields';
|
|
6
|
+
import {sortAdjacentSubfields} from '../sortSubfields';
|
|
7
|
+
import {sortAdjacentESubfields} from '../sortRelatorTerms';
|
|
8
|
+
|
|
9
|
+
/**
 * Cleans up the base record's fields after merging (in place):
 *  - fields flagged as `merged` get duplicate subfield removal, punctuation fixing
 *    and subfield sorting
 *  - the merge-specific flags `merged`, `useExternalEndPunctuation` and `added` are removed.
 * NB! The `deleted` flag is not touched here.
 * NB! Relator terms are now expanded and translated already at preprocess stage!
 * @param {Object} base - record with a `fields` array
 */
function postprocessBaseRecord(base) {
  base.fields.forEach(field => cleanUpField(field));

  function cleanUpField(field) {
    if (field.merged) { // eslint-disable-line functional/no-conditional-statements
      // Field level ideas about things that could be done here:
      // - Fix indicators?
      // Record level fixes should be implemented as validators/fixers
      // in marc-record-validators-melinda and just called from here.
      fieldRemoveDuplicateSubfields(field);
      fieldFixPunctuation(field); // NB! This will fix only fields with merged content
      sortAdjacentSubfields(field); // Put the added $e subfield to proper places.
      sortAdjacentESubfields(field); // Sort $e subfields with each other
      fieldFixPunctuation(field);

      delete field.merged; // eslint-disable-line functional/immutable-data
    }

    // Remove the remaining merge-specific information (only flags that are actually set):
    ['useExternalEndPunctuation', 'added']
      .filter(flag => field[flag])
      .forEach(flag => {
        delete field[flag]; // eslint-disable-line functional/immutable-data
      });
  }
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
/**
 * Removes fields that are marked as deleted (truthy `deleted` property) from the record.
 * record.fields is replaced with a new, filtered array.
 * @param {Object} record - record with a `fields` array
 */
function removeDeletedFields(record) {
  const isNotDeleted = ({deleted}) => !deleted;
  record.fields = record.fields.filter(field => isNotDeleted(field)); // eslint-disable-line functional/immutable-data
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
/**
 * Post-merge clean-up of both records (in place).
 * @param {Object} base - merged record; gets postprocessBaseRecord() treatment
 * @param {Object} source - source record; fields marked as deleted are dropped from it
 */
export function postprocessRecords(base, source) {
  postprocessBaseRecord(base);
  // So that we may know what was used, and what not:
  removeDeletedFields(source);
}
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import createDebugLogger from 'debug';
|
|
2
|
+
import {cloneAndNormalizeFieldForComparison} from '../normalizeFieldForComparison.js';
|
|
3
|
+
import {normalizeAs, normalizeControlSubfieldValue} from '../normalize-identifiers';
|
|
4
|
+
import {fieldHasSubfield, fieldToString, isControlSubfieldCode, nvdebug, subfieldIsRepeatable, subfieldToString} from '../utils.js';
|
|
5
|
+
import {mergeSubfield} from './mergeSubfield.js';
|
|
6
|
+
import {sortAdjacentSubfields} from '../sortSubfields'; //'./sortSubfields.js';
|
|
7
|
+
|
|
8
|
+
import {valueCarriesMeaning} from './worldKnowledge.js';
|
|
9
|
+
import {resetSubfield6Tag} from '../subfield6Utils.js';
|
|
10
|
+
|
|
11
|
+
// Debug namespace follows the package this file ships in (same scheme as ./mergeIndicator.js),
// so that a filter such as DEBUG=@natlibfi/marc-record-validators-melinda:* covers these messages as well.
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:merge-fields:mergeOrAddSubfield');
//const debugData = debug.extend('data');
// The ':dev' sub-namespace is used for the developer-level messages of this file:
const debugDev = debug.extend('dev');
|
|
14
|
+
|
|
15
|
+
/**
 * Field 040 special case: tells whether the candidate ‡d value already exists
 * in the target field as a ‡a value (exact match against the original value).
 * @param {Object} targetField - field that would receive the subfield
 * @param {Object} candSubfieldData - candidate data; `code` and `originalValue` are used
 * @returns {boolean} true if the target is a 040 and the candidate ‡d equals one of its ‡a values
 */
function catalogingSourceModifyingAgencyCandIsOriginalCatalogingSourceAgencyInTargetField(targetField, candSubfieldData) {
  const isCandidateFor040d = targetField.tag === '040' && candSubfieldData.code === 'd';
  if (!isCandidateFor040d) {
    return false;
  }
  nvdebug(`${fieldToString(targetField)} vs $d ${candSubfieldData.originalValue}}`, debugDev);
  // Add hard-coded exceptions here
  const matchesSubfieldA = targetField.subfields.some(({code, value}) => code === 'a' && value === candSubfieldData.originalValue);
  if (!matchesSubfieldA) {
    return false;
  }
  nvdebug('040‡d matched 040‡a', debugDev);
  return true;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Tells whether the candidate is a subfield ‡g 'ENNAKKOTIETO.' (or 'ENNAKKOTIETO').
 * Only this subfield is skipped, not the whole field: we want at least $0
 * and maybe even more from ennakkotieto.
 * @param {Object} candSubfieldData - candidate data; `code` and `originalValue` are used
 * @returns {boolean} true if the candidate is such a ‡g
 */
function ennakkotietoInSubfieldG(candSubfieldData) {
  const {code, originalValue} = candSubfieldData;
  const isEnnakkotieto = code === 'g' && ['ENNAKKOTIETO.', 'ENNAKKOTIETO'].includes(originalValue);
  if (!isEnnakkotieto) {
    return false;
  }
  debugDev('Skip ‡g ENNAKKOTIETO.');
  return true;
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
/**
 * Special cases in which a candidate subfield should be neither merged nor added:
 *  1. a ‡9 containing <KEEP> coming from a source 7XX to a base 1XX
 *  2. a value that valueCarriesMeaning() rejects (264$b 'Kustannuspaikka tuntematon' etc)
 *  3. an identifier subfield that already exists in the target, when compared
 *     via normalizeControlSubfieldValue().
 * @param {Object} targetField - field that would receive the subfield
 * @param {Object} candSubfieldData - candidate data (tag, code, originalValue, normalizedValue)
 * @returns {boolean} true if the candidate must be skipped
 */
function mergeOrAddSubfieldNotRequiredSpecialCases(targetField, candSubfieldData) {
  const {tag: candTag, code: candCode, originalValue, normalizedValue} = candSubfieldData;

  // Don't bring WHATEVER<KEEP> from source 7XX to base 1XX.
  // Exceptionally we can merge <KEEP>ed 7XX with un-<KEEP>ed 1XX as 1XX should not use <KEEP>s.
  const isKeepFrom7XXTo1XX = targetField.tag.charAt(0) === '1' && candTag.charAt(0) === '7' && candCode === '9' && originalValue.match(/<KEEP>/u);
  if (isKeepFrom7XXTo1XX) {
    return true;
  }

  // Don't add 264$b 'Kustannuspaikka tuntematon' etc
  if (!valueCarriesMeaning(targetField.tag, candCode, normalizedValue)) {
    return true;
  }

  // Don't add $0 subfields that mean the same even if they look different:
  const alephIdentifierType = normalizeAs(targetField.tag, candCode);
  if (alephIdentifierType === undefined) {
    return false;
  }
  const normalizedCandValue = normalizeControlSubfieldValue(originalValue, alephIdentifierType);
  // NOTE(review): target values are normalized without the identifier type argument,
  // whereas the candidate value gets one. Confirm that this asymmetry is intended.
  return targetField.subfields.some(sf => normalizeControlSubfieldValue(sf.value) === normalizedCandValue && sf.code === candCode);
}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
/**
 * Tells whether the normalized comparison should be skipped for a subfield,
 * ie. whether only the exact and punctuationless comparisons are to be trusted.
 * This applies to 020‡a, and to ‡a values starting with 'ntamo' in X10 fields.
 * @param {string} tag - field tag
 * @param {string} subfieldCode - subfield code
 * @param {string} subfieldValue - (original) subfield value
 * @returns {boolean} true if the normalized comparison should be skipped
 */
function skipNormalizedComparison(tag, subfieldCode, subfieldValue) {
  // All the exceptions concern subfield ‡a:
  if (subfieldCode !== 'a') {
    return false;
  }
  if (tag === '020') {
    return true;
  }
  // Hackish: we want 'ntamo' to win 'Ntamo'...
  // If there are other similar exceptions, put them into an array.
  return ['110', '610', '710', '810'].includes(tag) && subfieldValue.startsWith('ntamo');
}
|
|
76
|
+
|
|
77
|
+
/**
 * Tells whether the candidate subfield can be skipped altogether, either because of
 * a special case or because the target field already has the same content.
 * Target subfields with the same code are compared against the candidate's original
 * and punctuationless values, and (unless skipNormalizedComparison() says otherwise)
 * the normalized target field is compared against the candidate's normalized value.
 * @param {Object} targetField - field that would receive the subfield
 * @param {Object} candSubfieldData - candidate data (code, originalValue, punctuationlessValue, normalizedValue)
 * @returns {boolean} true if nothing needs to be done (note the double negation: false means "go on and merge or add")
 */
function mergeOrAddSubfieldNotRequired(targetField, candSubfieldData) {
  const isSpecialCase = catalogingSourceModifyingAgencyCandIsOriginalCatalogingSourceAgencyInTargetField(targetField, candSubfieldData) ||
    ennakkotietoInSubfieldG(candSubfieldData) ||
    mergeOrAddSubfieldNotRequiredSpecialCases(targetField, candSubfieldData);
  if (isSpecialCase) {
    return true;
  }

  const {code, originalValue, punctuationlessValue, normalizedValue} = candSubfieldData;

  const existingValues = targetField.subfields.filter(sf => sf.code === code).map(sf => sf.value);
  // Target field does not have this subfield yet:
  if (existingValues.length === 0) {
    return false;
  }

  nvdebug(` Look for identical subfields in '${fieldToString(targetField)}' using`, debugDev);
  nvdebug(` ORIG. ‡${code} ${originalValue}`, debugDev);
  nvdebug(` NO-PUNC ‡${code} ${punctuationlessValue}`, debugDev);
  if (existingValues.some(value => value === originalValue || value === punctuationlessValue)) {
    return true;
  }

  if (skipNormalizedComparison(targetField.tag, code, originalValue)) {
    return false;
  }

  const normalizedTargetField = cloneAndNormalizeFieldForComparison(targetField);
  nvdebug(` Look for identical normalized subfields in '${fieldToString(normalizedTargetField)}'`, debugDev);
  nvdebug(` NO-PUNC ‡${code} ${normalizedValue})`, debugDev);

  // If a subfield with an identical normalized value exists, there is nothing to do.
  // Not ideal for 382‡n subfields, I guess... Nor for 505‡trg repetitions... These need to be fixed...
  return normalizedTargetField.subfields.some(sf => sf.code === code && sf.value === normalizedValue);
}
|
|
115
|
+
|
|
116
|
+
/**
 * Append a candidate subfield to the target field and flag the field as changed.
 *
 * After the append the field is marked with merged = 1, the punctuation flag is
 * updated via setPunctuationFlag() and sortAdjacentSubfields() is run on the field.
 *
 * @param {Object} targetField - Field that receives the subfield; mutated in place.
 * @param {Object} candSubfield - Subfield to append.
 * @returns {undefined}
 */
function addSubfield(targetField, candSubfield) {
  const description = subfieldToString(candSubfield);
  nvdebug(` Added subfield '${description}' to field`, debugDev);

  // The new subfield goes last. NB! Implement a separate function that does this + subfield reordering somehow...
  targetField.subfields.push(candSubfield); // eslint-disable-line functional/immutable-data

  // Mark the field as touched by the merge.
  targetField.merged = 1; // eslint-disable-line functional/immutable-data

  setPunctuationFlag(targetField, candSubfield);
  sortAdjacentSubfields(targetField);
}
|
|
127
|
+
|
|
128
|
+
/**
 * Set useExternalEndPunctuation = 1 on the field unless the added subfield
 * has a control subfield code.
 *
 * @param {Object} field - Field to flag; mutated in place.
 * @param {Object} addedSubfield - Subfield that was just added; only its code is read.
 * @returns {undefined}
 */
function setPunctuationFlag(field, addedSubfield) {
  // Control subfields are never punctuation related, so they leave the flag alone.
  const affectsPunctuation = !isControlSubfieldCode(addedSubfield.code);
  if (affectsPunctuation) {
    field.useExternalEndPunctuation = 1; // eslint-disable-line functional/immutable-data
  }
}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
/**
 * Call resetSubfield6Tag() on the leading subfield of a paired 880 field with
 * the target field's tag, but only when a subfield with code '6' is being added
 * to a field that is not itself an 880.
 *
 * Nothing happens when the pair is missing, has no subfields, or its first
 * subfield does not have code '6'.
 *
 * @param {Object} [candFieldPair880] - Paired 880 field of the candidate, if any.
 * @param {Object} targetField - Field that receives the subfield; only its tag is read.
 * @param {Object} punctlessCandSubfield - Subfield being added; only its code is read.
 * @returns {undefined}
 */
function resetPaired880(candFieldPair880, targetField, punctlessCandSubfield) {
  // Not relevant unless the subfield being added has code '6':
  if (punctlessCandSubfield.code !== '6') {
    return;
  }
  if (targetField.tag === '880') {
    return;
  }
  // NB! $6 comes first. An absent pair or an empty subfield list yields no first subfield at all.
  const pairSubfields = candFieldPair880 ? candFieldPair880.subfields : undefined;
  const firstSubfield = pairSubfields ? pairSubfields[0] : undefined;
  if (!firstSubfield || firstSubfield.code !== '6') {
    return;
  }
  nvdebug(`880 contents: ${fieldToString(candFieldPair880)}`, debugDev);
  resetSubfield6Tag(firstSubfield, targetField.tag);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Merge a candidate subfield into the target field, add it as a new subfield,
 * or leave the field untouched.
 *
 * Steps, in order:
 *  1. Stop if mergeOrAddSubfieldNotRequired() says there is nothing to do.
 *  2. Let mergeSubfield() try to merge the punctuationless candidate into an existing subfield.
 *  3. Add the candidate if the field has no subfield with that code yet
 *     (each paired 880 field is passed to resetPaired880() first).
 *  4. Otherwise add it only if the subfield is repeatable and the tag is not 260/264.
 *
 * @param {Object} targetField - Field to modify in place.
 * @param {Object} candSubfieldData - Candidate data; code, originalValue and
 *   punctuationlessValue are read here.
 * @param {Object[]} [candFieldPairs880=[]] - 880 fields paired with the candidate's field.
 * @returns {undefined}
 */
export function mergeOrAddSubfield(targetField, candSubfieldData, candFieldPairs880 = []) {
  const candSubfieldAsString = `${candSubfieldData.code} ${candSubfieldData.originalValue}`;

  nvdebug(` Q: mergeOrAddSubfield '${candSubfieldAsString}'\n with field '${fieldToString(targetField)}'?`, debugDev);
  if (mergeOrAddSubfieldNotRequired(targetField, candSubfieldData)) {
    nvdebug(` A: No. No need to merge nor to add the subfield '${candSubfieldAsString}'`, debugDev);
    return;
  }

  const candSubfield = {'code': candSubfieldData.code, 'value': candSubfieldData.punctuationlessValue};

  // Currently only for X00$d 1984- => 1984-2000 type of changes, where the source version is better than what the base has.
  // In all other cases the original subfield is kept.
  const original = fieldToString(targetField);

  if (mergeSubfield(targetField, candSubfield)) { // We might need the normalizedCandSubfield later on
    mergeSubfieldPostprocessor();
    return;
  }

  // Subfield codes missing from the original record can be added by default:
  if (addSubfieldWithPreviouslyUnseenSubfieldCode()) {
    return;
  }

  // melindaCustomMergeFields.json tells us whether the subfield is repeatable or not:
  if (!subfieldIsRepeatable(targetField.tag, candSubfield.code)) {
    nvdebug(` A: No. Non-repeatable subfield '${subfieldToString(candSubfield)}'`, debugDev);
    return;
  }

  // We don't want to add multiple, say, 260$c
  if (['260', '264'].includes(targetField.tag)) {
    nvdebug(` A: Exceptionally skip repeatable existing subfield '${subfieldToString(candSubfield)}'`, debugDev);
    return;
  }

  nvdebug(` A: Yes. Add repeatable subfield '${subfieldToString(candSubfield)}'`, debugDev);
  // addSubfield() itself marks the field as merged and sets the punctuation flag.
  addSubfield(targetField, candSubfield);

  // Report the outcome of mergeSubfield() and flag the field if its content actually changed.
  function mergeSubfieldPostprocessor() {
    if (original !== fieldToString(targetField)) {
      nvdebug(` A: Merge. Subfield '${candSubfieldAsString}' replaces the original subfield.`, debugDev);
      targetField.merged = 1; // eslint-disable-line functional/immutable-data
      setPunctuationFlag(targetField, candSubfield);
      return;
    }
    nvdebug(` A: No. Field ${original} already had the same or a synonymous or a better merge candidate than our subfield '${candSubfieldAsString}'.`, debugDev);
  }

  // Add the candidate when the field has no subfield with its code; returns true if it was added.
  function addSubfieldWithPreviouslyUnseenSubfieldCode() {
    if (fieldHasSubfield(targetField, candSubfield.code)) {
      return false;
    }
    nvdebug(` A: Yes. Add previously unseen subfield '${subfieldToString(candSubfield)}'`, debugDev);
    candFieldPairs880.forEach(pair => resetPaired880(pair, targetField, candSubfield));
    // addSubfield() itself marks the field as merged and sets the punctuation flag.
    addSubfield(targetField, candSubfield);
    return true;
  }
}
|