@natlibfi/marc-record-validators-melinda 10.12.0-alpha.4 → 10.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/normalizeFieldForComparison.js +292 -0
- package/dist/normalizeFieldForComparison.js.map +1 -0
- package/dist/normalizeSubfieldValueForComparison.js +103 -0
- package/dist/normalizeSubfieldValueForComparison.js.map +1 -0
- package/dist/punctuation2.js +24 -3
- package/dist/punctuation2.js.map +1 -1
- package/dist/removeInferiorDataFields.js +46 -18
- package/dist/removeInferiorDataFields.js.map +1 -1
- package/dist/sortSubfields.js +15 -12
- package/dist/sortSubfields.js.map +1 -1
- package/dist/utils.js +7 -0
- package/dist/utils.js.map +1 -1
- package/package.json +5 -5
- package/src/normalizeFieldForComparison.js +312 -0
- package/src/normalizeSubfieldValueForComparison.js +96 -0
- package/src/punctuation2.js +15 -4
- package/src/removeInferiorDataFields.js +51 -18
- package/src/sortSubfields.js +11 -9
- package/src/utils.js +7 -0
- package/test-fixtures/remove-inferior-datafields/f07a/expectedResult.json +2 -2
- package/test-fixtures/remove-inferior-datafields/f07a/record.json +2 -2
- package/test-fixtures/remove-inferior-datafields/f07b/expectedResult.json +2 -2
- package/test-fixtures/remove-inferior-datafields/f07b/record.json +2 -2
- package/test-fixtures/remove-inferior-datafields/f07c/expectedResult.json +2 -2
- package/test-fixtures/remove-inferior-datafields/f07c/metadata.json +1 -1
- package/test-fixtures/remove-inferior-datafields/f07c/record.json +2 -2
- package/test-fixtures/remove-inferior-datafields/f07d/expectedResult.json +11 -0
- package/test-fixtures/remove-inferior-datafields/f07d/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f07d/record.json +16 -0
- package/test-fixtures/remove-inferior-datafields/f07e/expectedResult.json +13 -0
- package/test-fixtures/remove-inferior-datafields/f07e/metadata.json +7 -0
- package/test-fixtures/remove-inferior-datafields/f07e/record.json +20 -0
- package/test-fixtures/remove-inferior-datafields/f07f/expectedResult.json +12 -0
- package/test-fixtures/remove-inferior-datafields/f07f/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f07f/record.json +18 -0
- package/test-fixtures/remove-inferior-datafields/f07g/expectedResult.json +19 -0
- package/test-fixtures/remove-inferior-datafields/f07g/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f07g/record.json +19 -0
- package/test-fixtures/remove-inferior-datafields/f07h/expectedResult.json +27 -0
- package/test-fixtures/remove-inferior-datafields/f07h/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f07h/record.json +27 -0
- package/test-fixtures/remove-inferior-datafields/f07i/expectedResult.json +20 -0
- package/test-fixtures/remove-inferior-datafields/f07i/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f07i/record.json +27 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.cloneAndNormalizeFieldForComparison = cloneAndNormalizeFieldForComparison;
|
|
7
|
+
exports.cloneAndRemovePunctuation = cloneAndRemovePunctuation;
|
|
8
|
+
exports.fieldTrimSubfieldValues = fieldTrimSubfieldValues;
|
|
9
|
+
exports.tagAndSubfieldCodeReferToIsbn = tagAndSubfieldCodeReferToIsbn;
|
|
10
|
+
var _clone = _interopRequireDefault(require("clone"));
|
|
11
|
+
var _punctuation = require("./punctuation2");
|
|
12
|
+
var _utils = require("./utils.js");
|
|
13
|
+
var _normalizeIdentifiers = require("./normalize-identifiers");
|
|
14
|
+
var _debug = _interopRequireDefault(require("debug"));
|
|
15
|
+
var _normalizeSubfieldValueForComparison = require("./normalizeSubfieldValueForComparison");
|
|
16
|
+
// Interop helper (transpiler-generated, presumably by Babel): objects flagged with `__esModule`
// are returned as-is; any other value (including falsy ones) is wrapped as `{ default: obj }`.
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
17
|
+
/*
|
|
18
|
+
Note that this file contains very powerful normalizations and spells that are:
|
|
19
|
+
- meant for comparing similarity/mergeability of two fields (clone, normalize, compare),
|
|
20
|
+
- and NOT for modifying the actual data!
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const debug = (0, _debug.default)('@natlibfi/melinda-marc-record-merge-reducers:normalize');
|
|
24
|
+
//const debugData = debug.extend('data');
|
|
25
|
+
const debugDev = debug.extend('dev');
|
|
26
|
+
/**
 * Debug-only hook that compares the string forms of a field before and after normalization.
 *
 * NB: All logging statements are currently disabled, so apart from serializing both
 * fields this function has no observable effect.
 *
 * @param {object} oldField - The field before normalization.
 * @param {object} newField - The field after normalization.
 * @returns {undefined}
 */
function debugFieldComparison(oldField, newField) {
  const before = (0, _utils.fieldToString)(oldField);
  const after = (0, _utils.fieldToString)(newField);
  if (before !== after) {
    // Logging is intentionally switched off. Re-enable the line below to trace changes:
    //nvdebug(`NORMALIZE FIELD:\n '${fieldToString(oldField)}' =>\n '${fieldToString(newField)}'`, debugDev);
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * Tells whether a tag (and optionally a subfield code) is one of the personal-name
 * slots this module knows about: tags 100/600/700/800, subfield $a.
 *
 * NB! The tag list covers bibliographic records only (authority records use e.g. 400).
 *
 * @param {string} [tag='???'] - Field tag.
 * @param {string} [subfieldCode] - Subfield code; when omitted only the tag is checked.
 * @returns {boolean} True when the tag matches and the code is either omitted or 'a'.
 */
function containsHumanName(tag = '???', subfieldCode = undefined) {
  const tagMatches = ['100', '600', '700', '800'].includes(tag);
  const codeMatches = subfieldCode === undefined || subfieldCode === 'a';
  return tagMatches && codeMatches;
}
|
|
57
|
+
/**
 * Tells whether a tag (and optionally a subfield code) is one of the corporate-name
 * slots this module knows about: tags 110/610/710/810, subfield $a.
 *
 * NB! The tag list covers bibliographic records only.
 *
 * @param {string} [tag='???'] - Field tag.
 * @param {string} [subfieldCode] - Subfield code; when omitted only the tag is checked.
 * @returns {boolean} True when the tag matches and the code is either omitted or 'a'.
 */
function containsCorporateName(tag = '???', subfieldCode = undefined) {
  const tagMatches = ['110', '610', '710', '810'].includes(tag);
  const codeMatches = subfieldCode === undefined || subfieldCode === 'a';
  return tagMatches && codeMatches;
}
|
|
67
|
+
/**
 * Decides whether a subfield must be left completely untouched by the normalizers.
 *
 * A subfield is skipped when any of these hold:
 *  - it is a $g whose value is exactly 'ENNAKKOTIETO.' (presumably a prepublication marker — confirm),
 *  - it is 035 $a or 035 $z,
 *  - `isControlSubfieldCode()` from utils accepts its code.
 *
 * @param {string} value - Subfield value.
 * @param {string} subfieldCode - Subfield code.
 * @param {string} tag - Tag of the enclosing field.
 * @returns {boolean} True when no normalization should be applied.
 */
function skipAllSubfieldNormalizations(value, subfieldCode, tag) {
  const isEnnakkotietoMarker = subfieldCode === 'g' && value === 'ENNAKKOTIETO.';
  const is035Identifier = tag === '035' && ['a', 'z'].includes(subfieldCode);
  if (isEnnakkotietoMarker || is035Identifier) {
    return true;
  }
  return Boolean((0, _utils.isControlSubfieldCode)(subfieldCode));
}
|
|
80
|
+
/**
 * Decides whether a subfield value should keep its original letter case.
 *
 * Subfields carrying part data are never lowercased (they may contain Roman numerals);
 * everything else defers to skipAllSubfieldNormalizations().
 *
 * @param {string} value - Subfield value.
 * @param {string} subfieldCode - Subfield code.
 * @param {string} tag - Tag of the enclosing field.
 * @returns {boolean} True when lowercasing must be skipped.
 */
function skipSubfieldLowercase(value, subfieldCode, tag) {
  const hasPartData = (0, _normalizeSubfieldValueForComparison.subfieldContainsPartData)(tag, subfieldCode);
  return hasPartData ? true : skipAllSubfieldNormalizations(value, subfieldCode, tag);
}
|
|
87
|
+
/**
 * Tells whether a whole field is exempt from normalization based on its tag.
 *
 * @param {string} tag - Field tag.
 * @returns {boolean} True for the 'LOW' and 'SID' tags, false otherwise.
 */
function skipAllFieldNormalizations(tag) {
  const exemptTags = ['LOW', 'SID'];
  return exemptTags.includes(tag);
}
|
|
93
|
+
/**
 * Returns the lowercased form of a subfield value, unless skipSubfieldLowercase()
 * says the value must keep its original case.
 *
 * @param {string} value - Subfield value.
 * @param {string} subfieldCode - Subfield code.
 * @param {string} tag - Tag of the enclosing field.
 * @returns {string} The original or the lowercased value.
 */
function subfieldValueLowercase(value, subfieldCode, tag) {
  return skipSubfieldLowercase(value, subfieldCode, tag) ? value : value.toLowerCase();
}
|
|
107
|
+
/**
 * Lowercases a subfield's value in place (subject to the skip rules of
 * subfieldValueLowercase()).
 *
 * @param {{code: string, value: string}} sf - Subfield to modify.
 * @param {string} tag - Tag of the enclosing field.
 * @returns {undefined}
 */
function subfieldLowercase(sf, tag) {
  const {value, code} = sf;
  sf.value = subfieldValueLowercase(value, code, tag); // eslint-disable-line functional/immutable-data
}
|
|
110
|
+
|
|
111
|
+
/**
 * Lowercases, in place, the subfield values of the fields that are interesting for
 * comparison: personal and corporate name fields plus 240, 245 and 630.
 * Fields exempted by skipAllFieldNormalizations() and all other tags are left alone.
 *
 * @param {object} field - Data field with `tag` and `subfields`.
 * @returns {undefined}
 */
function fieldLowercase(field) {
  if (isExcludedFromLowercasing(field.tag)) {
    return;
  }
  field.subfields.forEach(sf => subfieldLowercase(sf, field.tag));

  // True when the tag is exempt from all normalization or is not one of the interesting tags.
  function isExcludedFromLowercasing(tag) {
    if (skipAllFieldNormalizations(tag)) {
      return true;
    }
    const isInteresting = containsHumanName(tag) || containsCorporateName(tag) || ['240', '245', '630'].includes(tag);
    return !isInteresting;
  }
}
|
|
127
|
+
/**
 * Field 490 only: strips a trailing "-sarja" style suffix (optional leading space, a hyphen,
 * optional lowercase letters, then "sarja") from every $a value, in place.
 * A value that would become empty is left unchanged.
 *
 * NB! This won't work if the punctuation has not been stripped beforehand.
 *
 * @param {object} field - Data field with `tag` and `subfields`.
 * @returns {undefined}
 */
function hack490SubfieldA(field) {
  if (field.tag !== '490') {
    return;
  }
  field.subfields
    .filter(sf => sf.code === 'a')
    .forEach(sf => {
      const stripped = sf.value.replace(/ ?-(?:[a-z]|ä|ö)*sarja$/u, '');
      if (stripped.length > 0) {
        sf.value = stripped; // eslint-disable-line functional/immutable-data
      }
    });
}
|
|
145
|
+
/**
 * Tells whether the given tag/subfield-code pair is treated as an ISBN slot by this module:
 * 020 $a, or $z of 765, 767, 770, 772, 773, 774, 776, 777, 780, 785, 786 or 787.
 *
 * NB! 020 $z is deliberately not included.
 *
 * @param {string} tag - Field tag.
 * @param {string} subfieldCode - Subfield code.
 * @returns {boolean} True when the pair refers to an ISBN slot.
 */
function tagAndSubfieldCodeReferToIsbn(tag, subfieldCode) {
  if (subfieldCode === 'a') {
    return tag === '020';
  }
  if (subfieldCode === 'z') {
    const tagsWithIsbnInZ = ['765', '767', '770', '772', '773', '774', '776', '777', '780', '785', '786', '787'];
    return tagsWithIsbnInZ.includes(tag);
  }
  return false;
}
|
|
155
|
+
/**
 * Shape check for ISBN-10/ISBN-13 strings: 10 or 13 characters with optional hyphens
 * between them, where the last character may also be 'X' or 'x'.
 *
 * Does not check validity (no checksum verification)!
 *
 * @param {*} value - Candidate value; anything that is not a string yields false
 *   (previously a missing value caused a TypeError).
 * @returns {boolean} True when the value has the shape of an ISBN.
 */
function looksLikeIsbn(value) {
  // A subfield without a string value cannot be an ISBN; don't crash the caller's filter.
  if (typeof value !== 'string') {
    return false;
  }
  return /^(?:[0-9]-?){9}(?:[0-9]-?[0-9]-?[0-9]-?)?[0-9Xx]$/u.test(value);
}
|
|
162
|
+
/**
 * Normalizes, in place, every ISBN-looking value found in an ISBN slot of the field:
 * all hyphens are removed and a lowercase 'x' is turned into 'X'.
 * Fields without subfields are ignored.
 *
 * @param {object} field - Field with `tag` and (optionally) `subfields`.
 * @returns {undefined}
 */
function normalizeISBN(field) {
  if (!field.subfields) {
    return;
  }
  field.subfields.forEach(sf => {
    const isIsbn = tagAndSubfieldCodeReferToIsbn(field.tag, sf.code) && looksLikeIsbn(sf.value);
    if (!isIsbn) {
      return;
    }
    const withoutHyphens = sf.value.replace(/-/ug, '');
    sf.value = withoutHyphens.replace(/x/u, 'X'); // eslint-disable-line functional/immutable-data
  });
}
|
|
176
|
+
|
|
177
|
+
/**
 * Applies the tag-specific normalizations to a field, in place and in this order:
 * ISBN normalization (020 $a, not $z!) and then the 490 $a "-sarja" suffix removal.
 *
 * @param {object} field - Field to modify.
 * @returns {undefined}
 */
function fieldSpecificHacks(field) {
  const hacks = [normalizeISBN, hack490SubfieldA];
  hacks.forEach(applyHack => applyHack(field));
}
|
|
181
|
+
/**
 * Tidies whitespace in every subfield value of a field, in place: leading and trailing
 * runs of spaces/tabs/newlines are removed and inner runs are collapsed to a single space.
 * Fields without subfields are left untouched.
 *
 * @param {object} field - Field with optional `subfields`.
 * @returns {undefined}
 */
function fieldTrimSubfieldValues(field) {
  const tidy = text => text
    .replace(/^[ \t\n]+/u, '')
    .replace(/[ \t\n]+$/u, '')
    .replace(/[ \t\n]+/gu, ' ');

  field.subfields?.forEach(sf => {
    sf.value = tidy(sf.value); // eslint-disable-line functional/immutable-data
  });
}
|
|
188
|
+
|
|
189
|
+
/**
 * Removes decomposed diacritics from every subfield value of a field, in place.
 *
 * Motivation: "Sirén" and the diacriticless "Siren" might refer to the same surname, so this
 * normalization lets us compare authors and avoid duplicate fields.
 *
 * @param {object} field - Data field with `subfields`.
 * @returns {undefined}
 */
function fieldRemoveDecomposedDiacritics(field) {
  field.subfields.forEach(stripSubfield);

  function stripSubfield(sf) {
    sf.value = removeDecomposedDiacritics(sf.value); // eslint-disable-line functional/immutable-data
  }
}
|
|
196
|
+
|
|
197
|
+
/**
 * Deletes every character that has the Unicode `Diacritic` property from the value.
 *
 * NB #1: Does nothing to precomposed letters. Do String.normalize('NFD') first if you want to handle them.
 * NB #2: Finnish letters 'å', 'ä', 'ö', 'Å', 'Ä' and 'Ö' should be precomposed before calling this
 *        (i.e. they are then kept as they are).
 * NB #3: Calling our very own fixComposition() before this function handles both #1 and #2.
 *
 * @param {*} [value=''] - Value to clean; it is converted with String() first.
 * @returns {string} The value without diacritic characters.
 */
function removeDecomposedDiacritics(value = '') {
  const text = String(value);
  return text.replace(/\p{Diacritic}/gu, '');
}
|
|
203
|
+
/**
 * Normalizes a single subfield value. NB! For comparison of values only.
 *
 * Steps, in order:
 *  1. lowercase (unless the subfield is exempt),
 *  2. part-data normalization (e.g. s. = sivut = pp.),
 *  3. removal of square brackets around a leading bracketed chunk,
 *  4. for 130/730 $a only: the first ' : ' becomes ', '
 *     ("Halloween ends (elokuva, 2022)" vs "Halloween ends (elokuva : 2023)").
 *
 * Not going to be done in the foreseeable future, but kept here for discussion:
 *   ø => ö? (might be language dependent: 041 $a fin => ö, 041 $a eng => o?), Ø => Ö?,
 *   ß => ss, þ => th (both upper and lower case), ...
 * Probably nots:
 *   ü => y (though this correlates with Finnish letter-to-sound rules),
 *   w => v (OK for Finnish sorting in certain cases, but that is not what we do here).
 * Decomposed values should probably be used in such code.
 *
 * @param {string} value - Subfield value.
 * @param {string} subfieldCode - Subfield code.
 * @param {string} tag - Tag of the enclosing field.
 * @returns {string} The normalized value.
 */
function normalizeSubfieldValue(value, subfieldCode, tag) {
  const lowercased = subfieldValueLowercase(value, subfieldCode, tag);
  const partNormalized = (0, _normalizeSubfieldValueForComparison.normalizePartData)(lowercased, subfieldCode, tag);
  const unbracketed = partNormalized.replace(/^\[([^[\]]+)\]/gu, '$1'); // eslint-disable-line prefer-named-capture-group

  const is130or730A = subfieldCode === 'a' && ['130', '730'].includes(tag);
  return is130or730A ? unbracketed.replace(' : ', ', ') : unbracketed;
}
|
|
230
|
+
/**
 * Returns a clone of the field with punctuation stripped and whitespace tidied.
 * The original field is not modified. Fields exempted by fieldSkipNormalization()
 * are cloned but otherwise returned unchanged.
 *
 * @param {object} field - Field to clone.
 * @returns {object} The (possibly cleaned) clone.
 */
function cloneAndRemovePunctuation(field) {
  const copy = (0, _clone.default)(field);
  if (!fieldSkipNormalization(field)) {
    (0, _punctuation.fieldStripPunctuation)(copy);
    fieldTrimSubfieldValues(copy);
    debugDev('PUNC');
    debugFieldComparison(field, copy);
  }
  return copy;
}
|
|
241
|
+
/**
 * Drops characters that are irrelevant when comparing two values.
 *
 *  - Field 080 is returned untouched.
 *  - Single and double quotes are removed everywhere else (3" refers to inches, but as this
 *    is for comparison only we don't mind).
 *  - MRA-273: in $a of 100/400/600/700/800 the spaces after "<letter>." are removed so that
 *    differently spaced name initials compare equal. (400 is used in authority records;
 *    it is not a bibliographic field at all.)
 *    NB #1: removing the spaces is simply the easier option; a space should actually be
 *           used, but that does not matter as this is for comparison only.
 *    NB #2: we might/should eventually write a validator/fixer that adds those spaces.
 *           After that point this exception should become obsolete.
 *
 * @param {string} value - Subfield value.
 * @param {string} tag - Tag of the enclosing field.
 * @param {string} subfieldCode - Subfield code.
 * @returns {string} The cleaned value.
 */
function removeCharsThatDontCarryMeaning(value, tag, subfieldCode) {
  if (tag === '080') {
    return value;
  }
  const unquoted = value.replace(/['"]/gu, '');
  const isNameSubfield = subfieldCode === 'a' && ['100', '400', '600', '700', '800'].includes(tag);
  if (!isNameSubfield) {
    return unquoted;
  }
  return unquoted.replace(/([A-Z]|Å|Ä|Ö)\. +/ugi, '$1.');
}
|
|
258
|
+
/**
 * Runs the field-level normalizations on the given field, in place and in this order:
 * punctuation stripping, lowercasing and control number normalization (FIN11 vs FI-MELINDA etc.).
 *
 * @param {object} field - Field to normalize (mutated).
 * @returns {object} The same field object.
 */
function normalizeField(field) {
  const steps = [
    target => (0, _punctuation.fieldStripPunctuation)(target),
    target => fieldLowercase(target),
    target => (0, _normalizeIdentifiers.fieldNormalizeControlNumbers)(target)
  ];
  steps.forEach(step => step(field));
  return field;
}
|
|
265
|
+
/**
 * Produces a heavily normalized clone of a field. The original field is not modified.
 *
 * NB! The returned field is for comparison purposes only; some of the normalizations
 * might be considered overkill for any other use.
 *
 * Fields exempted by fieldSkipNormalization() are cloned and returned as they are.
 *
 * @param {object} field - Field to clone and normalize.
 * @returns {object} The normalized clone.
 */
function cloneAndNormalizeFieldForComparison(field) {
  const comparable = (0, _clone.default)(field);
  if (fieldSkipNormalization(field)) {
    return comparable;
  }

  // Subfield-level pass first (open question: for all fields or only some?) ...
  comparable.subfields.forEach(sf => {
    const normalized = normalizeSubfieldValue(sf.value, sf.code, field.tag);
    sf.value = removeCharsThatDontCarryMeaning(normalized, field.tag, sf.code); // eslint-disable-line functional/immutable-data
  });

  // ... then the field-level passes, whose order matters
  // (e.g. the 490 hack needs punctuation to be stripped already).
  normalizeField(comparable);
  fieldRemoveDecomposedDiacritics(comparable);
  fieldSpecificHacks(comparable);
  fieldTrimSubfieldValues(comparable);

  debugFieldComparison(field, comparable); // For debugging purposes only

  return comparable;
}
|
|
286
|
+
/**
 * Tells whether a field is exempt from comparison normalization: fields without
 * subfields, and fields tagged 018, 066, 080 or 083.
 *
 * @param {object} field - Field to inspect.
 * @returns {boolean} True when the field must not be normalized.
 */
function fieldSkipNormalization(field) {
  const exemptTags = ['018', '066', '080', '083'];
  return !field.subfields || exemptTags.includes(field.tag);
}
|
|
292
|
+
//# sourceMappingURL=normalizeFieldForComparison.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalizeFieldForComparison.js","names":["_clone","_interopRequireDefault","require","_punctuation","_utils","_normalizeIdentifiers","_debug","_normalizeSubfieldValueForComparison","obj","__esModule","default","debug","createDebugLogger","debugDev","extend","debugFieldComparison","oldField","newField","oldString","fieldToString","newString","containsHumanName","tag","subfieldCode","undefined","includes","containsCorporateName","skipAllSubfieldNormalizations","value","isControlSubfieldCode","skipSubfieldLowercase","subfieldContainsPartData","skipAllFieldNormalizations","subfieldValueLowercase","newValue","toLowerCase","subfieldLowercase","sf","code","fieldLowercase","field","skipFieldLowercase","subfields","forEach","hack490SubfieldA","removeSarja","subfield","tmp","replace","length","tagAndSubfieldCodeReferToIsbn","looksLikeIsbn","match","normalizeISBN","relevantSubfields","filter","normalizeIsbnSubfield","fieldSpecificHacks","fieldTrimSubfieldValues","fieldRemoveDecomposedDiacritics","removeDecomposedDiacritics","String","normalizeSubfieldValue","normalizePartData","cloneAndRemovePunctuation","clonedField","clone","fieldSkipNormalization","fieldStripPunctuation","removeCharsThatDontCarryMeaning","normalizeField","fieldNormalizeControlNumbers","cloneAndNormalizeFieldForComparison"],"sources":["../src/normalizeFieldForComparison.js"],"sourcesContent":["/*\n Note that this file contains very powerful normalizations and spells that are:\n - meant for comparing similarity/mergability of two fields (clone, normalize, compare),\n - and NOT for modifying the actual data!\n*/\nimport clone from 'clone';\nimport {fieldStripPunctuation} from './punctuation2';\nimport {fieldToString, isControlSubfieldCode} from './utils.js';\n\nimport {fieldNormalizeControlNumbers/*, normalizeControlSubfieldValue*/} from './normalize-identifiers';\nimport createDebugLogger from 'debug';\nimport {normalizePartData, subfieldContainsPartData} from 
'./normalizeSubfieldValueForComparison';\n\nconst debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:normalize');\n//const debugData = debug.extend('data');\nconst debugDev = debug.extend('dev');\n\nfunction debugFieldComparison(oldField, newField) { // NB: Debug-only function!\n /*\n // We may drop certain subfields:\n if (oldField.subfields.length === newField.subfields.length) { // eslint-disable-line functional/no-conditional-statements\n oldField.subfields.forEach((subfield, index) => {\n const newValue = newField.subfields[index].value;\n if (subfield.value !== newValue) { // eslint-disable-line functional/no-conditional-statements\n nvdebug(`NORMALIZE SUBFIELD: '${subfield.value}' => '${newValue}'`, debugDev);\n }\n });\n }\n */\n const oldString = fieldToString(oldField);\n const newString = fieldToString(newField);\n if (oldString === newString) {\n return;\n }\n //nvdebug(`NORMALIZE FIELD:\\n '${fieldToString(oldField)}' =>\\n '${fieldToString(newField)}'`, debugDev);\n}\n\nfunction containsHumanName(tag = '???', subfieldCode = undefined) {\n // NB! This set is for bibs! Auth has 400... What else...\n if (['100', '600', '700', '800'].includes(tag)) {\n if (subfieldCode === undefined || subfieldCode === 'a') {\n return true;\n }\n }\n // Others?\n return false;\n}\n\nfunction containsCorporateName(tag = '???', subfieldCode = undefined) {\n // NB! This set is for bibs! Auth has 400... 
What else...\n if (['110', '610', '710', '810'].includes(tag)) {\n if (subfieldCode === undefined || subfieldCode === 'a') {\n return true;\n }\n }\n // Others?\n return false;\n}\n\nfunction skipAllSubfieldNormalizations(value, subfieldCode, tag) {\n\n\n if (subfieldCode === 'g' && value === 'ENNAKKOTIETO.') {\n return true;\n }\n\n\n if (tag === '035' && ['a', 'z'].includes(subfieldCode)) { // A\n return true;\n }\n\n if (isControlSubfieldCode(subfieldCode)) {\n return true;\n }\n return false;\n}\n\nfunction skipSubfieldLowercase(value, subfieldCode, tag) {\n // These may contain Roman Numerals...\n if (subfieldContainsPartData(tag, subfieldCode)) {\n return true;\n }\n\n return skipAllSubfieldNormalizations(value, subfieldCode, tag);\n}\n\nfunction skipAllFieldNormalizations(tag) {\n if (['LOW', 'SID'].includes(tag)) {\n return true;\n }\n return false;\n}\n\n\nfunction subfieldValueLowercase(value, subfieldCode, tag) {\n if (skipSubfieldLowercase(value, subfieldCode, tag)) {\n return value;\n }\n\n //return value.toLowerCase();\n const newValue = value.toLowerCase();\n if (newValue !== value) {\n //nvdebug(`SVL ${tag} $${subfieldCode} '${value}' =>`, debugDev);\n //nvdebug(`SVL ${tag} $${subfieldCode} '${newValue}'`, debugDev);\n return newValue;\n }\n return value;\n}\n\nfunction subfieldLowercase(sf, tag) {\n sf.value = subfieldValueLowercase(sf.value, sf.code, tag); // eslint-disable-line functional/immutable-data\n}\n\nfunction fieldLowercase(field) {\n if (skipFieldLowercase(field)) {\n return;\n }\n\n field.subfields.forEach(sf => subfieldLowercase(sf, field.tag));\n\n function skipFieldLowercase(field) {\n if (skipAllFieldNormalizations(field.tag)) {\n return true;\n }\n // Skip non-interesting fields\n if (!containsHumanName(field.tag) && !containsCorporateName(field.tag) && !['240', '245', '630'].includes(field.tag)) {\n return true;\n }\n\n return false;\n }\n}\n\n\nfunction hack490SubfieldA(field) {\n if (field.tag !== '490') {\n return;\n }\n 
field.subfields.forEach(sf => removeSarja(sf));\n\n // NB! This won't work, if the punctuation has not been stripped beforehand!\n function removeSarja(subfield) {\n if (subfield.code !== 'a') {\n return;\n }\n const tmp = subfield.value.replace(/ ?-(?:[a-z]|ä|ö)*sarja$/u, '');\n if (tmp.length > 0) {\n subfield.value = tmp; // eslint-disable-line functional/immutable-data\n return;\n }\n }\n}\n\nexport function tagAndSubfieldCodeReferToIsbn(tag, subfieldCode) {\n // NB! We don't do this to 020$z!\n if (subfieldCode === 'z' && ['765', '767', '770', '772', '773', '774', '776', '777', '780', '785', '786', '787'].includes(tag)) {\n return true;\n }\n if (tag === '020' && subfieldCode === 'a') {\n return true;\n }\n return false;\n}\n\nfunction looksLikeIsbn(value) {\n // Does not check validity!\n if (value.match(/^(?:[0-9]-?){9}(?:[0-9]-?[0-9]-?[0-9]-?)?[0-9Xx]$/u)) {\n return true;\n }\n return false;\n}\n\nfunction normalizeISBN(field) {\n if (!field.subfields) {\n return;\n }\n\n //nvdebug(`ISBN-field? ${fieldToString(field)}`);\n const relevantSubfields = field.subfields.filter(sf => tagAndSubfieldCodeReferToIsbn(field.tag, sf.code) && looksLikeIsbn(sf.value));\n relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf));\n\n function normalizeIsbnSubfield(sf) {\n //nvdebug(` ISBN-subfield? 
${subfieldToString(sf)}`);\n sf.value = sf.value.replace(/-/ug, ''); // eslint-disable-line functional/immutable-data\n sf.value = sf.value.replace(/x/u, 'X'); // eslint-disable-line functional/immutable-data\n }\n\n}\n\nfunction fieldSpecificHacks(field) {\n normalizeISBN(field); // 020$a, not $z!\n hack490SubfieldA(field);\n}\n\nexport function fieldTrimSubfieldValues(field) {\n field.subfields?.forEach((sf) => {\n sf.value = sf.value.replace(/^[ \\t\\n]+/u, ''); // eslint-disable-line functional/immutable-data\n sf.value = sf.value.replace(/[ \\t\\n]+$/u, ''); // eslint-disable-line functional/immutable-data\n sf.value = sf.value.replace(/[ \\t\\n]+/gu, ' '); // eslint-disable-line functional/immutable-data\n });\n}\n\nfunction fieldRemoveDecomposedDiacritics(field) {\n // Raison d'être/motivation: \"Sirén\" and diacriticless \"Siren\" might refer to a same surname, so this normalization\n // allows us to compare authors and avoid duplicate fields.\n field.subfields.forEach((sf) => {\n sf.value = removeDecomposedDiacritics(sf.value); // eslint-disable-line functional/immutable-data\n });\n}\n\nfunction removeDecomposedDiacritics(value = '') {\n // NB #1: Does nothing to precomposed letters. Do String.normalize('NFD') first, if you want to handle them.\n // NB #2: Finnish letters 'å', 'ä', 'ö', 'Å', Ä', and 'Ö' should be handled (=precomposed) before calling this. (= keep them as is)\n // NB #3: Calling our very own fixComposition() before this function handles both #1 and #2.\n return String(value).replace(/\\p{Diacritic}/gu, '');\n}\n\nfunction normalizeSubfieldValue(value, subfieldCode, tag) {\n // NB! For comparison of values only\n /* eslint-disable */\n value = subfieldValueLowercase(value, subfieldCode, tag);\n\n // Normalize: s. 
= sivut = pp.\n value = normalizePartData(value, subfieldCode, tag);\n value = value.replace(/^\\[([^[\\]]+)\\]/gu, '$1'); // eslint-disable-line functional/immutable-data, prefer-named-capture-group\n\n if (['130', '730'].includes(tag) && subfieldCode === 'a') {\n value = value.replace(' : ', ', '); // \"Halloween ends (elokuva, 2022)\" vs \"Halloween ends (elokuva : 2023)\"\n }\n /* eslint-enable */\n\n // Not going to do these in the foreseeable future, but keeping them here for discussion:\n // Possible normalizations include but are not limited to:\n // ø => ö? Might be language dependent: 041 $a fin => ö, 041 $a eng => o?\n // Ø => Ö?\n // ß => ss\n // þ => th (NB! Both upper and lower case)\n // ...\n // Probably nots:\n // ü => y (probably not, though this correlates with Finnish letter-to-sound rules)\n // w => v (OK for Finnish sorting in certain cases, but we are not here, are we?)\n // I guess we should use decomposed values in code here. (Not sure what composition my examples above use.)\n return value;\n}\n\nexport function cloneAndRemovePunctuation(field) {\n const clonedField = clone(field);\n if (fieldSkipNormalization(field)) {\n return clonedField;\n }\n fieldStripPunctuation(clonedField);\n fieldTrimSubfieldValues(clonedField);\n debugDev('PUNC');\n debugFieldComparison(field, clonedField);\n\n return clonedField;\n}\n\nfunction removeCharsThatDontCarryMeaning(value, tag, subfieldCode) {\n if (tag === '080') {\n return value;\n }\n /* eslint-disable */\n // 3\" refers to inches, but as this is for comparison only we don't mind...\n value = value.replace(/['\"]/gu, '');\n // MRA-273: Handle X00$a name initials.\n // NB #1: that we remove spaces for comparison (as it simpler), though actually space should be used. Doesn't matter as this is comparison only.\n // NB #2: we might/should eventually write a validator/fixer that adds those spaces. 
After that point, this expection should become obsolete.\n if (subfieldCode === 'a' && ['100', '400', '600', '700', '800'].includes(tag)) { // 400 is used in auth records. It's not a bib field at all.\n value = value.replace(/([A-Z]|Å|Ä|Ö)\\. +/ugi, '$1.');\n }\n /* eslint-enable */\n return value;\n}\n\nfunction normalizeField(field) {\n //sf.value = removeDecomposedDiacritics(sf.value); // eslint-disable-line functional/immutable-data\n fieldStripPunctuation(field);\n fieldLowercase(field);\n fieldNormalizeControlNumbers(field); // FIN11 vs FI-MELINDA etc.\n return field;\n}\n\nexport function cloneAndNormalizeFieldForComparison(field) {\n // NB! This new field is for comparison purposes only.\n // Some of the normalizations might be considered a bit overkill for other purposes.\n const clonedField = clone(field);\n if (fieldSkipNormalization(field)) {\n return clonedField;\n }\n clonedField.subfields.forEach((sf) => { // Do this for all fields or some fields?\n sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag); // eslint-disable-line functional/immutable-data\n sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);// eslint-disable-line functional/immutable-data\n });\n\n normalizeField(clonedField); // eslint-disable-line functional/immutable-data\n fieldRemoveDecomposedDiacritics(clonedField);\n fieldSpecificHacks(clonedField);\n fieldTrimSubfieldValues(clonedField);\n\n\n debugFieldComparison(field, clonedField); // For debugging purposes only\n\n return clonedField;\n}\n\nfunction fieldSkipNormalization(field) {\n if (!field.subfields || ['018', '066', '080', '083'].includes(field.tag)) {\n return true;\n }\n return 
false;\n}\n"],"mappings":";;;;;;;;;AAKA,IAAAA,MAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,YAAA,GAAAD,OAAA;AACA,IAAAE,MAAA,GAAAF,OAAA;AAEA,IAAAG,qBAAA,GAAAH,OAAA;AACA,IAAAI,MAAA,GAAAL,sBAAA,CAAAC,OAAA;AACA,IAAAK,oCAAA,GAAAL,OAAA;AAAkG,SAAAD,uBAAAO,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,KAAAE,OAAA,EAAAF,GAAA;AAXlG;AACA;AACA;AACA;AACA;;AASA,MAAMG,KAAK,GAAG,IAAAC,cAAiB,EAAC,wDAAwD,CAAC;AACzF;AACA,MAAMC,QAAQ,GAAGF,KAAK,CAACG,MAAM,CAAC,KAAK,CAAC;AAEpC,SAASC,oBAAoBA,CAACC,QAAQ,EAAEC,QAAQ,EAAE;EAAE;EAClD;AACF;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;EACE,MAAMC,SAAS,GAAG,IAAAC,oBAAa,EAACH,QAAQ,CAAC;EACzC,MAAMI,SAAS,GAAG,IAAAD,oBAAa,EAACF,QAAQ,CAAC;EACzC,IAAIC,SAAS,KAAKE,SAAS,EAAE;IAC3B;EACF;EACA;AACF;;AAEA,SAASC,iBAAiBA,CAACC,GAAG,GAAG,KAAK,EAAEC,YAAY,GAAGC,SAAS,EAAE;EAChE;EACA,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACC,QAAQ,CAACH,GAAG,CAAC,EAAE;IAC9C,IAAIC,YAAY,KAAKC,SAAS,IAAID,YAAY,KAAK,GAAG,EAAE;MACtD,OAAO,IAAI;IACb;EACF;EACA;EACA,OAAO,KAAK;AACd;AAEA,SAASG,qBAAqBA,CAACJ,GAAG,GAAG,KAAK,EAAEC,YAAY,GAAGC,SAAS,EAAE;EACpE;EACA,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACC,QAAQ,CAACH,GAAG,CAAC,EAAE;IAC9C,IAAIC,YAAY,KAAKC,SAAS,IAAID,YAAY,KAAK,GAAG,EAAE;MACtD,OAAO,IAAI;IACb;EACF;EACA;EACA,OAAO,KAAK;AACd;AAEA,SAASI,6BAA6BA,CAACC,KAAK,EAAEL,YAAY,EAAED,GAAG,EAAE;EAG/D,IAAIC,YAAY,KAAK,GAAG,IAAIK,KAAK,KAAK,eAAe,EAAE;IACrD,OAAO,IAAI;EACb;EAGA,IAAIN,GAAG,KAAK,KAAK,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAACG,QAAQ,CAACF,YAAY,CAAC,EAAE;IAAE;IACxD,OAAO,IAAI;EACb;EAEA,IAAI,IAAAM,4BAAqB,EAACN,YAAY,CAAC,EAAE;IACvC,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAEA,SAASO,qBAAqBA,CAACF,KAAK,EAAEL,YAAY,EAAED,GAAG,EAAE;EACvD;EACA,IAAI,IAAAS,6DAAwB,EAACT,GAAG,EAAEC,YAAY,CAAC,EAAE;IAC/C,OAAO,IAAI;EACb;EAEA,OAAOI,6BAA6B,CAACC,KAAK,EAAEL,YAAY,EAAED,GAAG,CAAC;AAChE;AAEA,SAASU,0BAA0BA,CAACV,GAAG,EAAE;EACvC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAACG,QAAQ,CAACH,GAAG,CAAC,EAAE;IAChC,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAGA,SAASW,sBAAsBA,CAACL,KAAK,EAAEL,YAAY,EAAED,GAAG,EAAE;EACxD,IAAIQ,qBAAqB,CAACF,KA
AK,EAAEL,YAAY,EAAED,GAAG,CAAC,EAAE;IACnD,OAAOM,KAAK;EACd;;EAEA;EACA,MAAMM,QAAQ,GAAGN,KAAK,CAACO,WAAW,CAAC,CAAC;EACpC,IAAID,QAAQ,KAAKN,KAAK,EAAE;IACtB;IACA;IACA,OAAOM,QAAQ;EACjB;EACA,OAAON,KAAK;AACd;AAEA,SAASQ,iBAAiBA,CAACC,EAAE,EAAEf,GAAG,EAAE;EAClCe,EAAE,CAACT,KAAK,GAAGK,sBAAsB,CAACI,EAAE,CAACT,KAAK,EAAES,EAAE,CAACC,IAAI,EAAEhB,GAAG,CAAC,CAAC,CAAC;AAC7D;;AAEA,SAASiB,cAAcA,CAACC,KAAK,EAAE;EAC7B,IAAIC,kBAAkB,CAACD,KAAK,CAAC,EAAE;IAC7B;EACF;EAEAA,KAAK,CAACE,SAAS,CAACC,OAAO,CAACN,EAAE,IAAID,iBAAiB,CAACC,EAAE,EAAEG,KAAK,CAAClB,GAAG,CAAC,CAAC;EAE/D,SAASmB,kBAAkBA,CAACD,KAAK,EAAE;IACjC,IAAIR,0BAA0B,CAACQ,KAAK,CAAClB,GAAG,CAAC,EAAE;MACzC,OAAO,IAAI;IACb;IACA;IACA,IAAI,CAACD,iBAAiB,CAACmB,KAAK,CAAClB,GAAG,CAAC,IAAI,CAACI,qBAAqB,CAACc,KAAK,CAAClB,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACG,QAAQ,CAACe,KAAK,CAAClB,GAAG,CAAC,EAAE;MACpH,OAAO,IAAI;IACb;IAEA,OAAO,KAAK;EACd;AACF;AAGA,SAASsB,gBAAgBA,CAACJ,KAAK,EAAE;EAC/B,IAAIA,KAAK,CAAClB,GAAG,KAAK,KAAK,EAAE;IACvB;EACF;EACAkB,KAAK,CAACE,SAAS,CAACC,OAAO,CAACN,EAAE,IAAIQ,WAAW,CAACR,EAAE,CAAC,CAAC;;EAE9C;EACA,SAASQ,WAAWA,CAACC,QAAQ,EAAE;IAC7B,IAAIA,QAAQ,CAACR,IAAI,KAAK,GAAG,EAAE;MACzB;IACF;IACA,MAAMS,GAAG,GAAGD,QAAQ,CAAClB,KAAK,CAACoB,OAAO,CAAC,0BAA0B,EAAE,EAAE,CAAC;IAClE,IAAID,GAAG,CAACE,MAAM,GAAG,CAAC,EAAE;MAClBH,QAAQ,CAAClB,KAAK,GAAGmB,GAAG,CAAC,CAAC;MACtB;IACF;EACF;AACF;AAEO,SAASG,6BAA6BA,CAAC5B,GAAG,EAAEC,YAAY,EAAE;EAC/D;EACA,IAAIA,YAAY,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACE,QAAQ,CAACH,GAAG,CAAC,EAAE;IAC9H,OAAO,IAAI;EACb;EACA,IAAIA,GAAG,KAAK,KAAK,IAAIC,YAAY,KAAK,GAAG,EAAE;IACzC,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAEA,SAAS4B,aAAaA,CAACvB,KAAK,EAAE;EAC5B;EACA,IAAIA,KAAK,CAACwB,KAAK,CAAC,oDAAoD,CAAC,EAAE;IACrE,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAEA,SAASC,aAAaA,CAACb,KAAK,EAAE;EAC5B,IAAI,CAACA,KAAK,CAACE,SAAS,EAAE;IACpB;EACF;;EAEA;EACA,MAAMY,iBAAiB,GAAGd,KAAK,CAACE,SAAS,CAACa,MAAM,CAAClB,EAAE,IAAIa,6BAA6B,CAACV,K
AAK,CAAClB,GAAG,EAAEe,EAAE,CAACC,IAAI,CAAC,IAAIa,aAAa,CAACd,EAAE,CAACT,KAAK,CAAC,CAAC;EACpI0B,iBAAiB,CAACX,OAAO,CAACN,EAAE,IAAImB,qBAAqB,CAACnB,EAAE,CAAC,CAAC;EAE1D,SAASmB,qBAAqBA,CAACnB,EAAE,EAAE;IACjC;IACAA,EAAE,CAACT,KAAK,GAAGS,EAAE,CAACT,KAAK,CAACoB,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC;IACxCX,EAAE,CAACT,KAAK,GAAGS,EAAE,CAACT,KAAK,CAACoB,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC;EAC1C;AAEF;;AAEA,SAASS,kBAAkBA,CAACjB,KAAK,EAAE;EACjCa,aAAa,CAACb,KAAK,CAAC,CAAC,CAAC;EACtBI,gBAAgB,CAACJ,KAAK,CAAC;AACzB;AAEO,SAASkB,uBAAuBA,CAAClB,KAAK,EAAE;EAC7CA,KAAK,CAACE,SAAS,EAAEC,OAAO,CAAEN,EAAE,IAAK;IAC/BA,EAAE,CAACT,KAAK,GAAGS,EAAE,CAACT,KAAK,CAACoB,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,CAAC;IAC/CX,EAAE,CAACT,KAAK,GAAGS,EAAE,CAACT,KAAK,CAACoB,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,CAAC;IAC/CX,EAAE,CAACT,KAAK,GAAGS,EAAE,CAACT,KAAK,CAACoB,OAAO,CAAC,YAAY,EAAE,GAAG,CAAC,CAAC,CAAC;EAClD,CAAC,CAAC;AACJ;;AAEA,SAASW,+BAA+BA,CAACnB,KAAK,EAAE;EAC9C;EACA;EACAA,KAAK,CAACE,SAAS,CAACC,OAAO,CAAEN,EAAE,IAAK;IAC9BA,EAAE,CAACT,KAAK,GAAGgC,0BAA0B,CAACvB,EAAE,CAACT,KAAK,CAAC,CAAC,CAAC;EACnD,CAAC,CAAC;AACJ;;AAEA,SAASgC,0BAA0BA,CAAChC,KAAK,GAAG,EAAE,EAAE;EAC9C;EACA;EACA;EACA,OAAOiC,MAAM,CAACjC,KAAK,CAAC,CAACoB,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;AACrD;AAEA,SAASc,sBAAsBA,CAAClC,KAAK,EAAEL,YAAY,EAAED,GAAG,EAAE;EACxD;EACA;EACAM,KAAK,GAAGK,sBAAsB,CAACL,KAAK,EAAEL,YAAY,EAAED,GAAG,CAAC;;EAExD;EACAM,KAAK,GAAG,IAAAmC,sDAAiB,EAACnC,KAAK,EAAEL,YAAY,EAAED,GAAG,CAAC;EACnDM,KAAK,GAAGA,KAAK,CAACoB,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC,CAAC;;EAEjD,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAACvB,QAAQ,CAACH,GAAG,CAAC,IAAIC,YAAY,KAAK,GAAG,EAAE;IACxDK,KAAK,GAAGA,KAAK,CAACoB,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;EACtC;EACA;;EAEA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA,OAAOpB,KAAK;AACd;AAEO,SAASoC,yBAAyBA,CAACxB,KAAK,EAAE;EAC/C,MAAMyB,WAAW,GAAG,IAAAC,cAAK,EAAC1B,KAAK,CAAC;EAChC,IAAI2B,sBAAsB,CAAC3B,KAAK,CAAC,EAAE;IACjC,OAAOyB,WAAW;EACpB;EACA,IAAAG,kCAAqB,EAACH,WAAW,CAAC;EAClCP,uBAAuB,CAACO,WAAW,CAAC;EACpCpD,QAAQ,CAAC,MAAM,CA
AC;EAChBE,oBAAoB,CAACyB,KAAK,EAAEyB,WAAW,CAAC;EAExC,OAAOA,WAAW;AACpB;AAEA,SAASI,+BAA+BA,CAACzC,KAAK,EAAEN,GAAG,EAAEC,YAAY,EAAE;EACjE,IAAID,GAAG,KAAK,KAAK,EAAE;IACjB,OAAOM,KAAK;EACd;EACA;EACA;EACAA,KAAK,GAAGA,KAAK,CAACoB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;EACnC;EACA;EACA;EACA,IAAIzB,YAAY,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACE,QAAQ,CAACH,GAAG,CAAC,EAAE;IAAE;IAC/EM,KAAK,GAAGA,KAAK,CAACoB,OAAO,CAAC,sBAAsB,EAAE,KAAK,CAAC;EACtD;EACA;EACA,OAAOpB,KAAK;AACd;AAEA,SAAS0C,cAAcA,CAAC9B,KAAK,EAAE;EAC7B;EACA,IAAA4B,kCAAqB,EAAC5B,KAAK,CAAC;EAC5BD,cAAc,CAACC,KAAK,CAAC;EACrB,IAAA+B,kDAA4B,EAAC/B,KAAK,CAAC,CAAC,CAAC;EACrC,OAAOA,KAAK;AACd;AAEO,SAASgC,mCAAmCA,CAAChC,KAAK,EAAE;EACzD;EACA;EACA,MAAMyB,WAAW,GAAG,IAAAC,cAAK,EAAC1B,KAAK,CAAC;EAChC,IAAI2B,sBAAsB,CAAC3B,KAAK,CAAC,EAAE;IACjC,OAAOyB,WAAW;EACpB;EACAA,WAAW,CAACvB,SAAS,CAACC,OAAO,CAAEN,EAAE,IAAK;IAAE;IACtCA,EAAE,CAACT,KAAK,GAAGkC,sBAAsB,CAACzB,EAAE,CAACT,KAAK,EAAES,EAAE,CAACC,IAAI,EAAEE,KAAK,CAAClB,GAAG,CAAC,CAAC,CAAC;IACjEe,EAAE,CAACT,KAAK,GAAGyC,+BAA+B,CAAChC,EAAE,CAACT,KAAK,EAAEY,KAAK,CAAClB,GAAG,EAAEe,EAAE,CAACC,IAAI,CAAC,CAAC;EAC3E,CAAC,CAAC;;EAEFgC,cAAc,CAACL,WAAW,CAAC,CAAC,CAAC;EAC7BN,+BAA+B,CAACM,WAAW,CAAC;EAC5CR,kBAAkB,CAACQ,WAAW,CAAC;EAC/BP,uBAAuB,CAACO,WAAW,CAAC;EAGpClD,oBAAoB,CAACyB,KAAK,EAAEyB,WAAW,CAAC,CAAC,CAAC;;EAE1C,OAAOA,WAAW;AACpB;AAEA,SAASE,sBAAsBA,CAAC3B,KAAK,EAAE;EACrC,IAAI,CAACA,KAAK,CAACE,SAAS,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACjB,QAAQ,CAACe,KAAK,CAAClB,GAAG,CAAC,EAAE;IACxE,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd"}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.normalizePartData = normalizePartData;
|
|
7
|
+
exports.partsAgree = partsAgree;
|
|
8
|
+
exports.subfieldContainsPartData = subfieldContainsPartData;
|
|
9
|
+
var _utils = require("./utils");
|
|
10
|
+
var _debug = _interopRequireDefault(require("debug"));
|
|
11
|
+
// Compiler-generated interop helper: returns obj unchanged when it is flagged as an ES module (obj.__esModule is truthy), otherwise wraps it as { default: obj } so callers can always read `.default`.
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
12
|
+
// Normalizes at least 490$v and 773$g which contain information such as "Raita 5" vs "5", and "Osa 3" vs "Osa III".
|
|
13
|
+
|
|
14
|
+
const debug = (0, _debug.default)('@natlibfi/melinda-marc-record-merge-reducers:normalizePart');
|
|
15
|
+
//const debugData = debug.extend('data');
|
|
16
|
+
const debugDev = debug.extend('dev');
|
|
17
|
+
// Tells whether the given tag/subfield code pair is one that this module treats as part data:
// subfield v of fields 490, 800, 810, 811 and 830, and subfield g of field 773.
// Returns a boolean; any other combination yields false.
function subfieldContainsPartData(tag, subfieldCode) {
  switch (subfieldCode) {
    case 'v':
      return ['490', '800', '810', '811', '830'].includes(tag);
    case 'g':
      return tag === '773';
    default:
      return false;
  }
}
|
|
26
|
+
// Splits a part-data string at its LAST space into [lhs, rhs].
// - A value consisting solely of a bracketed number, optionally followed by punctuation
//   or spaces (e.g. "[5]."), is first reduced to the bare number.
// - When the value contains no space, lhs is undefined and rhs is the whole value.
// Note that only the last space is considered, so "Levy 2, raita 15" yields
// ["Levy 2, raita", "15"].
function splitPartData(originalValue) {
  const value = originalValue.replace(/^\[([0-9]+)\][-.,:; ]*$/ui, '$1'); // eslint-disable-line prefer-named-capture-group
  const splitPoint = value.lastIndexOf(' ');
  if (splitPoint === -1) {
    return [undefined, value];
  }
  // slice() replaces the legacy substr(); for these non-negative arguments the results are identical.
  const lhs = value.slice(0, splitPoint);
  const rhs = value.slice(splitPoint + 1);
  return [lhs, rhs];
}
|
|
36
|
+
// Maps a part-type label to a canonical lower-case form.
// undefined is passed through untouched. Known synonyms collapse to the Finnish
// singular nominative ('osa' or 'sivu'); any other label is returned lower-cased.
function normalizePartType(originalValue) {
  if (originalValue === undefined) {
    return undefined;
  }
  const lowercased = originalValue.toLowerCase();
  // [canonical form, accepted spellings]
  const synonymGroups = [
    ['osa', ['osa', 'part', 'teil']],
    ['sivu', ['p.', 'page', 'pages', 'pp.', 's.', 'sidor', 'sivu', 'sivut']]
  ];
  const match = synonymGroups.find(([, spellings]) => spellings.includes(lowercased));
  return match ? match[0] : lowercased;
}
|
|
50
|
+
// Roman numerals recognised as part numbers, mapped to their Arabic equivalents (as strings).
// Only exact, upper-case matches are converted.
const romanNumbers = {
  'I': '1',
  'II': '2',
  'III': '3',
  'IV': '4',
  'V': '5',
  'VI': '6',
  'VII': '7',
  'VIII': '8',
  'IX': '9',
  'X': '10'
};

// Normalizes the number half of a part-data value for comparison:
// a known Roman numeral becomes its Arabic string, anything else is lower-cased.
function normalizePartNumber(value) {
  // Should we handle all Roman numbers or some range of them?
  // There's probably a library for our purposes..
  // Own-property lookup on purpose: the `in` operator also matches inherited keys
  // (e.g. 'constructor', 'toString') and would hand back a function instead of a string.
  if (Object.prototype.hasOwnProperty.call(romanNumbers, value)) {
    const arabicValue = romanNumbers[value];
    (0, _utils.nvdebug)(` MAP ${value} to ${arabicValue}`, debugDev);
    return arabicValue;
  }
  return value.toLowerCase();
}
|
|
69
|
+
// Splits a part-data value and normalizes both halves, returning [partType, partNumber].
// This is just a stub: only the last space is used as a split point, so compound
// values such as "Levy 2, raita 15" are not handled.
function splitAndNormalizePartData(value) {
  const [rawType, rawNumber] = splitPartData(value);
  (0, _utils.nvdebug)(` LHS: '${rawType}'`, debugDev);
  (0, _utils.nvdebug)(` RHS: '${rawNumber}'`, debugDev);
  return [normalizePartType(rawType), normalizePartNumber(rawNumber)];
}
|
|
78
|
+
// Decides whether two part-data values refer to the same part.
// Returns false outright when the tag/subfield pair does not carry part data.
// Otherwise the normalized part numbers must be identical, and the part types must
// either be identical or missing from at least one side ("Osa 3" agrees with "3").
// NB: parameter order here is (tag, subfieldCode), unlike normalizePartData().
function partsAgree(value1, value2, tag, subfieldCode) {
  if (!subfieldContainsPartData(tag, subfieldCode)) {
    return false;
  }
  const [typeA, numberA] = splitAndNormalizePartData(value1);
  const [typeB, numberB] = splitAndNormalizePartData(value2);
  const numbersMatch = numberA === numberB;
  const typesCompatible = [typeA, typeB].includes(undefined) || typeA === typeB;
  return numbersMatch && typesCompatible;
}
|
|
92
|
+
// Produces a normalized form of a part-data value. This is for equality comparison only!
// Values from tag/subfield pairs that do not carry part data are returned unchanged.
// Otherwise the result is "<type> <number>", or just the number when no type was found.
// NB: parameter order here is (subfieldCode, tag), unlike partsAgree().
function normalizePartData(value, subfieldCode, tag) {
  if (subfieldContainsPartData(tag, subfieldCode)) {
    const [type, number] = splitAndNormalizePartData(value);
    return type === undefined ? number : `${type} ${number}`;
  }
  return value;
}
|
|
103
|
+
//# sourceMappingURL=normalizeSubfieldValueForComparison.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"normalizeSubfieldValueForComparison.js","names":["_utils","require","_debug","_interopRequireDefault","obj","__esModule","default","debug","createDebugLogger","debugDev","extend","subfieldContainsPartData","tag","subfieldCode","includes","splitPartData","originalValue","value","replace","splitPoint","lastIndexOf","undefined","lhs","substr","rhs","normalizePartType","toLowerCase","romanNumbers","normalizePartNumber","arabicValue","nvdebug","splitAndNormalizePartData","partType","partNumber","partsAgree","value1","value2","partType1","partNumber1","partType2","partNumber2","normalizePartData"],"sources":["../src/normalizeSubfieldValueForComparison.js"],"sourcesContent":["import {nvdebug} from './utils';\nimport createDebugLogger from 'debug';\n\n// Normalizes at least 490$v and 773$g which contain information such as \"Raita 5\" vs \"5\", and \"Osa 3\" vs \"Osa III\".\n\nconst debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:normalizePart');\n//const debugData = debug.extend('data');\nconst debugDev = debug.extend('dev');\n\nexport function subfieldContainsPartData(tag, subfieldCode) {\n if (subfieldCode === 'v' && ['490', '800', '810', '811', '830'].includes(tag)) {\n return true;\n }\n if (tag === '773' && subfieldCode === 'g') {\n return true;\n }\n return false;\n}\n\nfunction splitPartData(originalValue) {\n const value = originalValue.replace(/^\\[([0-9]+)\\][-.,:; ]*$/ui, '$1'); // eslint-disable-line prefer-named-capture-group\n const splitPoint = value.lastIndexOf(' ');\n if (splitPoint === -1) {\n return [undefined, value];\n }\n const lhs = value.substr(0, splitPoint);\n const rhs = value.substr(splitPoint + 1);\n return [lhs, rhs];\n}\n\nfunction normalizePartType(originalValue) {\n if (originalValue === undefined) {\n return undefined;\n }\n const value = originalValue.toLowerCase();\n // Return Finnish singular nominative. 
Best-ish for debug purposes...\n if (['osa', 'part', 'teil'].includes(value)) {\n return 'osa';\n }\n if (['p.', 'page', 'pages', 'pp.', 's.', 'sidor', 'sivu', 'sivut'].includes(value)) {\n return 'sivu';\n }\n return value;\n}\n\nconst romanNumbers = {'I': '1', 'II': '2', 'III': '3', 'IV': '4', 'V': '5', 'VI': '6', 'X': '10'};\n\nfunction normalizePartNumber(value) {\n // Should we handle all Roman numbers or some range of them?\n // There's probably a library for our purposes..\n if (value in romanNumbers) {\n const arabicValue = romanNumbers[value];\n nvdebug(` MAP ${value} to ${arabicValue}`, debugDev);\n return arabicValue;\n }\n return value.toLowerCase();\n}\n\nfunction splitAndNormalizePartData(value) {\n // This is just a stub. Does not handle eg. \"Levy 2, raita 15\"\n const [lhs, rhs] = splitPartData(value);\n nvdebug(` LHS: '${lhs}'`, debugDev);\n nvdebug(` RHS: '${rhs}'`, debugDev);\n const partType = normalizePartType(lhs);\n const partNumber = normalizePartNumber(rhs);\n return [partType, partNumber];\n}\n\nexport function partsAgree(value1, value2, tag, subfieldCode) {\n if (!subfieldContainsPartData(tag, subfieldCode)) {\n return false;\n }\n const [partType1, partNumber1] = splitAndNormalizePartData(value1);\n const [partType2, partNumber2] = splitAndNormalizePartData(value2);\n if (partNumber1 !== partNumber2) {\n return false;\n }\n if (partType1 === undefined || partType2 === undefined || partType1 === partType2) {\n return true;\n }\n\n return false;\n}\n\nexport function normalizePartData(value, subfieldCode, tag) {\n // This is for normalizing values for equality comparison only!\n if (!subfieldContainsPartData(tag, subfieldCode)) {\n return value;\n }\n\n const [partType, partNumber] = splitAndNormalizePartData(value);\n if (partType === undefined) {\n return partNumber;\n }\n return `${partType} 
${partNumber}`;\n}\n"],"mappings":";;;;;;;;AAAA,IAAAA,MAAA,GAAAC,OAAA;AACA,IAAAC,MAAA,GAAAC,sBAAA,CAAAF,OAAA;AAAsC,SAAAE,uBAAAC,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,KAAAE,OAAA,EAAAF,GAAA;AAEtC;;AAEA,MAAMG,KAAK,GAAG,IAAAC,cAAiB,EAAC,4DAA4D,CAAC;AAC7F;AACA,MAAMC,QAAQ,GAAGF,KAAK,CAACG,MAAM,CAAC,KAAK,CAAC;AAE7B,SAASC,wBAAwBA,CAACC,GAAG,EAAEC,YAAY,EAAE;EAC1D,IAAIA,YAAY,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACC,QAAQ,CAACF,GAAG,CAAC,EAAE;IAC7E,OAAO,IAAI;EACb;EACA,IAAIA,GAAG,KAAK,KAAK,IAAIC,YAAY,KAAK,GAAG,EAAE;IACzC,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAEA,SAASE,aAAaA,CAACC,aAAa,EAAE;EACpC,MAAMC,KAAK,GAAGD,aAAa,CAACE,OAAO,CAAC,2BAA2B,EAAE,IAAI,CAAC,CAAC,CAAC;EACxE,MAAMC,UAAU,GAAGF,KAAK,CAACG,WAAW,CAAC,GAAG,CAAC;EACzC,IAAID,UAAU,KAAK,CAAC,CAAC,EAAE;IACrB,OAAO,CAACE,SAAS,EAAEJ,KAAK,CAAC;EAC3B;EACA,MAAMK,GAAG,GAAGL,KAAK,CAACM,MAAM,CAAC,CAAC,EAAEJ,UAAU,CAAC;EACvC,MAAMK,GAAG,GAAGP,KAAK,CAACM,MAAM,CAACJ,UAAU,GAAG,CAAC,CAAC;EACxC,OAAO,CAACG,GAAG,EAAEE,GAAG,CAAC;AACnB;AAEA,SAASC,iBAAiBA,CAACT,aAAa,EAAE;EACxC,IAAIA,aAAa,KAAKK,SAAS,EAAE;IAC/B,OAAOA,SAAS;EAClB;EACA,MAAMJ,KAAK,GAAGD,aAAa,CAACU,WAAW,CAAC,CAAC;EACzC;EACA,IAAI,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,CAACZ,QAAQ,CAACG,KAAK,CAAC,EAAE;IAC3C,OAAO,KAAK;EACd;EACA,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,CAACH,QAAQ,CAACG,KAAK,CAAC,EAAE;IAClF,OAAO,MAAM;EACf;EACA,OAAOA,KAAK;AACd;AAEA,MAAMU,YAAY,GAAG;EAAC,GAAG,EAAE,GAAG;EAAE,IAAI,EAAE,GAAG;EAAE,KAAK,EAAE,GAAG;EAAE,IAAI,EAAE,GAAG;EAAE,GAAG,EAAE,GAAG;EAAE,IAAI,EAAE,GAAG;EAAE,GAAG,EAAE;AAAI,CAAC;AAEjG,SAASC,mBAAmBA,CAACX,KAAK,EAAE;EAClC;EACA;EACA,IAAIA,KAAK,IAAIU,YAAY,EAAE;IACzB,MAAME,WAAW,GAAGF,YAAY,CAACV,KAAK,CAAC;IACvC,IAAAa,cAAO,EAAE,QAAOb,KAAM,OAAMY,WAAY,EAAC,EAAEpB,QAAQ,CAAC;IACpD,OAAOoB,WAAW;EACpB;EACA,OAAOZ,KAAK,CAACS,WAAW,CAAC,CAAC;AAC5B;AAEA,SAASK,yBAAyBA,CAACd,KAAK,EAAE;EACxC;EACA,MAAM,CAACK,GAAG,EAAEE,GAAG,CAAC,GAAGT,aAAa,CAACE,KAAK,CAAC;EACvC,IAAAa,cAAO,EAAE,WAAUR,GAAI,GAAE,EAAEb,QAAQ,
CAAC;EACpC,IAAAqB,cAAO,EAAE,WAAUN,GAAI,GAAE,EAAEf,QAAQ,CAAC;EACpC,MAAMuB,QAAQ,GAAGP,iBAAiB,CAACH,GAAG,CAAC;EACvC,MAAMW,UAAU,GAAGL,mBAAmB,CAACJ,GAAG,CAAC;EAC3C,OAAO,CAACQ,QAAQ,EAAEC,UAAU,CAAC;AAC/B;AAEO,SAASC,UAAUA,CAACC,MAAM,EAAEC,MAAM,EAAExB,GAAG,EAAEC,YAAY,EAAE;EAC5D,IAAI,CAACF,wBAAwB,CAACC,GAAG,EAAEC,YAAY,CAAC,EAAE;IAChD,OAAO,KAAK;EACd;EACA,MAAM,CAACwB,SAAS,EAAEC,WAAW,CAAC,GAAGP,yBAAyB,CAACI,MAAM,CAAC;EAClE,MAAM,CAACI,SAAS,EAAEC,WAAW,CAAC,GAAGT,yBAAyB,CAACK,MAAM,CAAC;EAClE,IAAIE,WAAW,KAAKE,WAAW,EAAE;IAC/B,OAAO,KAAK;EACd;EACA,IAAIH,SAAS,KAAKhB,SAAS,IAAIkB,SAAS,KAAKlB,SAAS,IAAIgB,SAAS,KAAKE,SAAS,EAAE;IACjF,OAAO,IAAI;EACb;EAEA,OAAO,KAAK;AACd;AAEO,SAASE,iBAAiBA,CAACxB,KAAK,EAAEJ,YAAY,EAAED,GAAG,EAAE;EAC1D;EACA,IAAI,CAACD,wBAAwB,CAACC,GAAG,EAAEC,YAAY,CAAC,EAAE;IAChD,OAAOI,KAAK;EACd;EAEA,MAAM,CAACe,QAAQ,EAAEC,UAAU,CAAC,GAAGF,yBAAyB,CAACd,KAAK,CAAC;EAC/D,IAAIe,QAAQ,KAAKX,SAAS,EAAE;IAC1B,OAAOY,UAAU;EACnB;EACA,OAAQ,GAAED,QAAS,IAAGC,UAAW,EAAC;AACpC"}
|
package/dist/punctuation2.js
CHANGED
|
@@ -232,6 +232,20 @@ const linkingEntryWhatever = [{
|
|
|
232
232
|
'followedBy': 'abdghiklmnopqrstuwxyz',
|
|
233
233
|
'remove': /\. -$/u
|
|
234
234
|
}];
|
|
235
|
+
|
|
236
|
+
// '!' means negation, thus '!b' means any other subfield but 'b'.
|
|
237
|
+
// 'followedBy': '#' means that current subfield is the last subfield.
|
|
238
|
+
// NB! Note that control subfields are ignored in punctuation rules.
|
|
239
|
+
// NB #2! Ignoring control subfields causes issues with field 257: https://wiki.helsinki.fi/display/rdasovellusohje/Loppupisteohje
|
|
240
|
+
// Might need to work on that at some point. NOT a top priority though.
|
|
241
|
+
// NB #3! Final punctuation creation is/should be handled by the ending-punctuation.js validator!
|
|
242
|
+
|
|
243
|
+
// Cleanup rule registered for tags 246 and 946 in cleanCrappyPunctuationRules: when a subfield whose code is listed in 'code' is the last subfield ('followedBy': '#'), a trailing dot is removed. The 'context' check (dotIsProbablyPunc, defined elsewhere) presumably guards against stripping dots that are not punctuation; confirm against its definition.
const crappy246 = [{
|
|
244
|
+
'code': 'abfghinp',
|
|
245
|
+
'followedBy': '#',
|
|
246
|
+
'remove': /\.$/u,
|
|
247
|
+
'context': dotIsProbablyPunc
|
|
248
|
+
}];
|
|
235
249
|
const cleanCrappyPunctuationRules = {
|
|
236
250
|
'100': removeX00Whatever,
|
|
237
251
|
'110': removeX10Whatever,
|
|
@@ -245,6 +259,7 @@ const cleanCrappyPunctuationRules = {
|
|
|
245
259
|
'remove': /\.$/u,
|
|
246
260
|
'context': dotIsProbablyPunc
|
|
247
261
|
}],
|
|
262
|
+
'246': crappy246,
|
|
248
263
|
'300': [{
|
|
249
264
|
'code': 'a',
|
|
250
265
|
'followedBy': '!b',
|
|
@@ -280,7 +295,8 @@ const cleanCrappyPunctuationRules = {
|
|
|
280
295
|
'776': linkingEntryWhatever,
|
|
281
296
|
'800': removeX00Whatever,
|
|
282
297
|
'810': removeX10Whatever,
|
|
283
|
-
'830': remove490And830Whatever
|
|
298
|
+
'830': remove490And830Whatever,
|
|
299
|
+
'946': crappy246
|
|
284
300
|
};
|
|
285
301
|
const cleanLegalX00Comma = {
|
|
286
302
|
'code': 'abcde',
|
|
@@ -343,7 +359,7 @@ const cleanLegalSeriesTitle = [
|
|
|
343
359
|
const clean24X = [{
|
|
344
360
|
'name': 'I:A',
|
|
345
361
|
'code': 'i',
|
|
346
|
-
'followedBy': '
|
|
362
|
+
'followedBy': 'a',
|
|
347
363
|
'remove': / *:$/u
|
|
348
364
|
}, {
|
|
349
365
|
'name': 'A:B',
|
|
@@ -614,7 +630,12 @@ const addPairedPunctuationRules = {
|
|
|
614
630
|
'800': addX00,
|
|
615
631
|
'810': addX10,
|
|
616
632
|
'830': addSeriesTitle,
|
|
617
|
-
'946':
|
|
633
|
+
'946': [{
|
|
634
|
+
'code': 'i',
|
|
635
|
+
'followedBy': 'a',
|
|
636
|
+
'add': ':',
|
|
637
|
+
'context': defaultNeedsPuncAfter
|
|
638
|
+
}]
|
|
618
639
|
};
|
|
619
640
|
function ruleAppliesToSubfieldCode(targetSubfieldCodes, currSubfieldCode) {
|
|
620
641
|
const negation = targetSubfieldCodes.includes('!');
|