@natlibfi/marc-record-validators-melinda 11.3.1 → 11.3.2-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/dist/index.js +7 -0
  2. package/dist/index.js.map +1 -1
  3. package/dist/melindaCustomMergeFields.json +5120 -0
  4. package/dist/merge-fields/config.json +83 -0
  5. package/dist/merge-fields/controlSubfields.js +278 -0
  6. package/dist/merge-fields/controlSubfields.js.map +1 -0
  7. package/dist/merge-fields/counterpartField.js +674 -0
  8. package/dist/merge-fields/counterpartField.js.map +1 -0
  9. package/dist/merge-fields/index.js +76 -0
  10. package/dist/merge-fields/index.js.map +1 -0
  11. package/dist/merge-fields/mergableIndicator.js +95 -0
  12. package/dist/merge-fields/mergableIndicator.js.map +1 -0
  13. package/dist/merge-fields/mergableTag.js +33 -0
  14. package/dist/merge-fields/mergableTag.js.map +1 -0
  15. package/dist/merge-fields/mergeConstraints.js +1225 -0
  16. package/dist/merge-fields/mergeConstraints.js.map +1 -0
  17. package/dist/merge-fields/mergeField.js +190 -0
  18. package/dist/merge-fields/mergeField.js.map +1 -0
  19. package/dist/merge-fields/mergeIndicator.js +171 -0
  20. package/dist/merge-fields/mergeIndicator.js.map +1 -0
  21. package/dist/merge-fields/mergeOrAddPostprocess.js +57 -0
  22. package/dist/merge-fields/mergeOrAddPostprocess.js.map +1 -0
  23. package/dist/merge-fields/mergeOrAddSubfield.js +203 -0
  24. package/dist/merge-fields/mergeOrAddSubfield.js.map +1 -0
  25. package/dist/merge-fields/mergeSubfield.js +277 -0
  26. package/dist/merge-fields/mergeSubfield.js.map +1 -0
  27. package/dist/merge-fields/removeDuplicateSubfields.js +48 -0
  28. package/dist/merge-fields/removeDuplicateSubfields.js.map +1 -0
  29. package/dist/merge-fields/worldKnowledge.js +98 -0
  30. package/dist/merge-fields/worldKnowledge.js.map +1 -0
  31. package/dist/merge-fields.spec.js +51 -0
  32. package/dist/merge-fields.spec.js.map +1 -0
  33. package/dist/subfield6Utils.js +16 -1
  34. package/dist/subfield6Utils.js.map +1 -1
  35. package/dist/utils.js +108 -0
  36. package/dist/utils.js.map +1 -1
  37. package/package.json +6 -6
  38. package/src/index.js +3 -1
  39. package/src/melindaCustomMergeFields.json +5120 -0
  40. package/src/merge-fields/config.json +83 -0
  41. package/src/merge-fields/controlSubfields.js +307 -0
  42. package/src/merge-fields/counterpartField.js +736 -0
  43. package/src/merge-fields/index.js +69 -0
  44. package/src/merge-fields/mergableIndicator.js +90 -0
  45. package/src/merge-fields/mergableTag.js +89 -0
  46. package/src/merge-fields/mergeConstraints.js +309 -0
  47. package/src/merge-fields/mergeField.js +187 -0
  48. package/src/merge-fields/mergeIndicator.js +185 -0
  49. package/src/merge-fields/mergeOrAddPostprocess.js +56 -0
  50. package/src/merge-fields/mergeOrAddSubfield.js +218 -0
  51. package/src/merge-fields/mergeSubfield.js +306 -0
  52. package/src/merge-fields/removeDuplicateSubfields.js +50 -0
  53. package/src/merge-fields/worldKnowledge.js +104 -0
  54. package/src/merge-fields.spec.js +52 -0
  55. package/src/subfield6Utils.js +14 -1
  56. package/src/utils.js +119 -0
  57. package/test-fixtures/merge-fields/f01/expectedResult.json +11 -0
  58. package/test-fixtures/merge-fields/f01/metadata.json +5 -0
  59. package/test-fixtures/merge-fields/f01/record.json +13 -0
  60. package/test-fixtures/merge-fields/f02/expectedResult.json +14 -0
  61. package/test-fixtures/merge-fields/f02/metadata.json +6 -0
  62. package/test-fixtures/merge-fields/f02/record.json +16 -0
  63. package/test-fixtures/merge-fields/f03/expectedResult.json +17 -0
  64. package/test-fixtures/merge-fields/f03/metadata.json +7 -0
  65. package/test-fixtures/merge-fields/f03/record.json +23 -0
  66. package/test-fixtures/merge-fields/f04/expectedResult.json +14 -0
  67. package/test-fixtures/merge-fields/f04/metadata.json +5 -0
  68. package/test-fixtures/merge-fields/f04/record.json +19 -0
  69. package/test-fixtures/merge-fields/v01/expectedResult.json +6 -0
  70. package/test-fixtures/merge-fields/v01/metadata.json +5 -0
  71. package/test-fixtures/merge-fields/v01/record.json +13 -0
  72. package/test-fixtures/merge-fields/v02/expectedResult.json +4 -0
  73. package/test-fixtures/merge-fields/v02/metadata.json +5 -0
  74. package/test-fixtures/merge-fields/v02/record.json +13 -0
  75. package/test-fixtures/merge-fields/v03/expectedResult.json +6 -0
  76. package/test-fixtures/merge-fields/v03/metadata.json +6 -0
  77. package/test-fixtures/merge-fields/v03/record.json +16 -0
  78. package/test-fixtures/merge-fields/v04/expectedResult.json +4 -0
  79. package/test-fixtures/merge-fields/v04/metadata.json +6 -0
  80. package/test-fixtures/merge-fields/v04/record.json +16 -0
@@ -0,0 +1,306 @@
1
+ import createDebugLogger from 'debug';
2
+ import {partsAgree, subfieldContainsPartData} from '../normalizeSubfieldValueForComparison';
3
+ import {valueCarriesMeaning} from './worldKnowledge';
4
+ import {nvdebug} from '../utils';
5
+ import {tagAndSubfieldCodeReferToIsbn} from '../normalizeFieldForComparison.js';
6
+ import {canContainOptionalQualifier, splitToNameAndQualifier} from './counterpartField';
7
+
8
+ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:mergeSubfield');
9
+ //const debugData = debug.extend('data');
10
+ const debugDev = debug.extend('dev');
11
+
12
+ // NB! These are X00 specific. Should we somehow parametrize them?
13
+ const onlyBirthYear = /^[1-9][0-9]*-[,.]?$/u;
14
+ const onlyDeathYear = /^-[1-9][0-9]*[,.]?$/u;
15
+ const birthYearAndDeathYear = /^[1-9][0-9]*-[1-9][0-9]*[,.]?$/u;
16
+
17
// Returns the integer that follows the first hyphen of str (e.g. "1900-1980" => 1980).
// When nothing numeric follows the hyphen (e.g. "1900-") the result is NaN.
function getDeathYear(str) {
  const hyphenPosition = str.indexOf('-');
  const afterHyphen = str.substring(hyphenPosition + 1);
  return parseInt(afterHyphen, 10);
}
20
+
21
// True when str has the form "<birth>-<death>" (optionally followed by a comma or a period)
// and the pair is plausible: death is not before birth and the lifespan is at most 125 years.
function isValidBirthYearAndDeathYear(str) {
  if (!birthYearAndDeathYear.test(str)) {
    return false;
  }
  // parseInt() stops at the hyphen, so this reads the birth year only:
  const birthYear = parseInt(str, 10);
  const lifespan = getDeathYear(str) - birthYear;
  // Possible sanity check: Died after current year?
  return lifespan >= 0 && lifespan <= 125;
}
37
+
38
// True when str is a lone birth year ("1900-"), a lone death year ("-1980")
// or a plausible birth-death pair ("1900-1980").
function anyYear(str) {
  const matchesSingleYear = [onlyBirthYear, onlyDeathYear].some(pattern => pattern.test(str));
  return matchesSingleYear || isValidBirthYearAndDeathYear(str);
}
44
+
45
// When the candidate carries both birth and death year and the first existing subfield carries
// only one of them (and that one agrees with the candidate), overwrite the existing value with
// the candidate's fuller value. Returns true when the replacement was made.
// NB! targetField is not used here; only relevantSubfields[0] is inspected and modified.
function replaceEntrysBirthAndDeathYear(targetField, candSubfield, relevantSubfields) {
  const incomingValue = candSubfield.value;
  if (!birthYearAndDeathYear.test(incomingValue)) {
    return false;
  }

  const [existing] = relevantSubfields;
  const birthYearsAgree = () => onlyBirthYear.test(existing.value) && parseInt(existing.value, 10) === parseInt(incomingValue, 10);
  const deathYearsAgree = () => onlyDeathYear.test(existing.value) && getDeathYear(existing.value) === getDeathYear(incomingValue);

  if (birthYearsAgree() || deathYearsAgree()) {
    existing.value = incomingValue; // eslint-disable-line functional/immutable-data
    return true;
  }
  return false;
}
59
+
60
// Handles X00$d (dates associated with a name) only. Overwrites the first existing $d with the
// candidate's value when the existing value is not a recognized year expression but the
// candidate's is, or when the candidate adds the missing birth/death year.
// Returns true when a replacement was made.
function replaceDatesAssociatedWithName(targetField, candSubfield, relevantSubfields) {
  const isX00SubfieldD = candSubfield.code === 'd' && (/^[1678]00$/u).test(targetField.tag); // njsscan-ignore: regex_dos
  if (!isX00SubfieldD) {
    return false;
  }

  const [existing] = relevantSubfields;
  if (!anyYear(existing.value) && anyYear(candSubfield.value)) {
    existing.value = candSubfield.value; // eslint-disable-line functional/immutable-data
    return true;
  }

  // Add also the death year, if the original value only contains birth year (or vice versa):
  return replaceEntrysBirthAndDeathYear(targetField, candSubfield, relevantSubfields) ? true : false; // eslint-disable-line no-unneeded-ternary
}
77
+
78
+ // use array.includes(value) for easy extendability (Swedish, other languages, abbrs, etc.()
79
// True when value is exactly one of the known terms for a spiral-bound binding.
function isKierreselka(value) {
  const spiralBoundTerms = ['kierreselkä', 'spiral bound', 'spiral-bound', 'spiralrygg'];
  return spiralBoundTerms.includes(value);
}
82
+
83
// True when value is exactly one of the known terms for a hardcover binding.
function isKovakantinen(value) {
  const hardcoverTerms = ['hardback', 'hardcover', 'hårda pärmar', 'kovakantinen'];
  return hardcoverTerms.includes(value);
}
86
+
87
// True when value is exactly one of the known terms for a softcover binding.
function isPehmeakantinen(value) {
  const softcoverTerms = ['mjuka pärmar', 'paperback', 'pehmeäkantinen', 'softcover'];
  return softcoverTerms.includes(value);
}
90
+
91
// True when value is the Swedish or Finnish phrase for "standalone sequel to the work"
// (case-insensitively), optionally followed by non-letter characters such as punctuation.
function isItsenainenJatkoOsa(value) {
  const sequelPhrases = [
    /^Fristående fortsättning på verket[^a-z]*$/ui,
    /^Itsenäinen jatko-osa teokselle[^a-z]*$/ui
  ];
  return sequelPhrases.some(phrase => value.match(phrase) !== null);
}
100
+
101
// True when value is the Swedish or Finnish phrase for "contains (work)"
// (case-insensitively), optionally followed by non-letter characters such as punctuation.
function isSisaltaaTeos(value) {
  const containsWorkPhrases = [
    /^Innehåller \(verk\)[^a-z]*$/ui,
    /^Sisältää \(teos\)[^a-z]*$/ui
  ];
  return containsWorkPhrases.some(phrase => value.match(phrase) !== null);
}
110
// True when the candidate and at least one existing subfield express the same relation
// ("contains (work)" or "standalone sequel"), regardless of language or trailing punctuation.
function relationInformationMatches(candSubfield, relevantSubfields) {
  const relationTests = [isSisaltaaTeos, isItsenainenJatkoOsa];
  return relationTests.some(isRelation => isRelation(candSubfield.value) && relevantSubfields.some(sf => isRelation(sf.value)));
}
120
+
121
// True when the candidate and at least one existing subfield name the same cover type
// (softcover, hardcover or spiral-bound), possibly using different synonyms.
function coverTypesMatch(candSubfield, relevantSubfields) {
  const coverTypeTests = [isPehmeakantinen, isKovakantinen, isKierreselka];
  return coverTypeTests.some(isCoverType => isCoverType(candSubfield.value) && relevantSubfields.some(sf => isCoverType(sf.value)));
}
133
+
134
// Rewrites every occurrence of "http://" in val as "https://".
function httpToHttps(val) {
  const insecureScheme = 'http://';
  const secureScheme = 'https://';
  return val.split(insecureScheme).join(secureScheme);
}
137
+
138
// True when the candidate's value equals some existing subfield's value after both have had
// their "http://" occurrences upgraded to "https://".
function pairHttpAndHttps(candSubfield, relevantSubfields) {
  const wantedValue = httpToHttps(candSubfield.value);
  const upgradedValues = relevantSubfields.map(({value}) => httpToHttps(value));
  return upgradedValues.some(upgraded => upgraded === wantedValue);
}
143
+
144
// Decides whether the candidate means the same as some existing subfield even though the
// strings differ. For $q of 015/020/024/028 only cover-type synonyms are considered; for
// everything else relation phrases and http/https variants are considered.
function isSynonym(field, candSubfield, relevantSubfields) {
  const isQualifyingInformation = candSubfield.code === 'q' && ['015', '020', '024', '028'].includes(field.tag);
  if (isQualifyingInformation) {
    return coverTypesMatch(candSubfield, relevantSubfields);
  }

  //nvdebug(`Looking for synonyms for '${subfieldToString(candSubfield)}'...`, debugDev);

  const synonymTests = [relationInformationMatches, pairHttpAndHttps];
  return synonymTests.some(matches => matches(candSubfield, relevantSubfields));
}
161
+
162
// If the candidate is a hyphenated ISBN and an existing subfield holds the same ISBN without
// hyphens, overwrite the existing value with the hyphenated form.
// Returns true when the replacement was made, false otherwise.
function preferHyphenatedISBN(field, candSubfield, relevantSubfields) {
  // String.prototype.includes() returns a boolean, so it must not be compared against -1:
  if (!tagAndSubfieldCodeReferToIsbn(field.tag, candSubfield.code) || !candSubfield.value.includes('-')) {
    return false;
  }

  // Must not already exist:
  if (relevantSubfields.some(sf => sf.value === candSubfield.value)) {
    return false;
  }

  const hyphenlessCandidate = candSubfield.value.replace(/-/gu, '');
  const pair = relevantSubfields.find(sf => !sf.value.includes('-') && sf.value === hyphenlessCandidate);
  if (!pair) {
    return false;
  }
  pair.value = candSubfield.value; // eslint-disable-line functional/immutable-data
  return true;
}
180
+
181
// If the candidate is an https:// URL and an existing subfield holds the same URL with
// http://, overwrite the existing value with the https version. Returns true when replaced.
function preferHttpsOverHttp(candSubfield, relevantSubfields) {
  const secureScheme = 'https://';
  const candidateUrl = candSubfield.value;
  if (!candidateUrl.startsWith(secureScheme)) {
    return false;
  }

  const insecureTwin = `http://${candidateUrl.slice(secureScheme.length)}`;
  const pair = relevantSubfields.find(({value}) => value === insecureTwin);
  if (pair) {
    pair.value = candidateUrl; // eslint-disable-line functional/immutable-data
    return true;
  }
  return false;
}
195
+
196
+
197
// For subfields that may carry an optional qualifier (see canContainOptionalQualifier()):
// find an existing subfield whose name part equals the candidate's name part and whose
// qualifier does not conflict with the candidate's, and overwrite it with the candidate's value.
// Returns true when such a counterpart was found (even if, for online-type 300$a values,
// the existing value is deliberately kept), false otherwise.
function preferQualifierVersion(field, candSubfield, relevantSubfields) {
  if (!canContainOptionalQualifier(field.tag, candSubfield.code)) { // currently only 300$a and 776$i can prefer source...
    return false;
  }

  const [candName, candQualifier] = splitToNameAndQualifier(candSubfield.value);

  // The existing subfield (not the candidate itself!) must be split and compared against the candidate:
  const isCounterpart = subfield => {
    const [existingName, existingQualifier] = splitToNameAndQualifier(subfield.value);
    if (candName !== existingName) {
      return false;
    }
    // A missing qualifier on either side never conflicts:
    return !candQualifier || !existingQualifier || candQualifier === existingQualifier;
  };

  const pair = relevantSubfields.find(sf => isCounterpart(sf));
  if (!pair) {
    return false;
  }
  // SN: "From the cataloguing rules' point of view a page count should never be recorded for an epub"
  if (field.tag === '300' && candSubfield.code === 'a' && candSubfield.value.match(/(?:online|verkko)/iu)) {
    return true; // True, but don't prefer the source value
  }

  pair.value = candSubfield.value; // eslint-disable-line functional/immutable-data
  return true;
}
227
+
228
// X10$a (corporate name) only: overwrite the existing name (pair) with the candidate's name
// when the candidate's form is known to be the preferred one. Returns true when replaced.
function preferSourceCorporateName(field, candSubfield, pair) {
  const isCorporateName = candSubfield.code === 'a' && ['110', '610', '710', '810'].includes(field.tag);
  if (!isCorporateName) {
    return false;
  }
  nvdebug(`CORP base '${pair.value}' vs '${candSubfield.value}'`, debugDev);

  if (!sourceNameIsPreferred()) {
    return false;
  }
  pair.value = candSubfield.value; // eslint-disable-line functional/immutable-data
  return true;

  function sourceNameIsPreferred() {
    const sourceValue = candSubfield.value;
    const baseValue = pair.value;
    // Hard-coded publisher preferences:
    if (sourceValue.match(/^Werner Söderström/u) && baseValue.match(/^WSOY/ui)) {
      return true;
    }
    if (sourceValue.match(/^ntamo/u) && baseValue.match(/^N(?:tamo|TAMO)/u)) {
      return true;
    }
    // Prefer (qualifier): same name, but only the source has a qualifier.
    const [sourceName, sourceQualifier] = splitToNameAndQualifier(sourceValue);
    const [baseName, baseQualifier] = splitToNameAndQualifier(baseValue);
    // Not taking prefix and suffix into account here...
    return sourceName === baseName && baseQualifier === undefined && sourceQualifier !== undefined;
  }
}
258
+
259
// Tries to absorb candSubfield into targetField via an existing subfield that has the same code.
// Either an existing value is replaced by the (better) incoming value, or the incoming value is
// judged to be already represented (synonym, agreeing part data). These replacements are by
// nature rather hacky...
// Possible future additions:
// - "FOO" gets replaced by "Foo" in certain fields.
// - "Etunimi Sukunimi" might lose to "Sukunimi, Etunimi" in X00 fields.
// Returns true when the candidate was handled here, false otherwise.
// Replacing requires a sanity check that the new value is a better one, thus typically this function fails...
export function mergeSubfield(targetField, candSubfield) {
  const sameCodeSubfields = targetField.subfields.filter(({code}) => code === candSubfield.code);

  // There's nothing to replace the incoming subfield with. Thus abort:
  if (sameCodeSubfields.length === 0) {
    return false;
  }

  nvdebug(`Got ${sameCodeSubfields.length} sf-cand(s) for field ${targetField.tag}‡${candSubfield.code}`, debugDev);

  // Tried in this order; the first strategy that succeeds ends the processing:
  const strategies = [
    () => replaceDatesAssociatedWithName(targetField, candSubfield, sameCodeSubfields),
    () => preferHyphenatedISBN(targetField, candSubfield, sameCodeSubfields),
    () => preferHttpsOverHttp(candSubfield, sameCodeSubfields),
    () => preferSourceCorporateName(targetField, candSubfield, sameCodeSubfields[0]), // SF is non-repeat
    () => preferQualifierVersion(targetField, candSubfield, sameCodeSubfields),
    () => isSynonym(targetField, candSubfield, sameCodeSubfields)
  ];
  if (strategies.some(tryStrategy => tryStrategy())) {
    return true;
  }

  // We found a crappy empty subfield: replace that with a meaningful one.
  // 260 $a value "[S.l]" is the main type for this.
  const [firstMeaningless] = sameCodeSubfields.filter(sf => !valueCarriesMeaning(targetField.tag, sf.code, sf.value));
  if (firstMeaningless !== undefined) {
    firstMeaningless.value = candSubfield.value; // eslint-disable-line functional/immutable-data
    return true;
  }

  // Mark 490$v "osa 1" vs "1" as merged (2nd part of MET-53).
  // NB! Keeps the original value and drops the incoming value. (Just preventing it from going to add-part...)
  // NB! We could improve this and choose the longer value later on.
  if (!subfieldContainsPartData(targetField.tag, candSubfield.code)) {
    return false; // default to failure
  }
  return sameCodeSubfields.some(sf => partsAgree(sf.value, candSubfield.value, targetField.tag, candSubfield.code));
}
@@ -0,0 +1,50 @@
1
+
2
+ import {nvdebug, subfieldToString} from '../utils.js';
3
+ import createDebugLogger from 'debug';
4
+ import {cloneAndRemovePunctuation} from '../normalizeFieldForComparison.js';
5
+ import {sortAdjacentSubfields} from '../sortSubfields';
6
+ import {fieldFixPunctuation} from '../punctuation2';
7
+
8
+
9
+ // NB This should be moved and converted to a validator/fixer as well...
10
+ const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:merge-fields:removeDuplicateSubfields');
11
+ //const debugData = debug.extend('data');
12
+ const debugDev = debug.extend('dev');
13
+
14
// Runs fieldRemoveDuplicateSubfields() on every field of the record, in order. Modifies the fields in place.
export function recordRemoveDuplicateSubfieldsFromFields(record) {
  const {fields} = record;
  fields.forEach(currentField => {
    fieldRemoveDuplicateSubfields(currentField);
  });
}
17
+
18
// Removes field-internal duplicate subfields in place. Two subfields count as duplicates when
// their string forms are equal in the punctuation-stripped clone of the field; the first
// occurrence is kept. After a removal the subfields are re-sorted and punctuation is re-fixed.
// Fields without subfields and tags 264, 300, 382 and 505 are left untouched.
export function fieldRemoveDuplicateSubfields(field) {
  // Skip bad (382, 505) and risky (264 ...) stuff: 382$n, 505$r, others...
  if (!field.subfields || ['264', '300', '382', '505'].includes(field.tag)) {
    return;
  }

  const strippedField = cloneAndRemovePunctuation(field); // make punctuation-less version
  /* eslint-disable */
  // A Set (rather than a plain object) so that inherited keys such as "constructor" can never look like hits:
  const seen = new Set();

  field.subfields = field.subfields.filter((sf, i) => notSeenBefore(sf, i));

  if (field.collapsed) {
    sortAdjacentSubfields(field);
    fieldFixPunctuation(field);
    delete field.collapsed;
  }

  function notSeenBefore(sf, index) {
    const subfieldAsString = subfieldToString(strippedField.subfields[index]); // use normalized form
    if (seen.has(subfieldAsString)) {
      nvdebug(`Remove field-internal duplicate subfield ${subfieldToString(sf)}`, debugDev);
      field.collapsed = 1; // trigger punctuation reset
      return false;
    }
    //nvdebug(`identical subfield removal: Add ${subfieldAsString} to seen`, debugDev);
    seen.add(subfieldAsString);
    return true;
  }
  /* eslint-enable */
}
@@ -0,0 +1,104 @@
1
+ //import {nvdebug} from '../utils';
2
+
3
// Some data is pretty meaningless, and as meaningless is pretty close to nothing, such data
// should not prevent a merge. Returns false for the "place unknown" (260/264 $a) and
// "publisher unknown" (260/264 $b) placeholders, true for everything else.
// The list of placeholders is incomplete (Swedish translations etc).
export function valueCarriesMeaning(tag, subfieldCode, value) {
  if (tag !== '260' && tag !== '264') {
    return true;
  }
  // We drop these, instead of normalizing, as KV does not put this information in place...
  if (subfieldCode === 'a') {
    return value.match(/^[^a-z]*(?:Kustannuspaikka tuntematon|S\.l)[^a-z]*$/ui) === null;
  }
  if (subfieldCode === 'b') {
    return value.match(/^[^a-z]*(?:Kustantaja tuntematon|S\.n)[^a-z]*$/ui) === null;
  }
  return true;
}
22
+
23
// Maps the value of certain tag/subfield combinations to a canonical form, so that values that
// mean the same compare as equal. Values of other subfields are returned as they are.
// NB! These work only for non-repeatable subfields!
// Repeatable subfields are currently handled in mergeSubfields.js. Only non-repeatable subfields block field merge.
// (This split is suboptimal... Minimum fix: make this distinction cleaner...)
// NB! originalValue should already be lowercased, stripped of initial '[' chars and post-punctuation.
export function normalizeForSamenessCheck(tag, subfieldCode, originalValue) {
  const isSubfield = (wantedTag, wantedCode) => tag === wantedTag && subfieldCode === wantedCode;

  // [applies to this tag/subfield?, normalizer]; the first applicable rule wins:
  const rules = [
    [subfieldCode === 'a' && ['100', '600', '700', '800'].includes(tag), normalizePersonalName],
    [isSubfield('250', 'a'), normalizeEditionStatement],
    // 506 - Restrictions on Access Note (R), $a - Terms governing access (NR)
    [isSubfield('506', 'a'), normalize506a],
    [isSubfield('534', 'p'), normalizeOriginalVersionNoteIntroductoryPhrase]
  ];

  const applicableRule = rules.find(([applies]) => applies);
  if (applicableRule === undefined) {
    return originalValue;
  }
  const [, normalize] = applicableRule;
  return normalize(originalValue);
}
47
+
48
+
49
// Turns "Surname, Forename" into the more readable "Forename Surname" for comparisons.
// Values that do not consist of exactly two comma-separated parts are returned unchanged.
function normalizePersonalName(originalValue) {
  const invertedName = /^(?<surname>[^,]+), (?<forename>[^,]+)$/u;
  return originalValue.replace(invertedName, '$<forename> $<surname>');
}
53
+
54
// Known equivalent 506$a values (downcased, without punctuation). The first one is the canonical form.
const sallittu506a = ['sallittu kaikenikäisille', 'sallittu', 's'];

// Returns the canonical form for any of the known equivalent values, and other values unchanged.
function normalize506a(originalValue) {
  const [canonicalValue] = sallittu506a;
  return sallittu506a.includes(originalValue) ? canonicalValue : originalValue;
}
61
+
62
// Known equivalent 534$p introductory phrases. The first one is the canonical form.
const introductoryPhrasesMeaning1 = ['alkuperäinen', 'alkuperäisen julkaisutiedot', 'alun perin julkaistu', 'alunperin julkaistu'];

// MELKEHITYS-1935-ish: returns the canonical phrase for any of the known equivalent phrases,
// and other values unchanged.
function normalizeOriginalVersionNoteIntroductoryPhrase(originalValue) {
  const [canonicalPhrase] = introductoryPhrasesMeaning1;
  return introductoryPhrasesMeaning1.includes(originalValue) ? canonicalPhrase : originalValue;
}
71
+
72
// Maps edition statements written in Finnish, Swedish or English ("2nd ed.", "Andra upplagan",
// "2. p.") to one Finnish form ("2. painos" / "2. uudistettu painos").
// As normalization tries to translate things into Finnish, use this for similarity check only!
// Unrecognized values are returned unchanged.
function normalizeEditionStatement(originalValue) {
  const leadingNumber = () => originalValue.replace(/[^0-9].*$/u, '');

  // [pattern, function that produces the normalized value]; the first matching pattern wins:
  const rules = [
    [/^[1-9][0-9]*(?:\.|:a|nd|rd|st|th) (?:ed\.?|edition|p\.?|painos|uppl\.?|upplagan)[.\]]*$/ui, () => `${leadingNumber()}. painos`],
    // Quick and dirty fix for revised editions:
    [/^[1-9][0-9]*(?:\.|:a|nd|rd|st|th)(?: förnyade|,? rev\.| uud\.| uudistettu) (?:ed\.?|edition|p\.?|painos|uppl\.?|upplagan)[.\]]*$/ui, () => `${leadingNumber()}. uudistettu painos`],
    [/^(?:First|Första|Ensimmäinen) (?:ed\.?|edition|p\.?|painos|uppl\.?|upplagan)[.\]]*$/ui, () => `1. painos`],
    [/^(?:Andra|Second|Toinen) (?:ed\.?|edition|p\.?|painos|uppl\.?|upplagan)[.\]]*$/ui, () => `2. painos`],
    [/^(?:Kolmas|Third|Tredje) (?:ed\.?|edition|p\.?|painos|uppl\.?|upplagan)[.\]]*$/ui, () => `3. painos`],
    [/^(?:Fourth|Fjärde|Neljäs) (?:ed\.?|edition|p\.?|painos|uppl\.?|upplagan)[.\]]*$/ui, () => `4. painos`]
  ];

  const matchingRule = rules.find(([pattern]) => originalValue.match(pattern));
  if (matchingRule === undefined) {
    return originalValue;
  }
  const [, toNormalizedValue] = matchingRule;
  return toNormalizedValue();
}
@@ -0,0 +1,52 @@
1
+ import {expect} from 'chai';
2
+ import {MarcRecord} from '@natlibfi/marc-record';
3
+ import validatorFactory from './merge-fields/';
4
+ import {READERS} from '@natlibfi/fixura';
5
+ import generateTests from '@natlibfi/fixugen';
6
+ import createDebugLogger from 'debug';
7
+
8
// Hand the merge-fields fixture directory and the per-fixture callback over to fixugen.
// The mocha "before" hook sanity-checks the validator factory.
const mergeFieldsFixturePath = [__dirname, '..', 'test-fixtures', 'merge-fields'];
const fixuraOptions = {reader: READERS.JSON};
const mochaOptions = {before: () => testValidatorFactory()};

generateTests({
  callback,
  path: mergeFieldsFixturePath,
  useMetadataFile: true,
  recurse: false,
  fixura: fixuraOptions,
  mocha: mochaOptions
});
20
+ const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/merge-field:test');
21
+
22
// Sanity check: the factory must produce an object with a string description and a validate() function.
async function testValidatorFactory() {
  const validator = await validatorFactory();

  expect(validator).to.be.an('object').that.has.any.keys('description', 'validate');

  const {description, validate} = validator;
  expect(description).to.be.a('string');
  expect(validate).to.be.a('function');
}
32
+
33
// Runs one fixture: builds a record from record.json and compares either the validate() result
// (fix = false) or the fixed record itself (fix = true) against expectedResult.json.
// Fixtures whose metadata has enabled = false are skipped.
async function callback({getFixture, enabled = true, fix = false}) {
  if (enabled === false) {
    debug('TEST SKIPPED!');
    return;
  }

  const validator = await validatorFactory();
  const record = new MarcRecord(getFixture('record.json'));
  const expectedResult = getFixture('expectedResult.json');
  // console.log(expectedResult); // eslint-disable-line

  if (fix) {
    await validator.fix(record);
    expect(record).to.eql(expectedResult);
    return;
  }

  const validationResult = await validator.validate(record);
  expect(validationResult).to.eql(expectedResult);
}
@@ -1,9 +1,13 @@
1
- // import createDebugLogger from 'debug';
1
+ import createDebugLogger from 'debug';
2
2
  // const debug = createDebugLogger('@natlibfi/marc-record-validator-melinda/subfield6Utils');
3
3
 
4
4
  import {add8s, fieldsGetAllSubfield8LinkingNumbers, getSubfield8LinkingNumber, isValidSubfield8} from './subfield8Utils';
5
5
  import {fieldHasSubfield, fieldToString, fieldsToString, nvdebug, subfieldToString} from './utils';
6
6
 
7
+ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:subfield6Utils');
8
+ //const debugData = debug.extend('data');
9
+ const debugDev = debug.extend('dev');
10
+
7
11
  // NB! Subfield 6 is non-repeatable and it should always comes first!
8
12
  // NB! Index size should always be 2 (preceding 0 required for 01..09) However, support for 100+ was added on 2023-02-27.
9
13
  // NB! Index value '00' are left as they are (is not paired/indexed/whatever.
@@ -483,3 +487,12 @@ export function get6s(field, candidateFields) { // NB! Convert field to fields!!
483
487
  return relevantFields;
484
488
  }
485
489
 
490
// Replaces the leading "TAG-" part (first four characters) of a valid $6 value with "<tag>-".
// Subfields that isValidSubfield6() rejects are left untouched. Modifies the subfield in place.
// NB! mainly for 1XX<->7XX transfers
export function resetSubfield6Tag(subfield, tag) {
  if (isValidSubfield6(subfield)) {
    const afterTagAndHyphen = subfield.value.substring(4);
    const newValue = `${tag}-${afterTagAndHyphen}`;
    nvdebug(`Set subfield $6 value from ${subfieldToString(subfield)} to ${newValue}`, debugDev);
    subfield.value = newValue; // eslint-disable-line functional/immutable-data
  }
}
package/src/utils.js CHANGED
@@ -1,3 +1,15 @@
1
+ import createDebugLogger from 'debug';
2
+
3
+ import fs from 'fs';
4
+ import path from 'path';
5
+
6
+ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:utils');
7
+ //const debugData = debug.extend('data');
8
+ const debugDev = debug.extend('dev');
9
+
10
+ const melindaFields = JSON.parse(fs.readFileSync(path.join(__dirname, '..', 'src', 'melindaCustomMergeFields.json'), 'utf8'));
11
+
12
+
1
13
  export function isElectronicMaterial(record) {
2
14
  const f337s = record.get('337');
3
15
 
@@ -112,3 +124,110 @@ export function getCatalogingLanguage(record, defaultCatalogingLanguage = undefi
112
124
// Returns a new array with duplicate values removed; the first occurrence of each value is
// kept and the original order is preserved. The input array is not modified.
// Set-based, so it runs in linear time and (unlike an indexOf() comparison, which can never
// find NaN) keeps one NaN instead of silently dropping all of them.
export function uniqArray(arr) {
  return Array.from(new Set(arr));
}
127
+
128
// True when both fields have the same tag and identical string forms (as given by fieldToString()).
// NB! We are skipping normalizations here on purpose! They should be done beforehand...
// The order of subfields is relevant, which is why whole-field strings are compared rather
// than checking that each subfield of one field can be found somewhere in the other field.
export function fieldsAreIdentical(field1, field2) {
  const sameTag = field1.tag === field2.tag;
  return sameTag && fieldToString(field1) === fieldToString(field2);
}
137
+
138
// Returns the number of subfields in the field that have the given subfield code.
export function fieldHasNSubfields(field, subfieldCode/*, subfieldValue = null*/) {
  return field.subfields.reduce((count, sf) => (sf.code === subfieldCode ? count + 1 : count), 0);
}
146
+
147
// Strips a leading copyright/phonogram marker (c, p, ©, ℗ or "Cop.", case-insensitively) from a
// value that otherwise consists only of a year 1000-2029 (optionally followed by a period).
// Any other value is returned unchanged.
export function removeCopyright(value) {
  const markerAndYear = /^(?:c|p|©|℗|Cop\. ?) ?(?<year>(?:1[0-9][0-9][0-9]|20[012][0-9])\.?)$/ui;
  return value.replace(markerAndYear, '$<year>');
}
150
+
151
// True for the tag/subfield combinations that are treated here as non-repeatable:
// 264 $a/$b/$c and 336/337/338 $a/$b/$2.
// Put these into config or so...
function isNonStandardNonrepeatableSubfield(tag, subfieldCode) {
  const contentMediaCarrierCodes = ['a', 'b', '2'];
  const nonRepeatableCodesByTag = new Map([
    ['264', ['a', 'b', 'c']],
    ['336', contentMediaCarrierCodes],
    ['337', contentMediaCarrierCodes],
    ['338', contentMediaCarrierCodes]
  ]);

  const nonRepeatableCodes = nonRepeatableCodesByTag.get(tag);
  if (nonRepeatableCodes === undefined) {
    return false;
  }
  return nonRepeatableCodes.includes(subfieldCode);
}
163
+
164
+
165
// Tells whether the given subfield may occur more than once in a field with the given tag.
// Order of the checks:
// 1. the local non-repeatable exceptions (isNonStandardNonrepeatableSubfield()) => false
// 2. subfield codes 0, 1, 5 and 9 => true
// 3. tag not found exactly once in melindaFields => true (default, logged)
// 4. $6, or a subfield not specified exactly once for the tag => false
// 5. otherwise the "repeatable" value of the subfield's specification
export function subfieldIsRepeatable(tag, subfieldCode) {

  if (isNonStandardNonrepeatableSubfield(tag, subfieldCode)) {
    return false;
  }

  // These we know or "know":
  // NB! $5 is (according to MARC21 format) non-repeatable, and not usable in all fields, but Melinda has a local exception to this, see MET-300
  // An exact list lookup is used: a substring search in '0159' would also accept '' and multi-character codes such as '15'.
  if (['0', '1', '5', '9'].includes(subfieldCode)) {
    // Uh, can $0 appear on any field?
    return true;
  }

  const fieldSpecs = melindaFields.fields.filter(field => field.tag === tag);
  if (fieldSpecs.length !== 1) {
    nvdebug(` WARNING! Getting field ${tag} data failed! ${fieldSpecs.length} hits. Default value true is used for'${subfieldCode}' .`, debugDev);
    return true;
  }

  const subfieldSpecs = fieldSpecs[0].subfields.filter(subfield => subfield.code === subfieldCode);
  // Currently we don't support multiple $6 fields due to re-indexing limitations...
  // Well, $6 is non-repeatable, isn't it?!?
  // (This might actually already be fixed... Marginal issue, but check eventually.)
  if (subfieldSpecs.length !== 1 || subfieldCode === '6') {
    return false; // NON-repeatable if not specified (or ambiguously specified). Maybe add log or warn?
  }
  return subfieldSpecs[0].repeatable;
}
193
+
194
// Returns the "indicators" entry of the first field specification in melindaFields that has the
// given tag, or undefined when no specification has that tag.
function marc21GetTagsLegalIndicators(tag) {
  const firstSpec = melindaFields.fields.find(spec => spec.tag === tag);
  if (firstSpec === undefined) {
    return undefined;
  }
  return firstSpec.indicators;
}
201
+
202
// Returns the ind1 part of the tag's indicator specification, or undefined when the tag has no specification.
export function marc21GetTagsLegalInd1Value(tag) {
  const legalIndicators = marc21GetTagsLegalIndicators(tag);
  return legalIndicators === undefined ? undefined : legalIndicators.ind1;
}
209
+
210
// Returns the ind2 part of the tag's indicator specification, or undefined when the tag has no specification.
export function marc21GetTagsLegalInd2Value(tag) {
  const legalIndicators = marc21GetTagsLegalIndicators(tag);
  return legalIndicators === undefined ? undefined : legalIndicators.ind2;
}
217
+
218
// Logs each subfield (as a string, preceded by prefix) with nvdebug(), passing func along on every call.
export function nvdebugSubfieldArray(subfields, prefix = ' ', func = undefined) {
  subfields.forEach(subfield => {
    const line = `${prefix}${subfieldToString(subfield)}`;
    nvdebug(line, func);
  });
}
221
+
222
// True when both subfields have strictly equal code and strictly equal value.
export function subfieldsAreIdentical(subfieldA, subfieldB) {
  if (subfieldA.code !== subfieldB.code) {
    return false;
  }
  return subfieldA.value === subfieldB.value;
}
225
+
226
// True when the field has more than one subfield with the given code.
export function fieldHasMultipleSubfields(field, subfieldCode/*, subfieldValue = null*/) {
  const occurrences = fieldHasNSubfields(field, subfieldCode);
  return occurrences > 1;
}
229
+
230
// True when removeCopyright() changes the value, i.e. when the value carries a copyright marker it recognizes.
export function hasCopyright(value) {
  return removeCopyright(value) !== value;
}
@@ -0,0 +1,11 @@
1
+ {
2
+ "_validationOptions": {},
3
+ "leader": "12345cam a22123454i 4500",
4
+ "fields": [
5
+ { "tag": "700", "ind1": "1", "ind2": " ", "subfields": [
6
+ { "code": "a", "value": "Sukunimi, Etunimi," },
7
+ { "code": "e", "value": "säveltäjä," },
8
+ { "code": "e", "value": "sovittaja." }
9
+ ]}
10
+ ]
11
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "description": "Fix: merge two 700 fields",
3
+ "only": false,
4
+ "fix": true
5
+ }