@natlibfi/marc-record-validators-melinda 12.0.6 → 12.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/dist/cyrillux.js +11 -11
  2. package/dist/cyrillux.js.map +2 -2
  3. package/dist/dataProvenanceUtils.js +19 -0
  4. package/dist/dataProvenanceUtils.js.map +7 -0
  5. package/dist/index.js +3 -1
  6. package/dist/index.js.map +2 -2
  7. package/dist/merge-fields/controlSubfields.js.map +2 -2
  8. package/dist/merge-fields/counterpartField.js +149 -152
  9. package/dist/merge-fields/counterpartField.js.map +3 -3
  10. package/dist/merge-fields/dataProvenance.js +6 -20
  11. package/dist/merge-fields/dataProvenance.js.map +2 -2
  12. package/dist/merge-fields/index.js +1 -1
  13. package/dist/merge-fields/index.js.map +2 -2
  14. package/dist/merge-fields/mergableIndicator.js +1 -2
  15. package/dist/merge-fields/mergableIndicator.js.map +2 -2
  16. package/dist/merge-fields/mergeConfig.js +2 -0
  17. package/dist/merge-fields/mergeConfig.js.map +2 -2
  18. package/dist/merge-fields/mergeConstraints.js +35 -32
  19. package/dist/merge-fields/mergeConstraints.js.map +3 -3
  20. package/dist/merge-fields/mergeField.js +4 -3
  21. package/dist/merge-fields/mergeField.js.map +2 -2
  22. package/dist/merge-fields/mergeOrAddSubfield.js +8 -7
  23. package/dist/merge-fields/mergeOrAddSubfield.js.map +2 -2
  24. package/dist/merge-fields/mergeSubfield.js +5 -1
  25. package/dist/merge-fields/mergeSubfield.js.map +2 -2
  26. package/dist/merge-fields/worldKnowledge.js +52 -0
  27. package/dist/merge-fields/worldKnowledge.js.map +2 -2
  28. package/dist/merge-fields.test.js +2 -2
  29. package/dist/merge-fields.test.js.map +2 -2
  30. package/dist/normalize-dashes.js +2 -2
  31. package/dist/normalize-dashes.js.map +2 -2
  32. package/dist/normalizeFieldForComparison.js +8 -14
  33. package/dist/normalizeFieldForComparison.js.map +2 -2
  34. package/dist/prepublicationUtils.js +1 -1
  35. package/dist/prepublicationUtils.js.map +2 -2
  36. package/dist/punctuation2.js +10 -10
  37. package/dist/punctuation2.js.map +2 -2
  38. package/dist/removeDuplicateDataFields.js +1 -24
  39. package/dist/removeDuplicateDataFields.js.map +2 -2
  40. package/dist/removeInferiorDataFields.js +3 -2
  41. package/dist/removeInferiorDataFields.js.map +2 -2
  42. package/dist/sortSubfields.js +19 -19
  43. package/dist/sortSubfields.js.map +2 -2
  44. package/dist/subfield6Utils.js +0 -1
  45. package/dist/subfield6Utils.js.map +2 -2
  46. package/dist/subfield8Utils.js +0 -5
  47. package/dist/subfield8Utils.js.map +2 -2
  48. package/dist/utils.js +29 -3
  49. package/dist/utils.js.map +2 -2
  50. package/package.json +4 -4
  51. package/src/cyrillux.js +11 -11
  52. package/src/dataProvenanceUtils.js +21 -0
  53. package/src/index.js +3 -1
  54. package/src/merge-fields/controlSubfields.js +0 -1
  55. package/src/merge-fields/counterpartField.js +191 -290
  56. package/src/merge-fields/dataProvenance.js +8 -25
  57. package/src/merge-fields/index.js +1 -1
  58. package/src/merge-fields/mergableIndicator.js +1 -2
  59. package/src/merge-fields/mergeConfig.js +2 -1
  60. package/src/merge-fields/mergeConstraints.js +39 -34
  61. package/src/merge-fields/mergeField.js +4 -7
  62. package/src/merge-fields/mergeOrAddSubfield.js +8 -7
  63. package/src/merge-fields/mergeSubfield.js +11 -2
  64. package/src/merge-fields/worldKnowledge.js +72 -3
  65. package/src/merge-fields.test.js +2 -2
  66. package/src/normalize-dashes.js +2 -2
  67. package/src/normalizeFieldForComparison.js +19 -20
  68. package/src/prepublicationUtils.js +1 -1
  69. package/src/punctuation2.js +10 -10
  70. package/src/removeDuplicateDataFields.js +24 -24
  71. package/src/removeInferiorDataFields.js +3 -2
  72. package/src/sortSubfields.js +19 -19
  73. package/src/subfield6Utils.js +1 -1
  74. package/src/subfield8Utils.js +5 -5
  75. package/src/utils.js +39 -12
  76. package/test-fixtures/cyrillux/f14/expectedResult.json +32 -0
  77. package/test-fixtures/cyrillux/f14/metadata.json +10 -0
  78. package/test-fixtures/cyrillux/f14/record.json +14 -0
  79. package/test-fixtures/merge-fields/f042_01/expectedResult.json +12 -0
  80. package/test-fixtures/merge-fields/f042_01/metadata.json +6 -0
  81. package/test-fixtures/merge-fields/f042_01/record.json +13 -0
  82. package/test-fixtures/merge-fields/f06/expectedResult.json +42 -0
  83. package/test-fixtures/merge-fields/f06/metadata.json +6 -0
  84. package/test-fixtures/merge-fields/f06/record.json +41 -0
  85. package/test-fixtures/merge-fields/f07/expectedResult.json +18 -0
  86. package/test-fixtures/merge-fields/f07/metadata.json +6 -0
  87. package/test-fixtures/merge-fields/f07/record.json +18 -0
  88. package/test-fixtures/merge-fields/f08/expectedResult.json +12 -0
  89. package/test-fixtures/merge-fields/f08/metadata.json +7 -0
  90. package/test-fixtures/merge-fields/f08/record.json +10 -0
  91. package/test-fixtures/merge-fields/f09/expectedResult.json +14 -0
  92. package/test-fixtures/merge-fields/f09/metadata.json +6 -0
  93. package/test-fixtures/merge-fields/f09/record.json +14 -0
  94. package/test-fixtures/merge-fields/f10/expectedResult.json +25 -0
  95. package/test-fixtures/merge-fields/f10/metadata.json +6 -0
  96. package/test-fixtures/merge-fields/f10/record.json +25 -0
  97. package/test-fixtures/merge-fields/f11/expectedResult.json +40 -0
  98. package/test-fixtures/merge-fields/f11/metadata.json +7 -0
  99. package/test-fixtures/merge-fields/f11/record.json +50 -0
  100. package/test-fixtures/merge-fields/f12/expectedResult.json +17 -0
  101. package/test-fixtures/merge-fields/f12/metadata.json +6 -0
  102. package/test-fixtures/merge-fields/f12/record.json +25 -0
  103. package/test-fixtures/merge-fields/f13/expectedResult.json +18 -0
  104. package/test-fixtures/merge-fields/f13/metadata.json +6 -0
  105. package/test-fixtures/merge-fields/f13/record.json +28 -0
  106. package/test-fixtures/merge-fields/f14/expectedResult.json +25 -0
  107. package/test-fixtures/merge-fields/f14/metadata.json +6 -0
  108. package/test-fixtures/merge-fields/f14/record.json +25 -0
  109. package/test-fixtures/merge-fields/f300_01/expectedResult.json +9 -0
  110. package/test-fixtures/merge-fields/f300_01/metadata.json +6 -0
  111. package/test-fixtures/merge-fields/f300_01/record.json +8 -0
  112. package/test-fixtures/merge-fields/f300_02/expectedResult.json +13 -0
  113. package/test-fixtures/merge-fields/f300_02/metadata.json +6 -0
  114. package/test-fixtures/merge-fields/f300_02/record.json +16 -0
  115. package/test-fixtures/merge-fields/f490_01/expectedResult.json +13 -0
  116. package/test-fixtures/merge-fields/f490_01/metadata.json +6 -0
  117. package/test-fixtures/merge-fields/f490_01/record.json +16 -0
  118. package/test-fixtures/remove-inferior-datafields/f17/expectedResult.json +11 -0
  119. package/test-fixtures/remove-inferior-datafields/f17/metadata.json +5 -0
  120. package/test-fixtures/remove-inferior-datafields/f17/record.json +15 -0
@@ -1,23 +1,24 @@
1
1
  // For each incoming field that
2
2
 
3
3
  import createDebugLogger from 'debug';
4
- import {fieldHasSubfield, fieldHasNSubfields, fieldHasMultipleSubfields, fieldToString, nvdebug, removeCopyright} from '../utils.js';
4
+ import {fieldHasSubfield, fieldHasNSubfields, fieldHasMultipleSubfields, fieldToString, nvdebug, removeCopyright, tagIsRepeatable} from '../utils.js';
5
5
  import {cloneAndNormalizeFieldForComparison, cloneAndRemovePunctuation} from '../normalizeFieldForComparison.js';
6
- // This should be done via our own normalizer:
7
6
  import {normalizeControlSubfieldValue} from '../normalize-identifiers.js';
8
7
 
9
8
  import {getMergeConstraintsForTag} from './mergeConstraints.js';
10
9
  import {controlSubfieldsPermitMerge} from './controlSubfields.js';
11
10
  import {mergableIndicator1, mergableIndicator2} from './mergableIndicator.js';
12
11
  import {partsAgree} from '../normalizeSubfieldValueForComparison.js';
13
- import {normalizeForSamenessCheck, valueCarriesMeaning} from './worldKnowledge.js';
14
- import { provenanceSubfieldsPermitMerge } from './dataProvenance.js';
12
+ import {getSynonym, normalizeForSamenessCheck, valueCarriesMeaning} from './worldKnowledge.js';
13
+ import {provenanceSubfieldsPermitMerge} from './dataProvenance.js';
14
+
15
+ // NB! We are using internal prefix '(FIN11)' instead of global (FI-ASTERI-N) here. The latter would be better but would require some work and testing.
15
16
 
16
17
  const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:mergeField:counterpart');
17
18
  //const debugData = debug.extend('data');
18
19
  const debugDev = debug.extend('dev');
19
20
 
20
- const irrelevantSubfieldsInNameAndTitlePartComparison = '5689';
21
+ // NB! FIN11 $0 pairing is handled in this code. We might want to support others, esp. FIN13 as well.
21
22
 
22
23
  const counterpartRegexps = { // NB! tag is from source!
23
24
  // Note that in the normal case, all source 1XX fields have been converted to 7XX fields.
@@ -36,17 +37,6 @@ const counterpartRegexpsSingle = {
36
37
  '940': /^[29]40$/u, '973': /^[79]73$/u
37
38
  };
38
39
 
39
- /*
40
- function differentPublisherSubfields(field1, field2) {
41
- if (field1.tag === '260' && field2.tag === '264' && field2.ind2 === '3') {
42
- return true;
43
- }
44
- if (field1.tag === '264' && field1.ind2 === '3' && field2.tag === '260') {
45
- return true;
46
- }
47
- return false;
48
- }
49
- */
50
40
 
51
41
  export function splitToNameAndQualifier(value) {
52
42
  if (value.match(/^.* \([^()]+\)$/u)) {
@@ -57,7 +47,7 @@ export function splitToNameAndQualifier(value) {
57
47
  return [value, undefined];
58
48
  }
59
49
 
60
- export function splitToNameAndQualifierAndProcessName(name) {
50
+ function splitToNameAndQualifierAndProcessName(name) {
61
51
  //const nameOnly = name.replace(/(?: \([^)]+\)| abp?| Kustannus| Kustannus Oy|, kustannusosakeyhtiö| oyj?| ry)$/ugi, '');
62
52
  const [qualifierlessName, qualifier] = splitToNameAndQualifier(name);
63
53
 
@@ -101,7 +91,7 @@ export function splitToNameAndQualifierAndProcessName(name) {
101
91
  }
102
92
 
103
93
  export function canContainOptionalQualifier(tag, subfieldCode) {
104
- // We have made 300$a NON-repeatable (against specs), as we newer want there to repeat (probably near-duplicates)
94
+ // We have made 300$a NON-repeatable (against specs), as we never want them to be repeated (probably near-duplicates)
105
95
  if (tag === '300' && subfieldCode === 'a') {
106
96
  return true;
107
97
  }
@@ -113,6 +103,7 @@ export function canContainOptionalQualifier(tag, subfieldCode) {
113
103
  }
114
104
 
115
105
  function withAndWithoutQualifierAgree(value1, value2, tag, subfieldCode) {
106
+ // Split each value into a name part and an optional qualifier part. The names must be equal; the qualifiers must also be equal, but only when both values contain one.
116
107
  if (!canContainOptionalQualifier(tag, subfieldCode)) {
117
108
  return false;
118
109
  }
@@ -133,11 +124,8 @@ function withAndWithoutQualifierAgree(value1, value2, tag, subfieldCode) {
133
124
  }
134
125
 
135
126
  return false;
136
-
137
-
138
127
  }
139
128
 
140
-
141
129
  function corporateNamesAgree(value1, value2, tag, subfieldCode) {
142
130
  if (subfieldCode !== 'a' || !['110', '610', '710', '810'].includes(tag)) {
143
131
  return false;
@@ -158,166 +146,34 @@ function corporateNamesAgree(value1, value2, tag, subfieldCode) {
158
146
  // Currently all prefixes and suffixes are publisher information, so there's no point comparing them any further...
159
147
 
160
148
  return true;
161
-
162
- /*
163
- function isKustantaja(nameData) {
164
- if (nameData.suffix.match(/^(?:Kustannus|Kustannus oy|kustannusosakeyhtiö)$/iu)) {
165
- return true;
166
- }
167
- if (nameData.prefix.match(/^Kustannus Oy$/i)) {
168
- return true;
169
- }
170
- return false;
171
- }
172
- */
173
- }
174
-
175
-
176
- function pairableValue(tag, subfieldCode, value1, value2) {
177
- // This function could just return true or false.
178
- // I thought of preference when I wrote this, but preference implemented *here* (modularity). mergeFields.js should handle preference.
179
- if (withAndWithoutQualifierAgree(value1, value2, tag, subfieldCode)) {
180
- // 300$a "whatever" and "whatever (123 sivua)"
181
- return value1;
182
- }
183
- if (partsAgree(value1, value2, tag, subfieldCode) || corporateNamesAgree(value1, value2, tag, subfieldCode)) {
184
- // Pure baseness: here we assume that base's value1 is better than source's value2.
185
- return value1;
186
- }
187
-
188
- return undefined;
189
149
  }
190
150
 
191
-
192
151
  function counterpartExtraNormalize(tag, subfieldCode, value) {
193
-
194
152
  // Remove trailing punctuation:
195
153
  value = value.replace(/(\S)(?:,|\.|\?|!|\. -| *:| *;| =| \/)$/u, '$1');
196
154
  // Remove brackets:
197
155
  value = value.replace(/^\(([^()]+)\)$/u, '$1'); // Remove initial '(' and final ')' if both exist.
198
156
  value = value.replace(/^\[([^[\]]+)\]$/u, '$1'); // Remove initial '[' and final ']' if both exist.
199
157
  // Mainly for field 260$c:
200
- value = removeCopyright(value);
201
-
202
- value = value.replace(/http:\/\//ug, 'https://'); // MET-501: http vs https
203
- value = normalizeForSamenessCheck(tag, subfieldCode, value);
204
-
205
- return value;
206
- }
207
-
208
- function uniqueKeyMatches(baseField, sourceField, forcedKeyString = null) {
209
- // NB! Assume that field1 and field2 have same relevant subfields.
210
- // What to do if if base
211
- // const keySubfieldsAsString = forcedKeyString || getUniqueKeyFields(field1);
212
- const keySubfieldsAsString = forcedKeyString || getMergeConstraintsForTag(baseField.tag, 'key');
213
- //return mandatorySubfieldComparison(baseField, sourceField, keySubfieldsAsString);
214
- return optionalSubfieldComparison(baseField, sourceField, keySubfieldsAsString);
215
- }
216
-
217
-
218
- function optionalSubfieldComparison(originalBaseField, originalSourceField, keySubfieldsAsString) {
219
- // Here "optional subfield" means a subfield, that needs not to be present, but if present, it must be identical...
220
- // (Think of a better name...)
221
- // We use clones here, since these changes done below are not intented to appear on the actual records.
222
- const field1 = cloneAndNormalizeFieldForComparison(originalBaseField);
223
- const field2 = cloneAndNormalizeFieldForComparison(originalSourceField);
224
-
225
- if (keySubfieldsAsString === null) { // does not currently happen
226
- // If keySubfieldsAsString is undefined, (practically) everything is the string.
227
- // When everything is the string, the strings need to be (practically) identical.
228
- // (NB! Here order matters. We should probably make it matter everywhere.)
229
- // (However, keySubfieldsAsString === '' will always succeed. Used by 040 at least.)
230
- // NB! substring(6) skips "TAG II" (I=indicator. Thus we skip indicators)
231
- return fieldToString(field1).substring(6) === fieldToString(field2).substring(6);
232
- }
233
- const subfieldArray = keySubfieldsAsString.split('');
234
-
235
- // Long forgotten, but my educated guess about this: if 'key' is defined in merge constraints
236
- // for this field, then at least one of the subfield codes in 'key' must be present in both fields.
237
- // However, this is not necessarily right.
238
- if (subfieldArray.length > 0 && !subfieldArray.some(sfCode => hasCommonNominator(sfCode))) {
239
- return false;
158
+ if (tag === '260' && subfieldCode === 'c') {
159
+ value = removeCopyright(value);
240
160
  }
161
+ value = value.replace(/http:\/\//ug, 'https://'); // MET-501: http vs https
162
+ value = getSynonym(tag, subfieldCode, value);
163
+ value = normalizeForSamenessCheck(tag, subfieldCode, value); // Better to remove trailing punctuation before calling this...
241
164
 
242
165
 
243
- return subfieldArray.every(subfieldCode => testOptionalSubfield(originalBaseField.tag, subfieldCode));
244
-
245
-
246
- function hasCommonNominator(subfieldCode) {
247
- //nvdebug(`hasCommonNominator(${subfieldCode})? '${fieldToString(originalBaseField)}' vs '${fieldToString(originalSourceField)}'`, debugDev);
248
-
249
- // If base has $a and source has $b, there's no common nominator, thus fail...
250
- const subfields1 = field1.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field1.tag, subfield.code, subfield.value));
251
- const subfields2 = field2.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field2.tag, subfield.code, subfield.value));
252
-
253
- return subfields1.length > 0 && subfields2.length > 0;
254
- }
255
-
256
- function testOptionalSubfield(tag, subfieldCode) {
257
- // NB! Don't compare non-meaningful subfields
258
- const subfields1 = field1.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field1.tag, subfield.code, subfield.value));
259
- const subfields2 = field2.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field2.tag, subfield.code, subfield.value));
260
-
261
- // If one side is empty, all is good
262
- if (subfields1.length === 0 || subfields2.length === 0) {
263
- return true;
264
- }
265
-
266
- //nvdebugSubfieldArray(subfields1, 'SF1', debugDev);
267
- //nvdebugSubfieldArray(subfields2, 'SF2', debugDev);
268
-
269
- // When pairing we can use stronger normalizations than the generic one:
270
- const subfieldValues1 = subfields1.map(sf => counterpartExtraNormalize(tag, subfieldCode, sf.value));
271
- const subfieldValues2 = subfields2.map(sf => counterpartExtraNormalize(tag, subfieldCode, sf.value));
272
-
273
- //nvdebug(`SF1 NORM: ${subfieldValues1.join(' --')}`, debugDev);
274
- //nvdebug(`SF2 NORM: ${subfieldValues2.join(' --')}`, debugDev);
275
-
276
- // If one set is a subset of the other, all is probably good (how about 653$a, 505...)
277
- if (subfieldValues1.every(val => subfieldValues2.includes(val)) || subfieldValues2.every(val => subfieldValues1.includes(val))) {
278
- return true;
279
- }
280
-
281
- if (subfieldValues1.length === 1 && subfieldValues2.length === 1) {
282
- return pairableValue(field1.tag, subfieldCode, subfieldValues1[0], subfieldValues2[0]) !== undefined;
283
- }
284
-
285
- return false;
286
-
287
- }
166
+ return value;
288
167
  }
289
168
 
169
+ function hasCommonNominator(field1, field2, subfieldCode) {
170
+ //nvdebug(`hasCommonNominator(${subfieldCode})? '${fieldToString(originalBaseField)}' vs '${fieldToString(originalSourceField)}'`, debugDev);
290
171
 
291
- function mandatorySubfieldComparison(originalField1, originalField2, keySubfieldsAsString) {
292
- // NB! We use clones here, since these changes done below are not intented to appear on the actual records.
293
- const field1 = cloneAndNormalizeFieldForComparison(originalField1);
294
- const field2 = cloneAndNormalizeFieldForComparison(originalField2);
295
- if (keySubfieldsAsString === null) { // does not currently happen
296
- // If keySubfieldsAsString is undefined, (practically) everything is the string.
297
- // When everything is the string, the strings need to be (practically) identical.
298
- // (NB! Here order matters. We should probably make it matter everywhere.)
299
- // (However, keySubfieldsAsString === '' will always succeed. Used by 040 at least.)
300
- return fieldToString(field1) === fieldToString(field2);
301
- }
302
- const subfieldArray = keySubfieldsAsString.split('');
303
-
304
- //const differentSubfieldCodes = differentPublisherSubfields(originalField1, originalField2);
305
-
306
- return subfieldArray.every(subfieldCode => mandatorySingleSubfieldComparison(subfieldCode));
307
-
308
- function mandatorySingleSubfieldComparison(subfieldCode) {
309
- //const otherSubfieldCode = getOtherSubfieldCode(subfieldCode);
310
- const subfieldValues1 = field1.subfields.filter(subfield => subfield.code === subfieldCode).map(sf => sf.value);
311
- const subfieldValues2 = field2.subfields.filter(subfield => subfield.code === subfieldCode).map(sf => sf.value);
312
- // Assume that at least 1 instance must exist and that all instances must match
313
- if (subfieldValues1.length !== subfieldValues2.length) {
314
- debugDev(`mSC: Unique key: subfield ${subfieldCode} issues...`);
315
- return false;
316
- }
317
-
318
- return subfieldValues1.every(value => subfieldValues2.includes(value));
319
- }
172
+ // If base has $a and source has $b, there's no common nominator, thus fail...
173
+ const subfields1 = field1.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field1.tag, subfield.code, subfield.value));
174
+ const subfields2 = field2.subfields.filter(subfield => subfield.code === subfieldCode && valueCarriesMeaning(field2.tag, subfield.code, subfield.value));
320
175
 
176
+ return subfields1.length > 0 && subfields2.length > 0;
321
177
  }
322
178
 
323
179
  function tagToRegexp(tag, internalMerge = false) {
@@ -334,7 +190,7 @@ function tagToRegexp(tag, internalMerge = false) {
334
190
  }
335
191
 
336
192
  function areRequiredSubfieldsPresent(field) {
337
- const subfieldString = getMergeConstraintsForTag(field.tag, 'required');
193
+ const subfieldString = getMergeConstraintsForTag(field.tag, 'required').join('');
338
194
  if (subfieldString === null) {
339
195
  return true;
340
196
  } // nothing is required
@@ -349,18 +205,31 @@ function areRequiredSubfieldsPresent(field) {
349
205
  });
350
206
  }
351
207
 
352
- function arePairedSubfieldsInBalance(field1, field2) {
353
- const subfieldString = getMergeConstraintsForTag(field1.tag, 'paired');
354
- if (subfieldString === null) {
355
- return true;
208
+ function getUnbalancedPairedSubfieldCode(field1, field2) {
209
+ const fullSubfieldString = getMergeConstraintsForTag(field1.tag, 'paired').join('') || '';
210
+
211
+ if (fullSubfieldString === '') {
212
+ return false;
213
+ }
214
+
215
+ // If the two fields share the FIN11 ID (WE SHOULD SUPPORT FIN13 AS WELL) there's no need to check the 'paired' constraint regarding related subfields.
216
+ // Meaning that if this is a FIN11 match, we should not bother checking whether something like 100$b/c/d/q is there. (NB! Note that the 'required' check is not relaxed in this way)
217
+ // (I'm not saying that 100$b/c/d/q are in the 'paired' constraint, I'm just illustrating the issue here)
218
+ const pairable = pairableIdentifier(field1, field2, '(FIN11)');
219
+ const subfieldString = pairable ? removeNameRelatedSubfieldCodes(fullSubfieldString, field1.tag) : fullSubfieldString;
220
+ debug(`CHECK ${pairable ? 'PAIRABLE ' : ''}${field1.tag} PAIRS: '${fullSubfieldString}' => '${subfieldString}'`);
221
+
222
+ if (subfieldString === '') {
223
+ return false;
356
224
  }
357
225
  const subfieldArray = subfieldString.split('');
358
226
 
359
- return subfieldArray.every(sfcode => fieldHasNSubfields(field1, sfcode) === fieldHasNSubfields(field2, sfcode));
227
+ return subfieldArray.find(sfcode => fieldHasNSubfields(field1, sfcode) !== fieldHasNSubfields(field2, sfcode));
360
228
  }
361
229
 
362
230
  function syntacticallyMergablePair(baseField, sourceField, config) {
363
231
  // Indicators must typically be equal (there are exceptions such as non-filing characters though):
232
+ nvdebug("CHECK SYNTAX");
364
233
  if (!mergableIndicator1(baseField, sourceField, config)) {
365
234
  nvdebug(`non-mergable (reason: indicator1): ${JSON.stringify(config)}`, debugDev);
366
235
  return false;
@@ -390,8 +259,9 @@ function syntacticallyMergablePair(baseField, sourceField, config) {
390
259
  }
391
260
 
392
261
  // Stuff of Hacks! Eg. require that both fields either have or have not X00$t:
393
- if (!arePairedSubfieldsInBalance(baseField, sourceField)) {
394
- nvdebug('required subfield pair check failed.', debugDev);
262
+ const subfieldCodeThatFailsToPair = getUnbalancedPairedSubfieldCode(baseField, sourceField);
263
+ if (subfieldCodeThatFailsToPair) {
264
+ nvdebug(`non-mergable (reason: required subfield pair check failed for code: '${subfieldCodeThatFailsToPair}')`, debugDev);
395
265
  return false;
396
266
  }
397
267
 
@@ -400,6 +270,7 @@ function syntacticallyMergablePair(baseField, sourceField, config) {
400
270
 
401
271
  function mergablePair(baseField, sourceField, config) {
402
272
  if (!syntacticallyMergablePair(baseField, sourceField, config)) {
273
+ nvdebug('non-mergable (reason: syntax)', debugDev);
403
274
  return false;
404
275
  }
405
276
 
@@ -413,41 +284,56 @@ function mergablePair(baseField, sourceField, config) {
413
284
  return true;
414
285
  }
415
286
 
287
+ function removeNameRelatedSubfieldCodes(codestring, tag) {
288
+ // If we have a $0 (FIN11) match, we are not interested in the core name subfields. Remove them from the subfield code string.
289
+ const removables = getNameRelatedSubfieldCodes(tag); // These are different for X00, X10 and X11...
290
+ return removeCharsFromString(codestring, removables);
416
291
 
417
- function pairableAsteriIDs(baseField, sourceField) {
418
- //nvdebug(`ASTERI1 ${fieldToString(baseField)}`, debugDev); // eslint-disable-line
419
- //nvdebug(`ASTERI2 ${fieldToString(sourceField)}`, debugDev); // eslint-disable-line
292
+ function removeCharsFromString(string, removableCharsAsString) {
293
+ const removableChars = removableCharsAsString.split('');
294
+ return string.split('').filter(c => !removableChars.includes(c)).join('');
295
+ }
420
296
 
421
- // Check that relevant control subfield(s) exist in both records (as controlSubfieldsPermitMerge() doesn't check it):
422
- const fin11a = getAsteriIDs(baseField);
423
- if (fin11a.length === 0) {
424
- return false;
297
+ function getNameRelatedSubfieldCodes(tag) {
298
+ if (['100', '600', '700', '800'].includes(tag)) {
299
+ return 'abcdq';
300
+ }
301
+ if (['110', '610', '710', '810'].includes(tag)) {
302
+ return 'abcdn';
303
+ }
304
+ if (['111', '611', '711', '811'].includes(tag)) {
305
+ return 'acden';
306
+ }
307
+ return '';
425
308
  }
426
- const fin11b = getAsteriIDs(sourceField);
427
- if (fin11b.length === 0) {
309
+ }
310
+
311
+ function pairableIdentifier(field1, field2, prefix) {
312
+ const normalizedPrefix = prefix;
313
+ nvdebug(`PREF '${prefix}' => '${normalizedPrefix}'`);
314
+
315
+ const prefixLength = normalizedPrefix.length;
316
+ const identifiers1 = getIdentifiers(field1);
317
+ if (identifiers1.length !== 1) {
428
318
  return false;
429
319
  }
430
- //nvdebug(`ASTERI WP3:\n${fin11a.join(", ")}\n${fin11b.join(", ")}`, debugDev); // eslint-disable-line
431
-
432
- // Check that found control subfields agree. Use pre-existing generic function to reduce code.
433
- // (NB! We could optimize and just return true here, as control subfield check is done elsewhere as well.
434
- // However, explicitly checking them here makes the code more robust.)
435
- if (!controlSubfieldsPermitMerge(baseField, sourceField)) {
320
+ const identifiers2 = getIdentifiers(field2);
321
+ if (identifiers2.length !== 1) {
436
322
  return false;
437
323
  }
438
- //console.log(`ASTERI PAIR ${fieldToString(sourceField)}`); // eslint-disable-line
439
- return true;
440
324
 
441
- // NB! This boldly assumes that the default prefix for Asteri is '(FIN11)', not '(FI-ASTERI-N)' nor a finaf urn...
442
- function getAsteriIDs(field) {
325
+ return identifiers1[0] === identifiers2[0];
326
+
327
+ function getIdentifiers(field) {
443
328
  return field.subfields.filter(sf => sf.code === '0')
444
329
  .map(sf => normalizeControlSubfieldValue(sf.value))
445
- .filter(val => val.substring(0, 7) === '(FIN11)');
330
+ .filter(val => val.substring(0, prefixLength) === normalizedPrefix);
446
331
  }
447
332
  }
448
333
 
449
334
 
450
335
  function hasRepeatableSubfieldThatShouldBeTreatedAsNonRepeatable(field) {
336
+ // 700$s?
451
337
  if (field.tag === '260' || field.tag === '264') {
452
338
  return ['a', 'b', 'c', 'e', 'f', 'g'].some(subfieldCode => fieldHasMultipleSubfields(field, subfieldCode));
453
339
  }
@@ -461,134 +347,149 @@ function hasRepeatableSubfieldThatShouldBeTreatedAsNonRepeatable(field) {
461
347
  return false;
462
348
  }
463
349
 
464
- function pairableName(baseField, sourceField) {
465
- // 100$a$t: remove $t and everything after that
466
- const reducedField1 = fieldToNamePart(baseField);
467
- const reducedField2 = fieldToNamePart(sourceField);
350
+ function getRelevantSubfieldValues(field, subfieldCode) {
351
+ const values = field.subfields.filter(sf => sf.code === subfieldCode).map(sf => counterpartExtraNormalize(field.tag, subfieldCode, sf.value));
468
352
 
469
- const string1 = fieldToString(reducedField1);
470
- const string2 = fieldToString(reducedField2);
353
+ return values.filter(v => valueCarriesMeaning(field.tag, subfieldCode, v));
354
+ }
471
355
 
472
- //nvdebug(`IN: pairableName():\n '${string1}' vs\n '${string2}'`, debugDev);
473
- if (string1 === string2) {
356
+ function pairableValue(tag, subfieldCode, value1, value2) {
357
+ if (value1 === value2) {
474
358
  return true;
475
359
  }
476
360
 
477
- // Essentially these are too hard to handle with field-merge (eg. multi-505$g)
478
- if (hasRepeatableSubfieldThatShouldBeTreatedAsNonRepeatable(reducedField1) || hasRepeatableSubfieldThatShouldBeTreatedAsNonRepeatable(reducedField2)) {
479
- return false;
480
- }
481
-
482
- // Compare the remaining subsets...
483
- // First check that name matches...
484
- if (uniqueKeyMatches(reducedField1, reducedField2)) {
485
- nvdebug(` name match: '${fieldToString(reducedField1)}'`, debugDev);
361
+ // This function could just return true or false.
362
+ // I thought of preference when I wrote this, but preference implemented *here* (modularity). mergeFields.js should handle preference.
363
+ if (withAndWithoutQualifierAgree(value1, value2, tag, subfieldCode)) {
364
+ // 300$a "whatever" and "whatever (123 sivua)"
486
365
  return true;
487
366
  }
488
-
489
- // However, name mismatch is not critical! If Asteri ID matches, it's still a match! *NOT* sure whether this a good idea.
490
- // 2023-01-24 Disable this. Caretaker can fix these later on. Not a job for merge. We can't be sure that $0 pair is corrent, nor which version (base or source) to use.
491
- // 2023-03-07: Enable this again!
492
- if (pairableAsteriIDs(baseField, sourceField)) {
493
- //nvdebug(` name match based on ASTERI $0'`, debugDev);
367
+ if (partsAgree(value1, value2, tag, subfieldCode) || corporateNamesAgree(value1, value2, tag, subfieldCode)) {
368
+ // Pure baseness: here we assume that base's value1 is better than source's value2.
494
369
  return true;
495
370
  }
496
371
 
497
- nvdebug(` name mismatch:`, debugDev);
498
- nvdebug(` '${fieldToString(reducedField1)}' vs`, debugDev);
499
- nvdebug(` '${fieldToString(reducedField2)}'`, debugDev);
500
372
  return false;
501
373
  }
502
374
 
375
+ function pairableValueInArray(tag, subfieldCode, val, arr) {
376
+ return arr.some(val2 => pairableValue(tag, subfieldCode, val, val2));
377
+ }
503
378
 
504
- function semanticallyMergablePair(baseField, sourceField) {
505
- // On rare occasions a field contains also a title part. For these name part (= normally everything) and title part
506
- // must be checked separately:
507
- if (!titlePartsMatch(baseField, sourceField)) {
508
- nvdebug(` ${baseField.tag} is unmergable: Title part mismatch.`, debugDev);
509
- return false;
510
- }
511
379
 
512
- // Hmm... we should check lifespan here, $d YYYY
380
+ function tightSubfieldMatch(field1, field2, subfieldCode, mustHave = false) {
381
+ nvdebug(`${subfieldCode} F1: ${fieldToString(field1)}`);
382
+ nvdebug(`${subfieldCode} F2: ${fieldToString(field2)}`);
383
+ const values1 = getRelevantSubfieldValues(field1, subfieldCode);
384
+ const values2 = getRelevantSubfieldValues(field2, subfieldCode);
513
385
 
514
- // Handle the field specific "unique key" (=set of fields that make the field unique
515
- if (!pairableName(baseField, sourceField)) {
516
- nvdebug('Unmergable: Name part mismatch', debugDev);
386
+ if(!mustHave) {
387
+ if (values1.length === 0 || values2.length === 0) {
388
+ return true;
389
+ }
390
+ }
391
+
392
+ if (values1.length !== values2.length) {
517
393
  return false;
518
394
  }
519
- //debug(' Semantic checks passed! We are MERGABLE!');
520
395
 
521
- return true;
396
+ nvdebug(`Compare $${subfieldCode} contents:\n '${values1.join("'\n '")}' vs\n '${values2.join("'\n '")}'`);
397
+ return values1.every(v => pairableValueInArray(field1.tag, subfieldCode, v, values2)) && values2.every(v => pairableValueInArray(field1.tag, subfieldCode, v, values1));
522
398
  }
523
399
 
524
-
525
- function namePartThreshold(field) {
526
- // Threshold is only applicaple to some tags..
527
- if (!(/[10]0$/u).test(field.tag)) {
528
- return -1;
400
+ function looseSubfieldMatch(field1, field2, subfieldCode) {
401
+ const values1 = getRelevantSubfieldValues(field1, subfieldCode);
402
+ const values2 = getRelevantSubfieldValues(field2, subfieldCode);
403
+ if (values1.length === 0 || values2.length === 0) {
404
+ return true;
529
405
  }
530
- const t = field.subfields.findIndex(currSubfield => currSubfield.code === 't');
531
- const u = t; // field.subfields.findIndex(currSubfield => currSubfield.code === 'u');
532
- if (t === -1) {
533
- return u;
406
+ // Subsets are fine:
407
+ if (values1.every(v => pairableValueInArray(field1.tag, subfieldCode, v, values2))) {
408
+ return true;
534
409
  }
535
- if (u === -1) {
536
- return t;
410
+ if (values2.every(v => pairableValueInArray(field1.tag, subfieldCode, v, values1))) {
411
+ return true;
537
412
  }
538
- return t > u ? u : t;
413
+ return false;
539
414
  }
540
415
 
541
- function fieldToNamePart(field) {
542
- const index = namePartThreshold(field);
543
- const relevantSubfields = field.subfields.filter((sf, i) => i < index || index === -1).filter(sf => !irrelevantSubfieldsInNameAndTitlePartComparison.includes(sf.code));
416
+ function semanticallyMergablePair(baseField, sourceField) {
417
+ const field1 = cloneAndNormalizeFieldForComparison(baseField);
418
+ const field2 = cloneAndNormalizeFieldForComparison(sourceField);
544
419
 
545
- const subsetField = {'tag': field.tag, 'ind1': field.ind1, 'ind2': field.ind2, subfields: relevantSubfields};
420
+ const string1 = fieldToString(field1);
421
+ const string2 = fieldToString(field2);
546
422
 
547
- /*
548
- if (index > -1) {
549
- debugDev(`Name subset: ${fieldToString(subsetField)}`);
423
+ nvdebug(`IN ${baseField.tag}: pairableName():\n '${string1}' vs\n '${string2}'`, debugDev);
424
+ if (string1 === string2) {
425
+ return true;
550
426
  }
551
- */
552
427
 
553
- // Ummm... Sometimes $0 comes after $t but belongs to name part
428
+ const mergeConstraints = getMergeConstraintsForTag(field1.tag); // The tag doe
429
+ if (mergeConstraints.length === 0) { // We have no constraints defined for this tag -> fail
430
+ return false;
431
+ }
554
432
 
555
- return subsetField;
556
- }
433
+ // Essentially these are too hard to handle with field-merge (eg. multi-505$g)
434
+ if (hasRepeatableSubfieldThatShouldBeTreatedAsNonRepeatable(field1) || hasRepeatableSubfieldThatShouldBeTreatedAsNonRepeatable(field2)) {
435
+ nvdebug(`Unmergable: data is too complex to be automatically safely merged`, debugDev);
436
+ return false;
437
+ }
557
438
 
558
- function fieldToTitlePart(field) {
559
- // Take everything after 1st subfield $t...
560
- const index = field.subfields.findIndex(currSubfield => currSubfield.code === 't');
561
- const relevantSubfields = field.subfields.filter((sf, i) => i >= index).filter(sf => !irrelevantSubfieldsInNameAndTitlePartComparison.includes(sf.code));
562
- const subsetField = {'tag': field.tag, 'ind1': field.ind1, 'ind2': field.ind2, subfields: relevantSubfields};
563
- debugDev(`Title subset: ${fieldToString(subsetField)}`);
564
- return subsetField;
565
- }
439
+ const asteriMatch = pairableIdentifier(field1, field2, '(FIN11)'); // If there's a match, there's no need to check the name (Caretaker will handle these.)
440
+ // WE COULD REMOVE THESE FIELDS IN MERGE, SO THAT WE WON'T GET FUNNY NAMES).
566
441
 
567
- function containsTitlePart(field) {
568
- return fieldCanHaveTitlePart(field) && fieldHasSubfield(field, 't');
442
+ // NB! Currently we should get only one mergeContraint. However, should we support multiple merge contraints (= multiple profiles)?
443
+ const allRequired = mergeConstraints[0].required || ''; // getMergeConstraintsForTag(field1.tag, 'required') || '';
444
+ const reallyRequired = asteriMatch ? removeNameRelatedSubfieldCodes(allRequired, field1.tag) : allRequired;
569
445
 
570
- function fieldCanHaveTitlePart(field) {
571
- return ['100', '110', '111', '700', '710', '711'].includes(field.tag);
446
+ //nvdebug(`WP1: '${allRequired}' => ${reallyRequired}`);
447
+ if (!reallyRequired.split('').every(c => tightSubfieldMatch(field1, field2, c, true))) {
448
+ return false;
572
449
  }
573
- }
574
450
 
575
- function titlePartsMatch(field1, field2) {
576
- if (!containsTitlePart(field1)) {
577
- return !containsTitlePart(field2);
451
+ const allPaired = mergeConstraints[0].paired || ''; // getMergeConstraintsForTag(field1.tag, 'paired') || '';
452
+ const reallyPaired = asteriMatch ? removeNameRelatedSubfieldCodes(allPaired, field1.tag) : allPaired;
453
+ //nvdebug(`WP2: '${allPaired}' => ${reallyPaired}`);
454
+ if (!reallyPaired.split('').every(c => tightSubfieldMatch(field1, field2, c, false))) {
455
+ return false;
578
456
  }
579
- if (!containsTitlePart(field2)) {
457
+
458
+ const allKeys = mergeConstraints[0].key || ''; // getMergeConstraintsForTag(field1.tag, 'key') || '';
459
+ const relevantKeys = asteriMatch ? removeNameRelatedSubfieldCodes(allKeys, field1.tag) : allKeys
460
+ //nvdebug(`WP3: keys='${allKeys}' => ${relevantKeys}`);
461
+ if (!relevantKeys.split('').every(c => looseSubfieldMatch(field1, field2, c))) {
580
462
  return false;
581
463
  }
464
+ //nvdebug('WP4');
465
+
466
+ // required/paired/keys checks did not fail. Now check that did they really succeed
467
+ if (allRequired.length > 0) { // I think we should use all here
468
+ return true;
469
+ }
582
470
 
583
- debugDev(`TITLE PARTS NEED TO BE COMPARED`);
471
+ if (reallyPaired.length > 0 && field1.subfields.some(sf => reallyPaired.includes(sf.code))) {
472
+ return true;
473
+ }
584
474
 
585
- // 100$a$t: remove $t and everything after that
586
- const subset1 = fieldToTitlePart(field1);
587
- const subset2 = fieldToTitlePart(field2);
588
- // Easter Egg, ffs. Hardcoded exception
589
- return mandatorySubfieldComparison(subset1, subset2, 'dfhklmnoprstxvg');
590
- }
475
+ if(!tagIsRepeatable(field1.tag) || relevantKeys.length == 0) {
476
+ return true;
477
+ }
591
478
 
479
+ // Raison d'être is long forgotten, but my educated guess about this: if 'key' is defined in merge constraints
480
+ // for this field, then at least one of the subfield codes in 'key' must be present in both fields.
481
+ // However, this is not necessarily right.
482
+ if (relevantKeys.length > 0) {
483
+ if (field1.subfields.some(sf => relevantKeys.includes(sf.code)) || field2.subfields.some(sf => relevantKeys.includes(sf.code))) {
484
+ return relevantKeys.split('').some(code => hasCommonNominator(field1, field2, code));
485
+ }
486
+ }
487
+
488
+ nvdebug(` name mismatch (${keys}):`, debugDev);
489
+ nvdebug(` '${fieldToString(baseField)}' vs`, debugDev);
490
+ nvdebug(` '${fieldToString(sourceField)}'`, debugDev);
491
+ return false;
492
+ }
592
493
 
593
494
  function getAlternativeNamesFrom9XX(record, field) {
594
495
  // Should we support 6XX and 8XX as well? Prolly not...
@@ -647,7 +548,6 @@ function getCounterpartIndex(field, counterpartCands, altNames, config) {
647
548
 
648
549
 
649
550
  function field264Exception(baseField, sourceRecord, sourceField, config) {
650
- nvdebug('Field 264 exception as per MET-456', debugDev);
651
551
  if (baseField.tag !== '264') {
652
552
  return false;
653
553
  }
@@ -675,6 +575,7 @@ function getCounterpartCandidates(field, record) {
675
575
  function isCopyrightField264(field) {
676
576
  return field.tag === '264' && field.ind2 === '4';
677
577
  }
578
+
678
579
  function isNotCopyrightYear(field) {
679
580
  if (field.tag === '264') {
680
581
  return !isCopyrightField264(field);
@@ -710,19 +611,10 @@ export function getCounterpart(baseRecord, sourceRecord, field, config) {
710
611
 
711
612
  const normalizedField = cloneAndNormalizeFieldForComparison(field); // mainly strip punctuation here
712
613
 
713
- nvdebug(`Norm to: '${fieldToString(normalizedField)}'`, debugDev);
614
+ nvdebug(` Normalize incoming field to: '${fieldToString(normalizedField)}'`, debugDev);
714
615
 
715
616
  const uniqueAlternativeNames = getUniqueAlernativeNames();
716
617
 
717
- function getUniqueAlernativeNames() {
718
- if (baseIsSource(baseRecord, sourceRecord)) {
719
- return [];
720
- }
721
- // Try to look for alternative names from base and source record's 9XX fields:
722
- const alternativeNames = getAlternativeNamesFrom9XX(baseRecord, field).concat(getAlternativeNamesFrom9XX(sourceRecord, field));
723
- return alternativeNames.filter((name, i) => alternativeNames.indexOf(name) === i);
724
- }
725
-
726
618
  //nvdebug(` S: ${fieldToString(normalizedField)}`, debugDev);
727
619
  // Then find (the index of) the first mathing candidate field and return it.
728
620
  const index = getCounterpartIndex(normalizedField, counterpartCands, uniqueAlternativeNames, config);
@@ -737,5 +629,14 @@ export function getCounterpart(baseRecord, sourceRecord, field, config) {
737
629
  }
738
630
 
739
631
  return null;
632
+
633
+ function getUniqueAlernativeNames() {
634
+ if (baseIsSource(baseRecord, sourceRecord)) {
635
+ return [];
636
+ }
637
+ // Try to look for alternative names from base and source record's 9XX fields:
638
+ const alternativeNames = getAlternativeNamesFrom9XX(baseRecord, field).concat(getAlternativeNamesFrom9XX(sourceRecord, field));
639
+ return alternativeNames.filter((name, i) => alternativeNames.indexOf(name) === i);
640
+ }
740
641
  }
741
642