@natlibfi/marc-record-validators-melinda 12.0.6 → 12.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/dist/cyrillux.js +11 -11
  2. package/dist/cyrillux.js.map +2 -2
  3. package/dist/dataProvenanceUtils.js +19 -0
  4. package/dist/dataProvenanceUtils.js.map +7 -0
  5. package/dist/index.js +3 -1
  6. package/dist/index.js.map +2 -2
  7. package/dist/merge-fields/controlSubfields.js.map +2 -2
  8. package/dist/merge-fields/counterpartField.js +149 -152
  9. package/dist/merge-fields/counterpartField.js.map +3 -3
  10. package/dist/merge-fields/dataProvenance.js +6 -20
  11. package/dist/merge-fields/dataProvenance.js.map +2 -2
  12. package/dist/merge-fields/index.js +1 -1
  13. package/dist/merge-fields/index.js.map +2 -2
  14. package/dist/merge-fields/mergableIndicator.js +1 -2
  15. package/dist/merge-fields/mergableIndicator.js.map +2 -2
  16. package/dist/merge-fields/mergeConfig.js +2 -0
  17. package/dist/merge-fields/mergeConfig.js.map +2 -2
  18. package/dist/merge-fields/mergeConstraints.js +35 -32
  19. package/dist/merge-fields/mergeConstraints.js.map +3 -3
  20. package/dist/merge-fields/mergeField.js +4 -3
  21. package/dist/merge-fields/mergeField.js.map +2 -2
  22. package/dist/merge-fields/mergeOrAddSubfield.js +8 -7
  23. package/dist/merge-fields/mergeOrAddSubfield.js.map +2 -2
  24. package/dist/merge-fields/mergeSubfield.js +5 -1
  25. package/dist/merge-fields/mergeSubfield.js.map +2 -2
  26. package/dist/merge-fields/worldKnowledge.js +52 -0
  27. package/dist/merge-fields/worldKnowledge.js.map +2 -2
  28. package/dist/merge-fields.test.js +2 -2
  29. package/dist/merge-fields.test.js.map +2 -2
  30. package/dist/normalize-dashes.js +2 -2
  31. package/dist/normalize-dashes.js.map +2 -2
  32. package/dist/normalizeFieldForComparison.js +8 -14
  33. package/dist/normalizeFieldForComparison.js.map +2 -2
  34. package/dist/prepublicationUtils.js +1 -1
  35. package/dist/prepublicationUtils.js.map +2 -2
  36. package/dist/punctuation2.js +10 -10
  37. package/dist/punctuation2.js.map +2 -2
  38. package/dist/removeDuplicateDataFields.js +1 -24
  39. package/dist/removeDuplicateDataFields.js.map +2 -2
  40. package/dist/removeInferiorDataFields.js +3 -2
  41. package/dist/removeInferiorDataFields.js.map +2 -2
  42. package/dist/sortSubfields.js +19 -19
  43. package/dist/sortSubfields.js.map +2 -2
  44. package/dist/subfield6Utils.js +0 -1
  45. package/dist/subfield6Utils.js.map +2 -2
  46. package/dist/subfield8Utils.js +0 -5
  47. package/dist/subfield8Utils.js.map +2 -2
  48. package/dist/utils.js +29 -3
  49. package/dist/utils.js.map +2 -2
  50. package/package.json +4 -4
  51. package/src/cyrillux.js +11 -11
  52. package/src/dataProvenanceUtils.js +21 -0
  53. package/src/index.js +3 -1
  54. package/src/merge-fields/controlSubfields.js +0 -1
  55. package/src/merge-fields/counterpartField.js +191 -290
  56. package/src/merge-fields/dataProvenance.js +8 -25
  57. package/src/merge-fields/index.js +1 -1
  58. package/src/merge-fields/mergableIndicator.js +1 -2
  59. package/src/merge-fields/mergeConfig.js +2 -1
  60. package/src/merge-fields/mergeConstraints.js +39 -34
  61. package/src/merge-fields/mergeField.js +4 -7
  62. package/src/merge-fields/mergeOrAddSubfield.js +8 -7
  63. package/src/merge-fields/mergeSubfield.js +11 -2
  64. package/src/merge-fields/worldKnowledge.js +72 -3
  65. package/src/merge-fields.test.js +2 -2
  66. package/src/normalize-dashes.js +2 -2
  67. package/src/normalizeFieldForComparison.js +19 -20
  68. package/src/prepublicationUtils.js +1 -1
  69. package/src/punctuation2.js +10 -10
  70. package/src/removeDuplicateDataFields.js +24 -24
  71. package/src/removeInferiorDataFields.js +3 -2
  72. package/src/sortSubfields.js +19 -19
  73. package/src/subfield6Utils.js +1 -1
  74. package/src/subfield8Utils.js +5 -5
  75. package/src/utils.js +39 -12
  76. package/test-fixtures/cyrillux/f14/expectedResult.json +32 -0
  77. package/test-fixtures/cyrillux/f14/metadata.json +10 -0
  78. package/test-fixtures/cyrillux/f14/record.json +14 -0
  79. package/test-fixtures/merge-fields/f042_01/expectedResult.json +12 -0
  80. package/test-fixtures/merge-fields/f042_01/metadata.json +6 -0
  81. package/test-fixtures/merge-fields/f042_01/record.json +13 -0
  82. package/test-fixtures/merge-fields/f06/expectedResult.json +42 -0
  83. package/test-fixtures/merge-fields/f06/metadata.json +6 -0
  84. package/test-fixtures/merge-fields/f06/record.json +41 -0
  85. package/test-fixtures/merge-fields/f07/expectedResult.json +18 -0
  86. package/test-fixtures/merge-fields/f07/metadata.json +6 -0
  87. package/test-fixtures/merge-fields/f07/record.json +18 -0
  88. package/test-fixtures/merge-fields/f08/expectedResult.json +12 -0
  89. package/test-fixtures/merge-fields/f08/metadata.json +7 -0
  90. package/test-fixtures/merge-fields/f08/record.json +10 -0
  91. package/test-fixtures/merge-fields/f09/expectedResult.json +14 -0
  92. package/test-fixtures/merge-fields/f09/metadata.json +6 -0
  93. package/test-fixtures/merge-fields/f09/record.json +14 -0
  94. package/test-fixtures/merge-fields/f10/expectedResult.json +25 -0
  95. package/test-fixtures/merge-fields/f10/metadata.json +6 -0
  96. package/test-fixtures/merge-fields/f10/record.json +25 -0
  97. package/test-fixtures/merge-fields/f11/expectedResult.json +40 -0
  98. package/test-fixtures/merge-fields/f11/metadata.json +7 -0
  99. package/test-fixtures/merge-fields/f11/record.json +50 -0
  100. package/test-fixtures/merge-fields/f12/expectedResult.json +17 -0
  101. package/test-fixtures/merge-fields/f12/metadata.json +6 -0
  102. package/test-fixtures/merge-fields/f12/record.json +25 -0
  103. package/test-fixtures/merge-fields/f13/expectedResult.json +18 -0
  104. package/test-fixtures/merge-fields/f13/metadata.json +6 -0
  105. package/test-fixtures/merge-fields/f13/record.json +28 -0
  106. package/test-fixtures/merge-fields/f14/expectedResult.json +25 -0
  107. package/test-fixtures/merge-fields/f14/metadata.json +6 -0
  108. package/test-fixtures/merge-fields/f14/record.json +25 -0
  109. package/test-fixtures/merge-fields/f300_01/expectedResult.json +9 -0
  110. package/test-fixtures/merge-fields/f300_01/metadata.json +6 -0
  111. package/test-fixtures/merge-fields/f300_01/record.json +8 -0
  112. package/test-fixtures/merge-fields/f300_02/expectedResult.json +13 -0
  113. package/test-fixtures/merge-fields/f300_02/metadata.json +6 -0
  114. package/test-fixtures/merge-fields/f300_02/record.json +16 -0
  115. package/test-fixtures/merge-fields/f490_01/expectedResult.json +13 -0
  116. package/test-fixtures/merge-fields/f490_01/metadata.json +6 -0
  117. package/test-fixtures/merge-fields/f490_01/record.json +16 -0
  118. package/test-fixtures/remove-inferior-datafields/f17/expectedResult.json +11 -0
  119. package/test-fixtures/remove-inferior-datafields/f17/metadata.json +5 -0
  120. package/test-fixtures/remove-inferior-datafields/f17/record.json +15 -0
@@ -1,41 +1,24 @@
1
1
  // See https://www.loc.gov/marc/bibliographic/bdapndxj.html for details
2
2
 
3
- import {subfieldArraysContainSameData} from "../utils.js";
4
-
5
- export function tagToDataProvenanceSubfieldCode(tag) {
6
- if ( ['533', '800', '810', '811', '830'].includes(tag)) {
7
- return 'y';
8
- }
9
- if ( tag === '856' || tag === '857' ) {
10
- return 'e';
11
- }
12
-
13
- if ( tag.match(/^7[678]/u) ) {
14
- return 'l'
15
- }
16
-
17
- if ( tag.match(/^00/u)) {
18
- return undefined;
19
- }
20
- return '7';
21
- }
22
-
3
+ import {subfieldArraysContainSameData} from '../utils.js';
4
+ import {tagToDataProvenanceSubfieldCode} from '../dataProvenanceUtils.js';
23
5
 
24
6
  export function provenanceSubfieldsPermitMerge(baseField, sourceField) {
25
- const provinanceSubfieldCode = tagToDataProvenanceSubfieldCode(baseField.tag);
26
7
  if (!baseField.subfields) {
27
8
  return true;
28
9
  }
29
- if (provinanceSubfieldCode === undefined) {
10
+ const provenanceSubfieldCode = tagToDataProvenanceSubfieldCode(baseField.tag);
11
+
12
+ if (provenanceSubfieldCode === undefined) {
30
13
  return false;
31
14
  }
32
15
 
33
- const baseProvinanceSubfields = baseField.subfields.filter(sf => sf.code === provinanceSubfieldCode);
34
- const sourceProvinanceSubfields = sourceField.subfields.filter(sf => sf.code === provinanceSubfieldCode);
16
+ const baseProvenanceSubfields = baseField.subfields.filter(sf => sf.code === provenanceSubfieldCode);
17
+ const sourceProvenanceSubfields = sourceField.subfields.filter(sf => sf.code === provenanceSubfieldCode);
35
18
 
36
19
  // Currently we just compare two arrays. Later on we might do something more sophisticated with specific $7 data provenance category/relationship codes,
37
20
  // or actual values.
38
21
 
39
- return subfieldArraysContainSameData(baseProvinanceSubfields, sourceProvinanceSubfields);
22
+ return subfieldArraysContainSameData(baseProvenanceSubfields, sourceProvenanceSubfields);
40
23
 
41
24
  }
@@ -49,7 +49,7 @@ export default function (defaultTagPattern = undefined) {
49
49
  }
50
50
 
51
51
  function fix(record, config = undefined) {
52
- const config2 = config || defaultConfig;
52
+ const config2 = config || defaultConfig.mergeConfiguration;
53
53
  record.internalMerge = true;
54
54
  mergeFieldsWithinRecord(record, config2);
55
55
  delete record.internalMerge;
@@ -1,5 +1,5 @@
1
1
  import createDebugLogger from 'debug';
2
- import {marc21GetTagsLegalInd1Value, marc21GetTagsLegalInd2Value, nvdebug} from '../utils.js';
2
+ import {marc21GetTagsLegalInd1Value, marc21GetTagsLegalInd2Value} from '../utils.js';
3
3
 
4
4
  // Specs: https://workgroups.helsinki.fi/x/K1ohCw (though we occasionally differ from them)...
5
5
 
@@ -20,7 +20,6 @@ function marc21NoNeedToCheckInd1(tag) {
20
20
 
21
21
  function marc21NoNeedToCheckInd2(tag) {
22
22
  const cands = marc21GetTagsLegalInd2Value(tag);
23
- nvdebug(`CHECK IND2 ${typeof cands} FOR ${tag}`, debugDev);
24
23
  if (typeof cands === 'string') { // single cand
25
24
  return true;
26
25
  }
@@ -30,9 +30,10 @@ export const mergeConfig = {
30
30
  '246': {'0': 1, '1': 1, '2': 1, '3': 1, '4': 1, '5': 1, '6': 1, '7': 1, '8': 1, ' ': 2},
31
31
  '363': {'0': 1, '1': 1, ' ': 2},
32
32
  '382': {'0': 1, '1': 1, ' ': 2},
33
- '730': ['2', ' ']
33
+ '730': ['2', ' '] // If this is ok, it is ok for 700, 710 and 711 as well...
34
34
  },
35
35
  'comment #4': 'List indicators that do not block merge here. Non-filing indicators do not prevent field merge (their support is hard-coded). They are mainly listed here as an example.',
36
+ // Actually I wouldn't want to ignore X00ind1=3 vs other values such as IND1=1... However, this is not currently supported.
36
37
  'ignoreIndicator1': ['100', '110', '111', '130', '210', '242', '245', '246', '247', '307', '490', '505', '506', '510', '511', '516', '520', '521', '522', '524', '526', '583', '586', '600', '610', '630', '650', '651', '655', '700', '710', '730', '740', '760', '762', '765', '767', '770', '772', '773', '774', '775', '776', '777', '780', '785', '786', '787', '788', '800', '810'],
37
38
  'ignoreIndicator2': ['017', '222', '240', '242', '243', '245', '760', '762', '765', '767', '770', '773', '774', '775', '776', '777', '786', '787', '788', '830'],
38
39
  'comment #5': 'If one indicator has value, and the other has not, it does not necessarily mean mismatch',
@@ -3,19 +3,20 @@ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:me
3
3
  //const debugData = debug.extend('data');
4
4
  const debugDev = debug.extend('dev');
5
5
 
6
- // Specs: https://workgroups.helsinki.fi/x/K1ohCw (though we occasionally differ from them)...
7
-
8
- // "key" is an unique key that must match (be absent or exist+be identical) in both.
9
- // "paired" refers to a field that must either exist in both or be absent in both (negative XOR). Typically it's not defined.
6
+ // "required": both fields must have the subfield and the value must be the same.
7
+ // "paired": subfield must either exist (with value match) or not exist in both fields (negative XOR). Sort of an optional version of 'required'.
8
+ // "key": one or both fields lack the subfield OR both fields have it and their values match.
9
+ // There are typically NR subfields but the system should support multival repeatable subfields. However, (R) support is considered experimental.
10
10
  // NB: key+paired with identical values is an attempt to prevent copy for (ET) fields, and to force separate fields on (T) fields.
11
11
  // NB! If base has eg. no 264, two+ 264 fields can be copied from the source.
12
12
 
13
13
  // NB! not all X00 fields have, say, $x subfield. However, we can still share them...
14
14
  // $h is non-1XX?, $i is 7XX only, $w is 8XX only...
15
- const keyX00 = 'abcjloqrtuwx'; // Shared: $abcdefg...
16
- const keyX10 = 'abcdfghlnoprstuwx';
17
- const keyX11 = 'acdefghlnpqstuwx';
15
+ const keyX00 = 'abcjklnoqrstuwx'; // Shared: $abcdefg...
16
+ const keyX10 = 'abcdfghklnoprstuwx';
17
+ const keyX11 = 'acdefghklnpqstuwx';
18
18
  const keyX30 = 'adfghklmnoprstvwxyz';
19
+ const pairedTitleSubfields = 'iklmnoprst'
19
20
 
20
21
  const mergeConstraints = [
21
22
  {'tag': '010', 'required': 'a', 'key': 'a'},
@@ -41,8 +42,8 @@ const mergeConstraints = [
41
42
  {'tag': '037', 'required': 'b', 'key': 'ab'},
42
43
  {'tag': '039', 'required': 'a'},
43
44
  {'tag': '040', 'required': '', 'key': ''},
44
- {'tag': '041', 'required': '', 'paired': '2', 'key': ''}, // Don't put $2 in 'key'! hasCommonNominator() would get into trouble with it...
45
- {'tag': '042', 'required': 'a', 'key': ''}, // NB: preprocessor hacks applied
45
+ {'tag': '041', 'required': '', 'paired': '2', 'key': 'ad'}, // Don't put $2 in 'key'! hasCommonNominator() would get into trouble with it...
46
+ {'tag': '042', 'required': '', 'key': ''}, // Contents (supposedly $a subfields) can be anything, and still merge... (042 $a foo + 042 $b bar is perfectly mergeable)
46
47
  {'tag': '043', 'required': 'a', 'key': 'abc'},
47
48
  {'tag': '044', 'required': '', 'key': 'abc', 'paired': 'abc'},
48
49
  {'tag': '045', 'required': '', 'key': 'abc', 'paired': 'abc'}, // (ET) // 045 is problematic either-$a or $b or $c...
@@ -69,11 +70,11 @@ const mergeConstraints = [
69
70
  {'tag': '086', 'required': '', 'paired': 'a', 'key': 'a'},
70
71
  {'tag': '088', 'required': '', 'paired': 'a', 'key': 'a'},
71
72
  // NB! 100, 110 and 111 may have title parts that are handled elsewhere
72
- {'tag': '100', 'required': 'a', 'paired': 't', 'key': keyX00},
73
- {'tag': '110', 'required': 'a', 'paired': 'bt', 'key': keyX10},
74
- {'tag': '111', 'required': 'a', 'paired': 't', 'key': keyX11},
73
+ {'tag': '100', 'required': 'a', 'paired': `b${pairedTitleSubfields}`, 'key': keyX00},
74
+ {'tag': '110', 'required': 'a', 'paired': `b${pairedTitleSubfields}`, 'key': keyX10},
75
+ {'tag': '111', 'required': 'a', 'paired': pairedTitleSubfields, 'key': keyX11},
75
76
  // NB! 130 has no name part, key is used for title part
76
- {'tag': '130', 'required': 'a', 'key': keyX30},
77
+ {'tag': '130', 'required': 'a', paired: pairedTitleSubfields, 'key': keyX30},
77
78
  {'tag': '210', 'required': 'a', 'key': 'ab'},
78
79
  {'tag': '222', 'required': 'a', 'key': 'ab'},
79
80
  {'tag': '240', 'required': 'a', 'key': 'adfghklmnoprs'},
@@ -95,7 +96,7 @@ const mergeConstraints = [
95
96
  //{'tag': '264', 'required': '', 'paired': 'abc', 'key': 'abc'}, // NB "S.l." normalizations?" not implemented
96
97
  {'tag': '264', 'required': '', 'key': 'abc'}, // NB "S.l." normalizations?" not implemented
97
98
  // SKIP TAG 270 ON PURPOSE! Melinda's N=43.
98
- {'tag': '300', 'required': 'a', 'key': 'abcefg'},
99
+ {'tag': '300', 'required': '', 'key': 'abcefg'},
99
100
  {'tag': '306', 'required': 'a', 'key': 'a'},
100
101
  // SKIP TAG 307 ON PURPOSE! N=0
101
102
  {'tag': '310', 'required': 'a', 'key': 'ab'},
@@ -132,7 +133,7 @@ const mergeConstraints = [
132
133
  {'tag': '385', 'required': 'a', 'paired': 'abmn', 'key': 'abmn'},
133
134
  {'tag': '386', 'required': 'a', 'paired': 'abmn', 'key': 'abmn'},
134
135
  {'tag': '388', 'required': 'a', 'key': 'a'},
135
- {'tag': '490', 'required': 'a', 'key': 'axvl'},
136
+ {'tag': '490', 'required': '', 'key': 'axvl'},
136
137
  {'tag': '500', 'required': 'a', 'key': 'a'},
137
138
  {'tag': '501', 'required': 'a', 'key': 'a'},
138
139
  {'tag': '502', 'required': 'a', 'key': 'abcdgo'},
@@ -195,10 +196,10 @@ const mergeConstraints = [
195
196
  {'tag': '597', 'required': ''},
196
197
  {'tag': '598', 'required': ''},
197
198
  {'tag': '599', 'required': ''},
198
- {'tag': '600', 'required': 'a', 'paired': 'tvxyz', 'key': keyX00},
199
- {'tag': '610', 'required': 'a', 'paired': 'btvxyz', 'key': keyX10},
200
- {'tag': '611', 'required': 'a', 'paired': 'tvxyz', 'key': keyX11},
201
- {'tag': '630', 'required': 'a', 'paired': 'atvxyz', 'key': keyX30},
199
+ {'tag': '600', 'required': 'a', 'paired': `b${pairedTitleSubfields}vxyz`, 'key': keyX00},
200
+ {'tag': '610', 'required': 'a', 'paired': `b${pairedTitleSubfields}vxyz`, 'key': keyX10},
201
+ {'tag': '611', 'required': 'a', 'paired': `${pairedTitleSubfields}vxyz`, 'key': keyX11},
202
+ {'tag': '630', 'required': 'a', 'paired': `${pairedTitleSubfields}vxyz`, 'key': keyX30},
202
203
  // NB! 700, 710 and 711 may have title parts that are handled elsewhere
203
204
  {'tag': '647', 'required': 'a', 'paired': 'avxyz', 'key': 'acdgvxyz02'},
204
205
  {'tag': '648', 'required': 'a', 'paired': 'avxyz', 'key': 'avxyz02'},
@@ -212,12 +213,12 @@ const mergeConstraints = [
212
213
  {'tag': '658', 'required': 'a', 'paired': 'abcd'}, // N=0
213
214
  {'tag': '662', 'required': '', 'paired': 'abcdefgh'}, // N=0
214
215
  {'tag': '688', 'required': 'a'}, // N=0
215
- {'tag': '700', 'required': 'a', 'paired': 't', 'key': keyX00}, // h/i/m/o/r/s/x are missing from 100
216
- {'tag': '710', 'required': 'a', 'paired': 'bt', 'key': keyX10}, // h/j/m/o/r/s/x are missing from 110
217
- {'tag': '711', 'required': 'a', 'paired': 'cdeflns', 'key': keyX11}, // h/i/s/x are missing from 711
216
+ {'tag': '700', 'required': 'a', 'paired': `b${pairedTitleSubfields}x`, 'key': keyX00}, // h/i/m/o/r/s/x are missing from 100, NB! 's' is repeatable, but we are not merging them here!
217
+ {'tag': '710', 'required': 'a', 'paired': `b${pairedTitleSubfields}x`, 'key': keyX10}, // h/j/m/o/r/s/x are missing from 110
218
+ {'tag': '711', 'required': 'a', 'paired': `cdef${pairedTitleSubfields}x`, 'key': keyX11}, // h/i/s/x are missing from 711
218
219
  {'tag': '720', 'required': 'a', 'key': 'a'},
219
220
  // NB! 730 has no name part, key is used for title part
220
- {'tag': '730', 'required': 'a', 'key': keyX30}, // NB: 130->730 magic subfields might not agree...
221
+ {'tag': '730', 'required': 'a', 'paired': `df${pairedTitleSubfields}x`, 'key': keyX30}, // NB: 130->730 magic subfields might not agree...
221
222
  {'tag': '740', 'required': 'a', 'key': 'ahnp'},
222
223
  {'tag': '751', 'required': 'a', 'key': 'a'}, // N=11, kaikissa pelkkä $a
223
224
  {'tag': '752', 'required': '', 'key': 'abcdefgh'}, // N=12234
@@ -242,7 +243,7 @@ const mergeConstraints = [
242
243
  {'tag': '786', 'required': '', 'paired': 'abcrstuxyz', 'key': 'abcdhijmprstuxyz4'},
243
244
  {'tag': '787', 'required': '', 'paired': 'abcdhmstuxyz4'},
244
245
  {'tag': '788', 'required': '', 'paired': 'stx', 'key': 'abdestx'},
245
- {'tag': '800', 'required': 'a', 'paired': 't', 'key': keyX00},
246
+ {'tag': '800', 'required': 'a', 'paired': 'bt', 'key': keyX00},
246
247
  {'tag': '810', 'required': 'a', 'paired': 'bt', 'key': keyX10},
247
248
  {'tag': '811', 'required': 'a', 'paired': 't', 'key': keyX11},
248
249
  {'tag': '830', 'required': 'a', 'key': keyX30},
@@ -289,22 +290,26 @@ const mergeConstraints = [
289
290
  {'tag': 'SID', 'required': ''}
290
291
  ];
291
292
 
292
- function constraintToValue(tagsConstraints, constraintName) {
293
- if (constraintName in tagsConstraints) {
294
- return tagsConstraints[constraintName];
295
- }
296
- return null; // NB! "" might mean "apply to everything" (eg. 040.key) while null means that it is not applied.
297
- }
298
293
 
299
- export function getMergeConstraintsForTag(tag, constraintName) {
294
+ export function getMergeConstraintsForTag(tag, constraintName = undefined) {
300
295
  const tagsConstraintsArray = mergeConstraints.filter(entry => tag === entry.tag);
301
296
  if (tagsConstraintsArray.length === 0) {
302
- debugDev(`WARNING\tNo key found for ${tag}. Returning NULL!`);
303
- return null;
297
+ debugDev(`WARNING\tNo key found for ${tag}!`);
298
+ }
299
+ if (!constraintName) {
300
+ return tagsConstraintsArray;
304
301
  }
305
302
  // NB! should we support multiple constraints for a field? Eg. 505$a vs 505($tg)+
306
303
  if (tagsConstraintsArray.length > 1) {
307
304
  debugDev(`WARNING\tMultiple values for '${constraintName}' (N=${tagsConstraintsArray.length}) found in ${tag}. Using first values.`);
308
305
  }
309
- return constraintToValue(tagsConstraintsArray[0], constraintName);
306
+ //return constraintToValue(tagsConstraintsArray[0], constraintName);
307
+ return tagsConstraintsArray.map(c => constraintToValue(c, constraintName));
308
+
309
+ function constraintToValue(tagsConstraints, constraintName) {
310
+ if (constraintName in tagsConstraints) {
311
+ return tagsConstraints[constraintName];
312
+ }
313
+ return null; // NB! "" might mean "apply to everything" (eg. 040.key) while null means that it is not applied.
314
+ }
310
315
  }
@@ -2,19 +2,16 @@
2
2
  import createDebugLogger from 'debug';
3
3
  import {fieldToString, fieldsToString, fieldsAreIdentical, nvdebug, hasCopyright, removeCopyright, subfieldToString} from '../utils.js';
4
4
  import {fieldGetOccurrenceNumberPairs} from '../subfield6Utils.js';
5
- import {cloneAndNormalizeFieldForComparison, cloneAndRemovePunctuation, isEnnakkotietoSubfieldG} from '../normalizeFieldForComparison.js';
5
+ import {cloneAndNormalizeFieldForComparison, cloneAndRemovePunctuation} from '../normalizeFieldForComparison.js';
6
6
  import {mergeOrAddSubfield} from './mergeOrAddSubfield.js';
7
7
  import {mergeIndicators} from './mergeIndicator.js';
8
8
  import {mergableTag} from './mergableTag.js';
9
9
  import {getCounterpart} from './counterpartField.js';
10
+ import {isEnnakkotietoSubfield} from '../prepublicationUtils.js';
10
11
  //import {default as normalizeEncoding} from '@natlibfi/marc-record-validators-melinda/dist/normalize-utf8-diacritics';
11
12
  //import {postprocessRecords} from './mergeOrAddPostprocess.js';
12
13
  //import {preprocessBeforeAdd} from './processFilter.js';
13
14
 
14
- //import fs from 'fs';
15
- //import path from 'path';
16
-
17
-
18
15
  //const defaultConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '..', '..', 'src', 'reducers', 'config.json'), 'utf8'));
19
16
 
20
17
  // Specs: https://workgroups.helsinki.fi/x/K1ohCw (though we occasionally differ from them)...
@@ -25,7 +22,7 @@ const debugDev = debug.extend('dev');
25
22
 
26
23
  // NB! Can we do this via config.json?
27
24
  function removeEnnakkotieto(field) {
28
- const tmp = field.subfields.filter(subfield => !isEnnakkotietoSubfieldG(subfield));
25
+ const tmp = field.subfields.filter(subfield => !isEnnakkotietoSubfield(subfield));
29
26
  // remove only iff some other subfield remains
30
27
  if (tmp.length > 0) {
31
28
  field.subfields = tmp;
@@ -59,7 +56,7 @@ function mergeField2(baseRecord, baseField, sourceField, config, candFieldPairs8
59
56
  // If a base ennakkotieto is merged with real data, remove ennakkotieto subfield:
60
57
  // (If our prepub normalizations are ok, this should not be needed.
61
58
  // However, it's simple and works well enough, so let's keep it here.)
62
- if (baseField.subfields?.find(sf => isEnnakkotietoSubfieldG(sf)) && !sourceField.subfields?.find(sf => isEnnakkotietoSubfieldG(sf))) {
59
+ if (baseField.subfields?.find(sf => isEnnakkotietoSubfield(sf)) && !sourceField.subfields?.find(sf => isEnnakkotietoSubfield(sf))) {
63
60
  removeEnnakkotieto(baseField);
64
61
  baseField.merged = 1;
65
62
  }
@@ -1,12 +1,13 @@
1
1
  import createDebugLogger from 'debug';
2
- import {cloneAndNormalizeFieldForComparison, isEnnakkotietoSubfieldG} from '../normalizeFieldForComparison.js';
2
+ import {cloneAndNormalizeFieldForComparison} from '../normalizeFieldForComparison.js';
3
3
  import {normalizeAs, normalizeControlSubfieldValue} from '../normalize-identifiers.js';
4
- import {fieldHasSubfield, fieldToString, isControlSubfieldCode, nvdebug, subfieldIsRepeatable, subfieldToString} from '../utils.js';
4
+ import {fieldHasSubfield, fieldToString, isContentSubfieldCode, nvdebug, subfieldIsRepeatable, subfieldToString} from '../utils.js';
5
5
  import {mergeSubfield} from './mergeSubfield.js';
6
6
  import {sortAdjacentSubfields} from '../sortSubfields.js';
7
7
 
8
8
  import {valueCarriesMeaning} from './worldKnowledge.js';
9
9
  import {resetSubfield6Tag} from '../subfield6Utils.js';
10
+ import {isEnnakkotietoSubfield} from '../prepublicationUtils.js';
10
11
 
11
12
  const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:mergeOrAddSubfield');
12
13
  //const debugData = debug.extend('data');
@@ -25,11 +26,11 @@ function catalogingSourceModifyingAgencyCandIsOriginalCatalogingSourceAgencyInTa
25
26
  return false;
26
27
  }
27
28
 
28
- function ennakkotietoInSubfieldG(candSubfieldData) {
29
- if (isEnnakkotietoSubfieldG({'code': candSubfieldData.code, 'value': candSubfieldData.originalValue})) {
29
+ function dataContainsPrepublicationSubfield(candSubfieldData) {
30
+ if (isEnnakkotietoSubfield({'code': candSubfieldData.code, 'value': candSubfieldData.originalValue})) {
30
31
  // Skip just ‡g subfield or the whole field?
31
32
  // We decided to skip just this subfield. We want at least $0 and maybe even more from ennakkotieto.
32
- debugDev(`Skip '‡g ${candSubfieldData.originalValue}'`);
33
+ debugDev(`Skip '‡${candSubfieldData.code} ${candSubfieldData.originalValue}'`);
33
34
  return true;
34
35
  }
35
36
  return false;
@@ -75,7 +76,7 @@ function skipNormalizedComparison(tag, subfieldCode, subfieldValue) {
75
76
  }
76
77
 
77
78
  function mergeOrAddSubfieldNotRequired(targetField, candSubfieldData) {
78
- if (catalogingSourceModifyingAgencyCandIsOriginalCatalogingSourceAgencyInTargetField(targetField, candSubfieldData) || ennakkotietoInSubfieldG(candSubfieldData)) {
79
+ if (catalogingSourceModifyingAgencyCandIsOriginalCatalogingSourceAgencyInTargetField(targetField, candSubfieldData) || dataContainsPrepublicationSubfield(candSubfieldData)) {
79
80
  return true;
80
81
  }
81
82
 
@@ -126,7 +127,7 @@ function addSubfield(targetField, candSubfield) {
126
127
  }
127
128
 
128
129
  function setPunctuationFlag(field, addedSubfield) {
129
- if (isControlSubfieldCode(addedSubfield.code)) { // These are never punctuation related
130
+ if (!isContentSubfieldCode(addedSubfield.code, field.tag)) { // These are never punctuation related
130
131
  return;
131
132
  }
132
133
  field.useExternalEndPunctuation = 1;
@@ -1,6 +1,6 @@
1
1
  import createDebugLogger from 'debug';
2
2
  import {partsAgree, subfieldContainsPartData} from '../normalizeSubfieldValueForComparison.js';
3
- import {valueCarriesMeaning} from './worldKnowledge.js';
3
+ import {getSynonyms, valueCarriesMeaning} from './worldKnowledge.js';
4
4
  import {nvdebug} from '../utils.js';
5
5
  import {tagAndSubfieldCodeReferToIsbn} from '../normalizeFieldForComparison.js';
6
6
  import {canContainOptionalQualifier, splitToNameAndQualifier} from './counterpartField.js';
@@ -88,7 +88,7 @@ function isPehmeakantinen(value) {
88
88
  return ['mjuka pärmar', 'paperback', 'pehmeäkantinen', 'softcover'].includes(value);
89
89
  }
90
90
 
91
- function isItsenainenJatkoOsa(value) {
91
+ function isItsenainenJatkoOsa(value) { // Probably movable to synonyms...
92
92
  if (value.match(/^Fristående fortsättning på verket[^a-z]*$/ui)) {
93
93
  return true;
94
94
  }
@@ -142,6 +142,15 @@ function pairHttpAndHttps(candSubfield, relevantSubfields) {
142
142
  }
143
143
 
144
144
  function isSynonym(field, candSubfield, relevantSubfields) {
145
+
146
+ const finnishForm = getSynonyms(candSubfield.value, field.tag, candSubfield.code, 'fin');
147
+ if (finnishForm && finnishForm === relevantSubfields.some(sf => finnishForm === getSynonyms(sf.value, field.tag, candSubfield.code, 'fin'))) {
148
+ // NB! There's currently no intelligence (such as checking cat language from 040$b): the preferred value is the one in base.
149
+ // We might later on create a separate translation validator/fixer...
150
+ return true;
151
+ }
152
+
153
+
145
154
  if (candSubfield.code === 'q' && ['015', '020', '024', '028'].includes(field.tag)) {
146
155
  return coverTypesMatch(candSubfield, relevantSubfields);
147
156
  }
@@ -1,4 +1,7 @@
1
- //import {nvdebug} from '../utils';
1
+ //import {nvdebug} from '../utils.js';
2
+
3
+ // NB! This file (or at least synonyms) should eventually be moved away from merge to '..'.
4
+
2
5
 
3
6
  export function valueCarriesMeaning(tag, subfieldCode, value) {
4
7
  // Some data is pretty meaningless and as meaningless is pretty close to nothing, this meaningless data should not prevent merge.
@@ -20,11 +23,77 @@ export function valueCarriesMeaning(tag, subfieldCode, value) {
20
23
  return true;
21
24
  }
22
25
 
26
+ const synonyms = [
27
+ {tags: ['700', '710', '711', '730'], code: 'i', 'fin': 'Sisältää (ekspressio)', 'swe': 'Innehåller (uttryck)'},
28
+ {tags: ['700', '710', '711', '730'], code: 'i', 'fin': 'Sisältää (teos)', 'swe': 'Innehåller (verk)'},
29
+ {tags: ['700', '710', '711', '730'], code: 'l', 'fin': 'Englanti', 'swe': 'Engelska'},
30
+ {tags: ['700', '710', '711', '730'], code: 'l', 'fin': 'Ruotsi', 'swe': 'Svenska'},
31
+ {tags: ['700', '710', '711', '730'], code: 'l', 'fin': 'Suomi', 'swe': 'Finska'}
32
+ // There might eventually be a need for a list of terms in a given language (eg. engl. paperback and softcover)
33
+ ];
34
+
35
+ export function getSynonyms(term, tag = undefined, subfieldCode = undefined, preferredLanguage = undefined, ignoreCase = true, relevantLanguagesString = 'fin swe',) {
36
+ if (!term) {
37
+ return [];
38
+ }
39
+ //nvdebug(`WP1 CANDS: ${synonyms.length} FOR '${term}'`);
40
+ const relevantLanguges = relevantLanguagesString.split(/\s+/u);
41
+ const normalizedTerm = ignoreCase ? term.toLowerCase() : term;
42
+ const synonymsWithTag = tag ? synonyms.filter(s => s.tags.includes(tag)) : synonyms;
43
+ if (synonymsWithTag.length === 0) {
44
+ return [];
45
+ }
46
+ //nvdebug(`WP2 (FILTER ${tag}) CANDS: ${synonymsWithTag.length}`);
47
+ const synonymsWithTagAndCode = subfieldCode ? synonymsWithTag.filter(s => s.code === subfieldCode) : synonymsWithTag;
48
+ //nvdebug(`WP3 (FILTER $${subfieldCode}) CANDS: ${synonymsWithTagAndCode.length}:\n${JSON.stringify(synonymsWithTagAndCode)}`);
49
+ const matchingSynonyms = synonymsWithTagAndCode.filter(s => termAndLangMatch(s));
50
+
51
+ if (preferredLanguage && matchingSynonyms.length > 0) {
52
+ //console.log(`USING PREFERRED LANG '${preferredLanguage}' for TERM '${term}':\n${JSON.stringify(matchingSynonyms)}`);
53
+ return matchingSynonyms.map(s => s[preferredLanguage]);
54
+ }
55
+ return matchingSynonyms;
56
+
57
+ function termAndLangMatch(synonym) {
58
+ if (relevantLanguges.includes('fin')) {
59
+ if (ignoreCase && synonym.fin.toLowerCase() === normalizedTerm ) {
60
+ return true;
61
+ }
62
+ if (!ignoreCase && synonym.fin === term) {
63
+ return true;
64
+ }
65
+ }
66
+
67
+ if (relevantLanguges.includes('swe')) {
68
+ if (ignoreCase && synonym.swe.toLowerCase() === normalizedTerm ) {
69
+ return true;
70
+ }
71
+ if (!ignoreCase && synonym.swe === term) {
72
+ return true;
73
+ }
74
+ }
75
+ return false;
76
+ }
77
+ }
78
+
79
+ export function getSynonym(tag, subfieldCode, originalValue) {
80
+ const finnishForm = getSynonyms(originalValue, tag, subfieldCode, 'fin');
81
+ if (finnishForm.length === 1) {
82
+ //nvdebug(`FINNISH FORM FOR ${tag}$${subfieldCode}: '${finnishForm[0]}'`);
83
+ return finnishForm[0];
84
+ }
85
+ return originalValue;
86
+ }
87
+
23
88
  export function normalizeForSamenessCheck(tag, subfieldCode, originalValue) {
24
89
  // NB! These work only for non-repeatable subfields!
25
90
  // Repeatable subfields are currently handled in mergeSubfields.js. Only non-repeatable subfields block field merge,
26
- // (This split is suboptiomal... Minimum fix: make this disctinction cleaner...)
27
- if (subfieldCode === 'a' && ['100', '600', '700', '800'].includes(tag)) {
91
+ // (This split is suboptimal... Minimum fix: make this distinction cleaner...)
92
+
93
+ //nvdebug(`TRYING TO DO ${tag}$${subfieldCode} '${originalValue}'`);
94
+ originalValue = getSynonym(tag, subfieldCode, originalValue);
95
+
96
+ if (subfieldCode === 'a' && ['100', '600', '700', '800'].includes(tag)) { // "Etunimi Sukunimi"...
28
97
  return normalizePersonalName(originalValue);
29
98
  }
30
99
 
@@ -1,4 +1,4 @@
1
- import assert from 'node:assert';
1
+ import assert from 'node:assert/strict';
2
2
  import {MarcRecord} from '@natlibfi/marc-record';
3
3
  import validatorFactory from './merge-fields/index.js';
4
4
  import {READERS} from '@natlibfi/fixura';
@@ -46,5 +46,5 @@ async function callback({getFixture, fix = false, tagPattern = false}) {
46
46
  }
47
47
 
48
48
  await validator.fix(record);
49
- assert.deepEqual(record, expectedResult);
49
+ assert.deepEqual(record, new MarcRecord(expectedResult));
50
50
  }
@@ -1,6 +1,6 @@
1
1
  //import createDebugLogger from 'debug';
2
2
  import clone from 'clone';
3
- import {fieldToString, isControlSubfieldCode, nvdebug} from './utils.js';
3
+ import {fieldToString, isContentSubfieldCode, nvdebug} from './utils.js';
4
4
 
5
5
  // Author(s): Nicholas Volk
6
6
  export default function () {
@@ -59,7 +59,7 @@ function fixDashes(field) {
59
59
  return field;
60
60
 
61
61
  function subfieldFixDashes(subfield) {
62
- if (isControlSubfieldCode(subfield.code)) {
62
+ if (!isContentSubfieldCode(subfield.code, field.tag)) {
63
63
  return;
64
64
  }
65
65
  // Normalize dashes U+2010 ... U+2015 to '-':
@@ -8,26 +8,18 @@
8
8
  */
9
9
  import clone from 'clone';
10
10
  import {fieldStripPunctuation} from './punctuation2.js';
11
- import {fieldToString, isControlSubfieldCode} from './utils.js';
11
+ import {fieldToString, isContentSubfieldCode} from './utils.js';
12
12
 
13
13
  import {fieldNormalizeControlNumbers/*, normalizeControlSubfieldValue*/} from './normalize-identifiers.js';
14
14
  import createDebugLogger from 'debug';
15
15
  import {normalizePartData, subfieldContainsPartData} from './normalizeSubfieldValueForComparison.js';
16
+ import {isEnnakkotietoSubfield} from './prepublicationUtils.js';
17
+ import {getSynonym} from './merge-fields/worldKnowledge.js';
16
18
 
17
19
  const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:normalizeFieldForComparison');
18
20
  //const debugData = debug.extend('data');
19
21
  const debugDev = debug.extend('dev');
20
22
 
21
- export function isEnnakkotietoSubfieldG(subfield) {
22
- if (valuelessSubfield(subfield)) {
23
- return false;
24
- }
25
- if (subfield.code !== 'g') {
26
- return false;
27
- }
28
- return subfield.value.match(/^ENNAKKOTIETO\.?$/gui);
29
- }
30
-
31
23
  function debugFieldComparison(oldField, newField) { // NB: Debug-only function!
32
24
  /*
33
25
  // We may drop certain subfields:
@@ -60,7 +52,7 @@ function containsHumanName(tag = '???', subfieldCode = undefined) {
60
52
  }
61
53
 
62
54
  function containsCorporateName(tag = '???', subfieldCode = undefined) {
63
- // NB! This set is for bibs! Auth has 400... What else...
55
+ // NB! This set is for bibs! Auth has 410... What else...
64
56
  if (['110', '610', '710', '810'].includes(tag)) {
65
57
  if (subfieldCode === undefined || subfieldCode === 'a') {
66
58
  return true;
@@ -72,15 +64,15 @@ function containsCorporateName(tag = '???', subfieldCode = undefined) {
72
64
 
73
65
  function skipAllSubfieldNormalizations(value, subfieldCode, tag) {
74
66
 
75
- if (isEnnakkotietoSubfieldG({'code': subfieldCode, value})) {
67
+ if (isEnnakkotietoSubfield({'code': subfieldCode, value})) {
76
68
  return true;
77
69
  }
78
70
 
79
- if (tag === '035' && ['a', 'z'].includes(subfieldCode)) { // A
71
+ if (tag === '035' && ['a', 'z'].includes(subfieldCode)) {
80
72
  return true;
81
73
  }
82
74
 
83
- if (isControlSubfieldCode(subfieldCode)) {
75
+ if (!isContentSubfieldCode(subfieldCode, tag)) {
84
76
  return true;
85
77
  }
86
78
  return false;
@@ -244,17 +236,20 @@ function removeDecomposedDiacritics(value = '') {
244
236
 
245
237
  function normalizeSubfieldValue(value, subfieldCode, tag) {
246
238
  // NB! For comparison of values only
247
- /* eslint-disable */
239
+ /* beslint-disable */
240
+ value = removeCharsThatDontCarryMeaning(value, tag, subfieldCode);
241
+ value = getSynonym(tag, subfieldCode, value); // Must be done before punc stripping and lowercasing...
248
242
  value = subfieldValueLowercase(value, subfieldCode, tag);
249
243
 
244
+
250
245
  // Normalize: s. = sivut = pp.
251
246
  value = normalizePartData(value, subfieldCode, tag);
252
- value = value.replace(/^\[([^[\]]+)\]/gu, '$1'); // eslint-disable-line functional/immutable-data
247
+ value = value.replace(/^\[([^[\]]+)\]/gu, '$1');
253
248
 
254
249
  if (['130', '730'].includes(tag) && subfieldCode === 'a') {
255
250
  value = value.replace(' : ', ', '); // "Halloween ends (elokuva, 2022)" vs "Halloween ends (elokuva : 2023)"
256
251
  }
257
- /* eslint-enable */
252
+ /* beslint-enable */
258
253
 
259
254
  // Not going to do these in the foreseeable future, but keeping them here for discussion:
260
255
  // Possible normalizations include but are not limited to:
@@ -311,7 +306,8 @@ function normalizeField(field) {
311
306
  export function cloneAndNormalizeFieldForComparison(field) {
312
307
  // NB! This new field is for comparison purposes only.
313
308
  // Some of the normalizations might be considered a bit overkill for other purposes.
314
- const clonedField = clone(field);
309
+ const clonedField = cloneAndRemovePunctuation(field); // was only clone(field)
310
+
315
311
  if (fieldSkipNormalization(field)) {
316
312
  return clonedField;
317
313
  }
@@ -319,8 +315,11 @@ export function cloneAndNormalizeFieldForComparison(field) {
319
315
  if (valuelessSubfield(sf)) {
320
316
  return;
321
317
  }
318
+
322
319
  sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag);
323
- sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);
320
+ //sf.value = normalizeForSamenessCheck(field.tag, sf.code, sf.value);
321
+
322
+
324
323
  });
325
324
 
326
325
  normalizeField(clonedField);
@@ -203,7 +203,7 @@ export function removeWorsePrepubField594s(record) {
203
203
 
204
204
 
205
205
  export function isEnnakkotietoSubfield(subfield) {
206
- if (subfield.code !== '9' && subfield.code !== 'g') {
206
+ if (!['g', '9', '7'].includes(subfield.code)) {
207
207
  return false;
208
208
  }
209
209
  // Length <= 13 allows punctuation, but does not require it: