@natlibfi/marc-record-validators-melinda 12.0.0-alpha.1 → 12.0.0-alpha.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.github/workflows/{melinda-node-tests.yml → melinda-node-tests-and-publish.yml} +36 -11
  2. package/dist/access-rights.test.js +1 -1
  3. package/dist/access-rights.test.js.map +1 -1
  4. package/dist/addMissingField337.test.js +1 -1
  5. package/dist/addMissingField337.test.js.map +1 -1
  6. package/dist/addMissingField338.test.js +1 -1
  7. package/dist/addMissingField338.test.js.map +1 -1
  8. package/dist/cyrillux-usemarcon-replacement.test.js +4 -7
  9. package/dist/cyrillux-usemarcon-replacement.test.js.map +2 -2
  10. package/dist/cyrillux.test.js +1 -1
  11. package/dist/cyrillux.test.js.map +1 -1
  12. package/dist/double-commas.test.js +1 -1
  13. package/dist/double-commas.test.js.map +1 -1
  14. package/dist/empty-fields.test.js +1 -1
  15. package/dist/empty-fields.test.js.map +1 -1
  16. package/dist/ending-punctuation-conf.js +6 -4
  17. package/dist/ending-punctuation-conf.js.map +2 -2
  18. package/dist/ending-punctuation.js +88 -18
  19. package/dist/ending-punctuation.js.map +3 -3
  20. package/dist/ending-punctuation.test.js +198 -103
  21. package/dist/ending-punctuation.test.js.map +2 -2
  22. package/dist/field-008-18-34-character-groups.test.js +1 -1
  23. package/dist/field-008-18-34-character-groups.test.js.map +1 -1
  24. package/dist/field-structure.test.js +1 -1
  25. package/dist/field-structure.test.js.map +1 -1
  26. package/dist/index.js +122 -59
  27. package/dist/index.js.map +2 -2
  28. package/dist/indicator-fixes.js +11 -1
  29. package/dist/indicator-fixes.js.map +2 -2
  30. package/dist/isbn-issn.js +8 -5
  31. package/dist/isbn-issn.js.map +2 -2
  32. package/dist/melindaCustomMergeFields.js +1 -1
  33. package/dist/melindaCustomMergeFields.js.map +2 -2
  34. package/dist/merge-fields/counterpartField.js +5 -0
  35. package/dist/merge-fields/counterpartField.js.map +2 -2
  36. package/dist/merge-fields/dataProvenance.js +29 -0
  37. package/dist/merge-fields/dataProvenance.js.map +7 -0
  38. package/dist/merge-fields/index.js +11 -2
  39. package/dist/merge-fields/index.js.map +2 -2
  40. package/dist/merge-fields/mergeField.js +1 -1
  41. package/dist/merge-fields/mergeField.js.map +2 -2
  42. package/dist/merge-fields.test.js +4 -2
  43. package/dist/merge-fields.test.js.map +2 -2
  44. package/dist/mergeField500Lisapainokset.js +1 -1
  45. package/dist/mergeField500Lisapainokset.js.map +2 -2
  46. package/dist/normalizeFieldForComparison.js +24 -0
  47. package/dist/normalizeFieldForComparison.js.map +2 -2
  48. package/dist/punctuation2.js +11 -5
  49. package/dist/punctuation2.js.map +2 -2
  50. package/dist/removeInferiorDataFields.js +2 -1
  51. package/dist/removeInferiorDataFields.js.map +2 -2
  52. package/dist/resolveOrphanedSubfield6s.js +1 -1
  53. package/dist/resolveOrphanedSubfield6s.js.map +2 -2
  54. package/dist/sortSubfields.js +5 -5
  55. package/dist/sortSubfields.js.map +2 -2
  56. package/dist/translate-terms.test.js +12 -2
  57. package/dist/translate-terms.test.js.map +2 -2
  58. package/dist/utils.js +9 -3
  59. package/dist/utils.js.map +2 -2
  60. package/package.json +22 -23
  61. package/src/access-rights.test.js +1 -1
  62. package/src/addMissingField337.test.js +1 -1
  63. package/src/addMissingField338.test.js +1 -1
  64. package/src/cyrillux-usemarcon-replacement.test.js +4 -9
  65. package/src/cyrillux.test.js +1 -1
  66. package/src/double-commas.test.js +1 -1
  67. package/src/empty-fields.test.js +1 -1
  68. package/src/ending-punctuation-conf.js +6 -5
  69. package/src/ending-punctuation.js +115 -24
  70. package/src/ending-punctuation.test.js +187 -104
  71. package/src/field-008-18-34-character-groups.test.js +1 -1
  72. package/src/field-structure.test.js +1 -1
  73. package/src/index.js +132 -59
  74. package/src/indicator-fixes.js +14 -1
  75. package/src/isbn-issn.js +11 -6
  76. package/src/melindaCustomMergeFields.js +1 -1
  77. package/src/merge-fields/counterpartField.js +6 -0
  78. package/src/merge-fields/dataProvenance.js +41 -0
  79. package/src/merge-fields/index.js +11 -2
  80. package/src/merge-fields/mergeField.js +2 -2
  81. package/src/merge-fields.test.js +6 -2
  82. package/src/mergeField500Lisapainokset.js +1 -1
  83. package/src/normalizeFieldForComparison.js +26 -0
  84. package/src/punctuation2.js +14 -5
  85. package/src/removeInferiorDataFields.js +4 -1
  86. package/src/resolveOrphanedSubfield6s.js +1 -1
  87. package/src/sortSubfields.js +7 -5
  88. package/src/translate-terms.test.js +25 -2
  89. package/src/utils.js +19 -3
  90. package/test-fixtures/indicator-fixes/10/expectedResult.json +11 -0
  91. package/test-fixtures/indicator-fixes/10/metadata.json +4 -0
  92. package/test-fixtures/indicator-fixes/10/record.json +11 -0
  93. package/test-fixtures/merge-fields/f05/expectedResult.json +24 -0
  94. package/test-fixtures/merge-fields/f05/metadata.json +6 -0
  95. package/test-fixtures/merge-fields/f05/record.json +30 -0
  96. package/test-fixtures/remove-inferior-datafields/f16/expectedResult.json +12 -0
  97. package/test-fixtures/remove-inferior-datafields/f16/metadata.json +5 -0
  98. package/test-fixtures/remove-inferior-datafields/f16/record.json +14 -0
  99. package/test-fixtures/translate-terms-data.js +42 -0
  100. package/src/melindaCustomMergeFields.json +0 -5120
@@ -1,6 +1,6 @@
1
1
  import assert from 'node:assert';
2
2
  import {MarcRecord} from '@natlibfi/marc-record';
3
- import validatorFactory from '../src/field-008-18-34-character-groups.js';
3
+ import validatorFactory from './field-008-18-34-character-groups.js';
4
4
  import {READERS} from '@natlibfi/fixura';
5
5
  import generateTests from '@natlibfi/fixugen';
6
6
  import createDebugLogger from 'debug';
@@ -1,7 +1,7 @@
1
1
  import assert from 'node:assert';
2
2
  import {describe, it} from 'node:test';
3
3
  import {MarcRecord} from '@natlibfi/marc-record';
4
- import validatorFactory from '../src/field-structure.js';
4
+ import validatorFactory from './field-structure.js';
5
5
 
6
6
 
7
7
  // Factory validation
package/src/index.js CHANGED
@@ -1,62 +1,85 @@
1
- import AccessRights from './access-rights';
2
- import AddMissingField041 from './addMissingField041';
3
- import AddMissingField336 from './addMissingField336';
4
- import AddMissingField337 from './addMissingField337';
5
- import AddMissingField338 from './addMissingField338';
6
- import Cyrillux from './cyrillux';
7
- import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement';
8
- import DisambiguateSeriesStatements from './disambiguateSeriesStatements';
9
- import DoubleCommas from './double-commas';
10
- import DuplicatesInd1 from './duplicates-ind1';
11
- import EmptyFields from './empty-fields';
12
- import EndingPunctuation from './ending-punctuation';
13
- import EndingWhitespace from './ending-whitespace';
14
- import Field008CharacterGroups from './field-008-18-34-character-groups';
15
- import Field505Separators from './field-505-separators';
16
- import Field521Fix from './field-521-fix';
17
- import FieldExclusion from './field-exclusion';
18
- import FieldStructure from './field-structure';
19
- import FieldsPresent from './fields-present';
20
- import Fix33X from './fix-33X';
21
- import FixCountryCodes from './fix-country-codes';
22
- import FixLanguageCodes from './fix-language-codes';
23
- import FixRelatorTerms from './fixRelatorTerms';
24
- import FixedFields from './fixed-fields';
25
- import IdenticalFields from './identical-fields';
26
- import IndicatorFixes from './indicator-fixes';
27
- import IsbnIssn from './isbn-issn';
28
- import ItemLanguage from './item-language';
29
- import MergeField500Lisapainokset from './mergeField500Lisapainokset';
30
- import MergeFields from './merge-fields/';
31
- import MergeRelatorTermFields from './mergeRelatorTermFields';
32
- import Modernize502 from './modernize-502';
33
- import MultipleSubfield0s from './multiple-subfield-0';
34
- import NonBreakingSpace from './non-breaking-space';
35
- import NormalizeDashes from './normalize-dashes';
36
- import NormalizeIdentifiers from './normalize-identifiers';
37
- import NormalizeQualifyingInformation from './normalize-qualifying-information';
38
- import NormalizeUTF8Diacritics from './normalize-utf8-diacritics';
39
- import Punctuation from './punctuation/';
40
- import Punctuation2 from './punctuation2';
41
- import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers';
42
- import RemoveDuplicateDataFields from './removeDuplicateDataFields';
43
- import RemoveInferiorDataFields from './removeInferiorDataFields';
44
- import ResolvableExtReferences from './resolvable-ext-references-melinda';
45
- import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s';
46
- import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes';
47
- import SortFields from './sortFields';
48
- import SortRelatorTerms from './sortRelatorTerms';
49
- import SortSubfields from './sortSubfields';
50
- import SortTags from './sort-tags';
1
+ import AccessRights from './access-rights.js';
2
+ import AddMissingField041 from './addMissingField041.js';
3
+ import AddMissingField336 from './addMissingField336.js';
4
+ import AddMissingField337 from './addMissingField337.js';
5
+ import AddMissingField338 from './addMissingField338.js';
6
+ import Cyrillux from './cyrillux.js';
7
+ import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement.js';
8
+ import DisambiguateSeriesStatements from './disambiguateSeriesStatements.js';
9
+ import DoubleCommas from './double-commas.js';
10
+ import DuplicatesInd1 from './duplicates-ind1.js';
11
+ import EmptyFields from './empty-fields.js';
12
+ import EndingPunctuation from './ending-punctuation.js';
13
+ import EndingWhitespace from './ending-whitespace.js';
14
+ import Field008CharacterGroups from './field-008-18-34-character-groups.js';
15
+ import Field505Separators from './field-505-separators.js';
16
+ import Field521Fix from './field-521-fix.js';
17
+ import FieldExclusion from './field-exclusion.js';
18
+ import FieldStructure from './field-structure.js';
19
+ import FieldsPresent from './fields-present.js';
20
+ import Fix33X from './fix-33X.js';
21
+ import FixCountryCodes from './fix-country-codes.js';
22
+ import FixLanguageCodes from './fix-language-codes.js';
23
+ import FixRelatorTerms from './fixRelatorTerms.js';
24
+ import FixedFields from './fixed-fields.js';
25
+ import IdenticalFields from './identical-fields.js';
26
+ import IndicatorFixes from './indicator-fixes.js';
27
+ import IsbnIssn from './isbn-issn.js';
28
+ import ItemLanguage from './item-language.js';
29
+ import MergeField500Lisapainokset from './mergeField500Lisapainokset.js';
30
+ import MergeFields from './merge-fields/index.js';
31
+ import MergeRelatorTermFields from './mergeRelatorTermFields.js';
32
+ import Modernize502 from './modernize-502.js';
33
+ import MultipleSubfield0s from './multiple-subfield-0.js';
34
+ import NonBreakingSpace from './non-breaking-space.js';
35
+ import NormalizeDashes from './normalize-dashes.js';
36
+ import NormalizeIdentifiers from './normalize-identifiers.js';
37
+ import NormalizeQualifyingInformation from './normalize-qualifying-information.js';
38
+ import NormalizeUTF8Diacritics from './normalize-utf8-diacritics.js';
39
+ import Punctuation from './punctuation/index.js';
40
+ import Punctuation2 from './punctuation2.js';
41
+ import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers.js';
42
+ import RemoveDuplicateDataFields from './removeDuplicateDataFields.js';
43
+ import RemoveInferiorDataFields from './removeInferiorDataFields.js';
44
+ import ResolvableExtReferences from './resolvable-ext-references-melinda.js';
45
+ import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s.js';
46
+ import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes.js';
47
+ import SortFields from './sortFields.js';
48
+ import SortRelatorTerms from './sortRelatorTerms.js';
49
+ import SortSubfields from './sortSubfields.js';
50
+ import SortTags from './sort-tags.js';
51
51
  // import StripPunctuation from './stripPunctuation'; // Can we add this here? Should be used very cautiously!
52
- import SubfieldValueNormalizations from './subfieldValueNormalizations';
53
- import SubfieldExclusion from './subfield-exclusion';
54
- import Sync007And300 from './sync-007-and-300';
55
- import TranslateTerms from './translate-terms';
56
- import TypeOfDateF008 from './typeOfDate-008';
57
- import UnicodeDecomposition from './unicode-decomposition';
58
- import UpdateField540 from './update-field-540';
59
- import Urn from './urn';
52
+ import SubfieldValueNormalizations from './subfieldValueNormalizations.js';
53
+ import SubfieldExclusion from './subfield-exclusion.js';
54
+ import Sync007And300 from './sync-007-and-300.js';
55
+ import TranslateTerms from './translate-terms.js';
56
+ import TypeOfDateF008 from './typeOfDate-008.js';
57
+ import UnicodeDecomposition from './unicode-decomposition.js';
58
+ import UpdateField540 from './update-field-540.js';
59
+ import Urn from './urn.js';
60
+
61
+ import {getCounterpart} from './merge-fields/counterpartField.js';
62
+
63
+ import {postprocessRecords} from './merge-fields/mergeOrAddPostprocess.js';
64
+ import {mergeField} from './merge-fields/mergeField.js';
65
+ import {fieldGetOccurrenceNumberPairs, fieldGetUnambiguousOccurrenceNumber, fieldToNormalizedString, fieldsToNormalizedString, get6s,
66
+ isValidSubfield6, recordGetMaxSubfield6OccurrenceNumberAsInteger,
67
+ intToOccurrenceNumberString, resetSubfield6Tag, subfield6ResetOccurrenceNumber, subfield6GetOccurrenceNumber,
68
+ subfield6GetOccurrenceNumberAsInteger} from './subfield6Utils.js';
69
+
70
+ import {getSubfield8LinkingNumber, isValidSubfield8, recordGetAllSubfield8LinkingNumbers, recordGetFieldsWithSubfield8LinkingNumber} from './subfield8Utils.js';
71
+
72
+ import {recordFixRelatorTerms} from './fixRelatorTerms.js';
73
+ import {fieldTrimSubfieldValues} from './normalizeFieldForComparison.js';
74
+ import {baseHasEqualOrHigherEncodingLevel, deleteAllPrepublicationNotesFromField500InNonPubRecord, encodingLevelIsBetterThanPrepublication, getEncodingLevel, isEnnakkotietoField, isEnnakkotietoSubfield} from './prepublicationUtils.js';
75
+ import {melindaFieldSpecs} from '@natlibfi/marc-record-validators-melinda/dist/melindaCustomMergeFields.js';
76
+
77
+ import {cloneAndRemovePunctuation} from './normalizeFieldForComparison.js';
78
+ import {removeWorsePrepubField500s, removeWorsePrepubField594s} from './prepublicationUtils.js';
79
+ import {fieldFixPunctuation} from './punctuation2.js';
80
+ import {recordResetSubfield6OccurrenceNumbers} from './reindexSubfield6OccurenceNumbers.js';
81
+ import {sortAdjacentSubfields} from './sortSubfields.js';
82
+ import {fieldsToString} from './utils.js';
60
83
 
61
84
  export {
62
85
  AccessRights,
@@ -116,5 +139,55 @@ export {
116
139
  UpdateField540,
117
140
  Urn,
118
141
  SortFields, // Keep this penultimate
119
- MergeFields // Run this last *iff* you want to use this at all
142
+ MergeFields, // Run this last *iff* you want to use this at all
143
+
144
+ // Functions for processing record... These should probably go to some other project.
145
+ // Too specific for marc-record-js though...
146
+ // 1. generic low level stuff
147
+ getEncodingLevel,
148
+
149
+ // 2. text normalizations (eg. for similarity comparisons, field merge)
150
+ cloneAndRemovePunctuation,
151
+ fieldFixPunctuation,
152
+ fieldToNormalizedString,
153
+ fieldTrimSubfieldValues,
154
+ fieldsToNormalizedString,
155
+ fieldsToString,
156
+ recordFixRelatorTerms,
157
+ sortAdjacentSubfields,
158
+
159
+ // 3. prepublication stuff
160
+ baseHasEqualOrHigherEncodingLevel,
161
+ deleteAllPrepublicationNotesFromField500InNonPubRecord,
162
+ encodingLevelIsBetterThanPrepublication,
163
+ isEnnakkotietoField,
164
+ isEnnakkotietoSubfield,
165
+ removeWorsePrepubField500s,
166
+ removeWorsePrepubField594s,
167
+
168
+ // 4. subfield $6 related functions
169
+ fieldGetOccurrenceNumberPairs,
170
+ get6s,
171
+ fieldGetUnambiguousOccurrenceNumber,
172
+ intToOccurrenceNumberString,
173
+ isValidSubfield6,
174
+ recordGetMaxSubfield6OccurrenceNumberAsInteger,
175
+ recordResetSubfield6OccurrenceNumbers,
176
+ resetSubfield6Tag,
177
+ subfield6ResetOccurrenceNumber,
178
+ subfield6GetOccurrenceNumber,
179
+ subfield6GetOccurrenceNumberAsInteger,
180
+
181
+ // 5. subfield $8 related functions
182
+ getSubfield8LinkingNumber,
183
+ isValidSubfield8,
184
+ recordGetAllSubfield8LinkingNumbers,
185
+ recordGetFieldsWithSubfield8LinkingNumber,
186
+
187
+ // 6. merge, other
188
+ getCounterpart, // field merge: finds a similar field with which a field can merge
189
+ melindaFieldSpecs, // contains information about the legal fields and subfields, and their repeatability
190
+ mergeField,
191
+ postprocessRecords // clean-up function that cleans up both base and source record (which may be the same)
192
+
120
193
  };
@@ -171,6 +171,18 @@ function normalize245Indicator1(field, record) {
171
171
  field.ind1 = field1XX.length === 0 ? '0' : '1';
172
172
  }
173
173
 
174
/**
 * Sets indicator 1 of a 520 field to '8' ("no display constant generated")
 * when the field's only $a consists of one of the listed generic summary
 * headings (part of MELKEHITYS-2579).
 *
 * Fields with another tag, and 520 fields with zero or several $a subfields,
 * are left untouched.
 *
 * @param {object} field - Field with `tag`, `ind1` and `subfields`; modified in place.
 * @returns {void}
 */
function noDisplayConstantGenerated520Indicator1(field) {
  if (field.tag !== '520') {
    return;
  }
  const genericHeadings = ['Abstract.', 'Abstrakt.', 'Abstrakti.', 'English Summary.', 'Sammandrag.', 'Tiivistelmä.'];
  const as = field.subfields.filter(sf => sf.code === 'a');
  // Inspect the $a itself: it is not necessarily the first subfield of the field
  // (a $6 or $8, for instance, may precede it).
  if (as.length === 1 && genericHeadings.includes(as[0].value)) {
    field.ind1 = '8';
  }
}
185
+
174
186
  function normalize776Indicator2(field) {
175
187
  if (field.tag !== '776') {
176
188
  return;
@@ -229,7 +241,7 @@ function getLanguages(record) {
229
241
 
230
242
  }
231
243
 
232
- export function recordNormalizeIndicators(record) {
244
+ function recordNormalizeIndicators(record) {
233
245
  recordNormalize490(record);
234
246
 
235
247
  // Language is used to handle non-filing indicators
@@ -242,6 +254,7 @@ export function recordNormalizeIndicators(record) {
242
254
  function fieldNormalizeIndicators(field, record, languages) {
243
255
  normalize084Indicator1(field);
244
256
  normalize245Indicator1(field, record);
257
+ noDisplayConstantGenerated520Indicator1(field);
245
258
  normalizeNonFilingIndicator1(field, languages);
246
259
  normalizeNonFilingIndicator2(field, languages);
247
260
  normalize776Indicator2(field);
package/src/isbn-issn.js CHANGED
@@ -1,5 +1,5 @@
1
1
  import ISBN from 'isbn3';
2
- import validateISSN from '@natlibfi/issn-verify';
2
+ import {issn as validateISSN} from '@natlibfi/issn-verify';
3
3
 
4
4
  // handleInvalid: move invalid 020$a to 020$z, and invalid 022$a to 022$y
5
5
  export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
@@ -29,8 +29,14 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
29
29
 
30
30
  function invalidISBN(isbn) {
31
31
  const isbnOnly = getFirstWord(isbn);
32
- const auditedIsbn = ISBN.audit(isbnOnly);
33
- return !auditedIsbn.validIsbn;
32
+ try {
33
+ const auditedIsbn = ISBN.audit(isbnOnly);
34
+ return !auditedIsbn.validIsbn;
35
+ }
36
+ catch {
37
+ return true;
38
+ }
39
+
34
40
  }
35
41
 
36
42
  function invalidSubfield(subfield) {
@@ -238,9 +244,8 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
238
244
  function normalizeIsbnValue(value) {
239
245
  const trimmedValue = getFirstWord(value);
240
246
  //const trimmedValue = trimISBN(value); // NB! This might lose information that should be stored in $q...
241
- const auditResult = ISBN.audit(trimmedValue);
242
- if (!auditResult.validIsbn) {
243
- return undefined;
247
+ if (invalidISBN(trimmedValue)) {
248
+ return undefined; // should this return value (= nothing normalized), not undefined?
244
249
  }
245
250
  const numbersOnly = trimmedValue.replace(/[^0-9Xx]+/ug, '');
246
251
  const parsedIsbn = ISBN.parse(trimmedValue);
@@ -1,4 +1,4 @@
1
- export const melindaCustomMergeFields = {'fields':
1
+ export const melindaFieldSpecs = {'fields':
2
2
  [
3
3
  {
4
4
  'tag': 'leader',
@@ -11,6 +11,7 @@ import {controlSubfieldsPermitMerge} from './controlSubfields.js';
11
11
  import {mergableIndicator1, mergableIndicator2} from './mergableIndicator.js';
12
12
  import {partsAgree} from '../normalizeSubfieldValueForComparison.js';
13
13
  import {normalizeForSamenessCheck, valueCarriesMeaning} from './worldKnowledge.js';
14
+ import { provenanceSubfieldsPermitMerge } from './dataProvenance.js';
14
15
 
15
16
  const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:mergeField:counterpart');
16
17
  //const debugData = debug.extend('data');
@@ -375,6 +376,11 @@ function syntacticallyMergablePair(baseField, sourceField, config) {
375
376
  return false;
376
377
  }
377
378
 
379
+ if (!provenanceSubfieldsPermitMerge(baseField, sourceField)) {
380
+ nvdebug('non-mergable (reason: data provenance subfield)', debugDev);
381
+ return false;
382
+ }
383
+
378
384
  // NB! field1.tag and field2.tag might differ (1XX vs 7XX). Therefore required subfields might theoretically differ as well.
379
385
  // Note: Theoretically 260 $efg vs 264 with IND2=3 has already been handled by the preprocessor.
380
386
  // Thus check both:
@@ -0,0 +1,41 @@
1
+ // See https://www.loc.gov/marc/bibliographic/bdapndxj.html for details
2
+
3
+ import {subfieldArraysContainSameData} from "../utils.js";
4
+
5
/**
 * Maps a field tag to the subfield code used for data provenance in that field.
 *
 * @param {string} tag - Three-character field tag.
 * @returns {string|undefined} 'y' for 533/800/810/811/830, 'e' for 856/857,
 *   'l' for tags starting with 76, 77 or 78, undefined for tags starting
 *   with 00, and '7' for every other tag.
 */
export function tagToDataProvenanceSubfieldCode(tag) {
  const tagsUsingSubfieldY = ['533', '800', '810', '811', '830'];
  if (tagsUsingSubfieldY.includes(tag)) {
    return 'y';
  }

  const tagsUsingSubfieldE = ['856', '857'];
  if (tagsUsingSubfieldE.includes(tag)) {
    return 'e';
  }

  // Tags 76X-78X
  if (tag.match(/^7[678]/u) !== null) {
    return 'l';
  }

  // Tags 00X get no provenance subfield code at all; everything else uses $7.
  return tag.match(/^00/u) === null ? '7' : undefined;
}
22
+
23
+
24
/**
 * Tells whether the data provenance subfields of two fields allow merging them.
 *
 * The provenance subfield code is looked up from the base field's tag. The
 * provenance subfields of both fields are then compared with
 * subfieldArraysContainSameData().
 *
 * @param {object} baseField - Field from the base record.
 * @param {object} sourceField - Field from the source record.
 * @returns {boolean} true when the base field has no subfields; false when the
 *   tag has no provenance subfield code; otherwise the comparison result.
 */
export function provenanceSubfieldsPermitMerge(baseField, sourceField) {
  const provenanceCode = tagToDataProvenanceSubfieldCode(baseField.tag);

  // A base field without a subfield array never blocks the merge.
  if (!baseField.subfields) {
    return true;
  }

  // No provenance subfield code exists for this tag: refuse.
  if (provenanceCode === undefined) {
    return false;
  }

  const pickProvenanceSubfields = field => field.subfields.filter(sf => sf.code === provenanceCode);

  // Currently we just compare two arrays. Later on we might do something more sophisticated
  // with specific $7 data provenance category/relationship codes, or actual values.
  return subfieldArraysContainSameData(pickProvenanceSubfields(baseField), pickProvenanceSubfields(sourceField));
}
@@ -18,16 +18,25 @@ import {mergeConfig as defaultConfig} from './mergeConfig.js';
18
18
 
19
19
  //const defaultConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '..', '..', 'src', 'merge-fields', 'config.json'), 'utf8'));
20
20
 
21
- export default function () {
21
+ export default function (defaultTagPattern = undefined) {
22
22
 
23
23
  return {
24
24
  description, validate, fix
25
25
  };
26
26
 
27
/**
 * Chooses the tag pattern used for selecting the fields to merge.
 *
 * Precedence: config.tagPattern, then the factory's defaultTagPattern
 * (used by tests), then the built-in pattern.
 *
 * @param {object} [config] - Optional configuration object.
 * @returns {*} The first truthy candidate in the order given above.
 */
function getTagPattern(config) {
  return config?.tagPattern || defaultTagPattern || '^[1678](?:00|10|11|30)$';
}
27
36
 
28
37
  function mergeFieldsWithinRecord(record, config) {
29
38
  //const candFields = record.fields.toReversed(); // Node 20+ only! Filter via config?
30
- const fields = config && config.tagPattern ? record.get(config.tagPattern) : record.get(/^[1678](?:00|10|11|30)$/u);
39
+ const fields = record.get(getTagPattern(config)); // config && config.tagPattern ? record.get(config.tagPattern) : record.get(/^[1678](?:00|10|11|30)$/u);
31
40
 
32
41
  fields.reverse();
33
42
  const mergedField = fields.find(f => mergeField(record, record, f, config));
@@ -106,8 +106,8 @@ function skipMergeField(baseRecord, sourceField, config) {
106
106
  return true;
107
107
  }
108
108
 
109
- // Skip duplicate field:
110
- if (baseRecord.fields.some(baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField))) {
109
+ // Skip duplicate field when merging two records (NB! internal merge merges/removes the duplicate field):
110
+ if (!baseRecord.internalMerge && baseRecord.fields.some(baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField))) {
111
111
  nvdebug(`skipMergeField(): field '${fieldToString(sourceField)}' already exists! No merge required!`, debugDev);
112
112
  sourceField.deleted = 1;
113
113
  return true;
@@ -4,6 +4,7 @@ import validatorFactory from './merge-fields/index.js';
4
4
  import {READERS} from '@natlibfi/fixura';
5
5
  import generateTests from '@natlibfi/fixugen';
6
6
  import createDebugLogger from 'debug';
7
+ import { nvdebug } from './utils.js';
7
8
 
8
9
  generateTests({
9
10
  callback,
@@ -29,17 +30,20 @@ async function testValidatorFactory() {
29
30
  assert.equal(typeof validator.validate, 'function');
30
31
  }
31
32
 
32
- async function callback({getFixture, enabled = true, fix = false}) {
33
+ async function callback({getFixture, enabled = true, fix = false, tagPattern = false}) {
33
34
  if (enabled === false) {
34
35
  debug('TEST SKIPPED!');
35
36
  return;
36
37
  }
37
38
 
38
- const validator = await validatorFactory();
39
+ nvdebug(`TAG PATTERN: ${tagPattern}`);
40
+
41
+ const validator = await validatorFactory(tagPattern);
39
42
  const record = new MarcRecord(getFixture('record.json'));
40
43
  const expectedResult = getFixture('expectedResult.json');
41
44
  // console.log(expectedResult); // eslint-disable-line
42
45
 
46
+ // NB! This validator will only use tags matching /^[1678](?:00|10|11|30)$/ unless tagPattern is specified!
43
47
  if (!fix) {
44
48
  const result = await validator.validate(record);
45
49
  assert.deepEqual(result, expectedResult);
@@ -127,7 +127,7 @@ function extractAllPrintData(relevantFields) {
127
127
  }
128
128
 
129
129
 
130
- export function mergeLisapainokset(record) {
130
+ function mergeLisapainokset(record) {
131
131
  const relevantFields = getRelevantFields(record);
132
132
  if (relevantFields.length < 2) {
133
133
  return;
@@ -19,6 +19,9 @@ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:no
19
19
  const debugDev = debug.extend('dev');
20
20
 
21
21
  export function isEnnakkotietoSubfieldG(subfield) {
22
+ if (valuelessSubfield(subfield)) {
23
+ return false;
24
+ }
22
25
  if (subfield.code !== 'g') {
23
26
  return false;
24
27
  }
@@ -116,6 +119,9 @@ function subfieldValueLowercase(value, subfieldCode, tag) {
116
119
  }
117
120
 
118
121
  function subfieldLowercase(sf, tag) {
122
+ if (valuelessSubfield(sf)) {
123
+ return;
124
+ }
119
125
  sf.value = subfieldValueLowercase(sf.value, sf.code, tag);
120
126
  }
121
127
 
@@ -148,6 +154,10 @@ function hack490SubfieldA(field) {
148
154
 
149
155
  // NB! This won't work, if the punctuation has not been stripped beforehand!
150
156
  function removeSarja(subfield) {
157
+ if (valuelessSubfield(subfield)) {
158
+ return;
159
+ }
160
+
151
161
  if (subfield.code !== 'a') {
152
162
  return;
153
163
  }
@@ -188,6 +198,9 @@ function normalizeISBN(field) {
188
198
  relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf));
189
199
 
190
200
  function normalizeIsbnSubfield(sf) {
201
+ if (valuelessSubfield(sf)) {
202
+ return;
203
+ }
191
204
  //nvdebug(` ISBN-subfield? ${subfieldToString(sf)}`);
192
205
  sf.value = sf.value.replace(/-/ug, '');
193
206
  sf.value = sf.value.replace(/x/u, 'X');
@@ -202,6 +215,9 @@ function fieldSpecificHacks(field) {
202
215
 
203
216
  export function fieldTrimSubfieldValues(field) {
204
217
  field.subfields?.forEach((sf) => {
218
+ if (valuelessSubfield(sf)) {
219
+ return;
220
+ }
205
221
  sf.value = sf.value.replace(/^[ \t\n]+/u, '');
206
222
  sf.value = sf.value.replace(/[ \t\n]+$/u, '');
207
223
  sf.value = sf.value.replace(/[ \t\n]+/gu, ' ');
@@ -212,6 +228,9 @@ function fieldRemoveDecomposedDiacritics(field) {
212
228
  // Raison d'être/motivation: "Sirén" and diacriticless "Siren" might refer to a same surname, so this normalization
213
229
  // allows us to compare authors and avoid duplicate fields.
214
230
  field.subfields.forEach((sf) => {
231
+ if (valuelessSubfield(sf)) {
232
+ return;
233
+ }
215
234
  sf.value = removeDecomposedDiacritics(sf.value);
216
235
  });
217
236
  }
@@ -297,6 +316,9 @@ export function cloneAndNormalizeFieldForComparison(field) {
297
316
  return clonedField;
298
317
  }
299
318
  clonedField.subfields.forEach((sf) => { // Do this for all fields or some fields?
319
+ if (valuelessSubfield(sf)) {
320
+ return;
321
+ }
300
322
  sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag);
301
323
  sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);
302
324
  });
@@ -318,3 +340,7 @@ function fieldSkipNormalization(field) {
318
340
  }
319
341
  return false;
320
342
  }
343
+
344
/**
 * Tells whether a subfield has no value at all.
 *
 * @param {object} sf - Subfield object.
 * @returns {boolean} true only when sf.value is undefined (null and '' count as values).
 */
function valuelessSubfield(sf) {
  return typeof sf.value === 'undefined';
}
@@ -10,9 +10,10 @@
10
10
  * (They are jumped over when looking for next (non-controlfield subfield)
11
11
  */
12
12
  import {validateSingleField} from './ending-punctuation.js';
13
+ import {tagToDataProvenanceSubfieldCode} from './merge-fields/dataProvenance.js';
13
14
  import {fieldGetUnambiguousTag} from './subfield6Utils.js';
14
15
  //import createDebugLogger from 'debug';
15
- import {fieldToString, nvdebug} from './utils.js';
16
+ import {fieldToString, isControlSubfieldCode, nvdebug} from './utils.js';
16
17
  import clone from 'clone';
17
18
 
18
19
  //const debug = createDebugLogger('debug/punctuation2');
@@ -49,12 +50,19 @@ export default function () {
49
50
  }
50
51
  }
51
52
 
52
- function isControlSubfield(subfield) {
53
- return ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'].includes(subfield.code);
53
+
54
+
55
/**
 * Tells whether a subfield is skipped when looking for the next relevant subfield.
 *
 * @param {object} subfield - Subfield with a `code`.
 * @param {string} tag - Tag of the field the subfield belongs to.
 * @returns {*} true when the code is the tag's data provenance subfield code;
 *   otherwise whatever isControlSubfieldCode() returns for the code.
 */
function isIrrelevantSubfield(subfield, tag) {
  const provenanceCode = tagToDataProvenanceSubfieldCode(tag);
  // Currently isControlSubfieldCode() contains other stuff as well ($3, $4, $7, $9...)
  return subfield.code === provenanceCode || isControlSubfieldCode(subfield.code);
}
55
62
 
63
+
56
64
  function getNextRelevantSubfield(field, currSubfieldIndex) {
57
- return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isControlSubfield(subfield));
65
+ return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isIrrelevantSubfield(subfield, field.tag));
58
66
  }
59
67
 
60
68
  export function fieldGetFixedString(field, add = true) {
@@ -155,7 +163,8 @@ const remove490And830Whatever = [{'code': 'axyzv', 'followedBy': 'axyzv', 'remov
155
163
  const linkingEntryRemoveWhatever = [
156
164
  {'code': 'i', 'followedBy': 'at', 'remove': / ?:$/u}, // ':'
157
165
  {'code': 'at', 'remove': /\.$/u},
158
- {'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz', 'remove': /\. -$/u}
166
+ // Only ". -" separator is still used in music. We can strip it, but can only create the non-music punctuation!
167
+ {'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz#', 'remove': /\. -$/u}
159
168
  ];
160
169
 
161
170
 
@@ -332,7 +332,10 @@ function deriveIndividualDeletables(record) {
332
332
  // MET-575 (merge: applies in postprocessing)
333
333
  const inferiorTerms = getPrepublicationTerms(currString);
334
334
 
335
- const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...inferiorTerms];
335
+ // MELKEHITYS-3277-ish: non-AI is better than AI (a rare case where longer version is inferior):
336
+ const aiBased = `${currString} ‡7 (dpenmw)AI`;
337
+
338
+ const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...inferiorTerms, aiBased];
336
339
 
337
340
  if (subsets.length) {
338
341
  return processTodoList([...stillToDo, ...moreToDo], newDeletables);
@@ -41,7 +41,7 @@ export default function () {
41
41
  }
42
42
  }
43
43
 
44
- export function recordFixSubfield6OccurrenceNumbers(record) {
44
+ function recordFixSubfield6OccurrenceNumbers(record) {
45
45
  const fieldsContainingSubfield6 = record.fields.filter(field => fieldHasSubfield(field, '6'));
46
46
  const orphanedFields = getOrphanedFields(fieldsContainingSubfield6);
47
47
 
@@ -67,9 +67,9 @@ export default function (defaultTagPattern) {
67
67
 
68
68
  // X00, X10, X11 and X130 could also form their own sets...
69
69
  // (ouch! sometimes $c comes after $d...): LoC: 100 0# ‡a Black Foot, ‡c Chief, ‡d d. 1877 ‡c (Spirit)
70
- const sortOrderForX00 = ['i', 'a', 'b', 'q', 'c', 'd', 'e', 't', 'u', 'l', 'f', 'x', 'y', 'z', '0', '5', '9']; // skip $g. Can't remember why, though...
71
- const sortOrderForX10 = ['i', 'a', 'b', 't', 'n', 'c', 'e', 'v', 'w', 'x', 'y', 'z', '0', '5', '9']; // somewhat iffy
72
- const sortOrderForX11 = ['a', 'n', 'd', 'c', 'e', 'g', 'j', '0', '5', '9'];
70
+ const sortOrderForX00 = ['i', 'a', 'b', 'q', 'c', 'd', 'e', 't', 'u', 'l', 'f', 'x', 'y', 'z', '0', '1', '5', '9']; // skip $g. Can't remember why, though...
71
+ const sortOrderForX10 = ['i', 'a', 'b', 't', 'n', 'c', 'e', 'v', 'w', 'x', 'y', 'z', '0', '1', '5', '9']; // somewhat iffy
72
+ const sortOrderForX11 = ['a', 'n', 'd', 'c', 'e', 'g', 'j', '0', '1', '5', '9'];
73
73
  const sortOrderFor7XX = ['8', '7', 'i', 'a', 's', 't', 'b', 'c', 'd', 'm', 'h', 'k', 'o', 'x', 'z', 'g', 'q', 'w'];
74
74
  const sortOrderFor246 = ['i', 'a', 'b', 'n', 'p', 'f', '5', '9']; // Used by field 946 as well
75
75
 
@@ -89,7 +89,7 @@ const subfieldSortOrder = [
89
89
  {'tag': '245', 'sortOrder': ['a', 'b', 'n', 'p', 'k', 'f', 'c']},
90
90
  {'tag': '246', 'sortOrder': sortOrderFor246},
91
91
  {'tag': '382', 'sortOrder': ['a']},
92
- {'tag': '385', 'sortOrder': ['8', 'm', 'n', 'a', '2', '0']},
92
+ {'tag': '385', 'sortOrder': ['8', 'm', 'n', 'a', '2', '0', '1']},
93
93
  {'tag': '386', 'sortOrder': ['8', 'm', 'n', 'a']},
94
94
  {'tag': '490', 'sortOrder': ['a', 'x', 'y', 'v', 'l']},
95
95
  {'tag': '505', 'sortOrder': ['a']},
@@ -218,7 +218,9 @@ export function sortAdjacentSubfields(field, externalSortOrder = []) {
218
218
 
219
219
 
220
220
  const finnishWay = twoBeforeZero(field);
221
- const controlSubfieldOrder = finnishWay ? ['8', '7', '3', 'a', '4', '2', '0', '1', '5', '9'] : ['8', '7', '3', 'a', '4', '0', '2', '1', '5', '9'];
221
+
222
+ // Note: 760-789: '7' comes way earlier (after '6' and '8')
223
+ const controlSubfieldOrder = finnishWay ? ['8', '3', 'a', '4', '2', '0', '1', '7', '5', '9'] : ['8', '7', '3', 'a', '4', '0', '1', '2', '7', '5', '9'];
222
224
  swapSubfields(field, controlSubfieldOrder);
223
225
 
224
226
  const sortOrderForField = externalSortOrder.length > 0 ? externalSortOrder : getSubfieldSortOrder(field);