@natlibfi/marc-record-validators-melinda 11.6.7 → 12.0.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/melinda-node-tests.yml +1 -1
- package/dist/access-rights.js +63 -91
- package/dist/access-rights.js.map +7 -1
- package/dist/access-rights.test.js +137 -0
- package/dist/access-rights.test.js.map +7 -0
- package/dist/addMissingField041.js +21 -53
- package/dist/addMissingField041.js.map +7 -1
- package/dist/addMissingField041.test.js +39 -0
- package/dist/addMissingField041.test.js.map +7 -0
- package/dist/addMissingField336.js +99 -191
- package/dist/addMissingField336.js.map +7 -1
- package/dist/addMissingField336.test.js +39 -0
- package/dist/addMissingField336.test.js.map +7 -0
- package/dist/addMissingField337.js +63 -132
- package/dist/addMissingField337.js.map +7 -1
- package/dist/addMissingField337.test.js +39 -0
- package/dist/addMissingField337.test.js.map +7 -0
- package/dist/addMissingField338.js +147 -253
- package/dist/addMissingField338.js.map +7 -1
- package/dist/addMissingField338.test.js +39 -0
- package/dist/addMissingField338.test.js.map +7 -0
- package/dist/cyrillux-usemarcon-replacement.js +119 -272
- package/dist/cyrillux-usemarcon-replacement.js.map +7 -1
- package/dist/cyrillux-usemarcon-replacement.test.js +46 -0
- package/dist/cyrillux-usemarcon-replacement.test.js.map +7 -0
- package/dist/cyrillux.js +119 -223
- package/dist/cyrillux.js.map +7 -1
- package/dist/cyrillux.test.js +39 -0
- package/dist/cyrillux.test.js.map +7 -0
- package/dist/disambiguateSeriesStatements.js +40 -81
- package/dist/disambiguateSeriesStatements.js.map +7 -1
- package/dist/disambiguateSeriesStatements.test.js +44 -0
- package/dist/disambiguateSeriesStatements.test.js.map +7 -0
- package/dist/double-commas.js +7 -14
- package/dist/double-commas.js.map +7 -1
- package/dist/double-commas.test.js +48 -0
- package/dist/double-commas.test.js.map +7 -0
- package/dist/duplicates-ind1.js +10 -31
- package/dist/duplicates-ind1.js.map +7 -1
- package/dist/duplicates-ind1.test.js +40 -0
- package/dist/duplicates-ind1.test.js.map +7 -0
- package/dist/empty-fields.js +10 -22
- package/dist/empty-fields.js.map +7 -1
- package/dist/empty-fields.test.js +129 -0
- package/dist/empty-fields.test.js.map +7 -0
- package/dist/ending-punctuation-conf.js +871 -769
- package/dist/ending-punctuation-conf.js.map +7 -1
- package/dist/ending-punctuation.js +84 -167
- package/dist/ending-punctuation.js.map +7 -1
- package/dist/ending-punctuation.test.js +2290 -0
- package/dist/ending-punctuation.test.js.map +7 -0
- package/dist/ending-whitespace.js +10 -35
- package/dist/ending-whitespace.js.map +7 -1
- package/dist/ending-whitespace.test.js +38 -0
- package/dist/ending-whitespace.test.js.map +7 -0
- package/dist/field-008-18-34-character-groups.js +40 -125
- package/dist/field-008-18-34-character-groups.js.map +7 -1
- package/dist/field-008-18-34-character-groups.test.js +45 -0
- package/dist/field-008-18-34-character-groups.test.js.map +7 -0
- package/dist/field-505-separators.js +19 -39
- package/dist/field-505-separators.js.map +7 -1
- package/dist/field-505-separators.test.js +45 -0
- package/dist/field-505-separators.test.js.map +7 -0
- package/dist/field-521-fix.js +19 -47
- package/dist/field-521-fix.js.map +7 -1
- package/dist/field-521-fix.test.js +44 -0
- package/dist/field-521-fix.test.js.map +7 -0
- package/dist/field-exclusion.js +37 -91
- package/dist/field-exclusion.js.map +7 -1
- package/dist/field-exclusion.test.js +821 -0
- package/dist/field-exclusion.test.js.map +7 -0
- package/dist/field-structure.js +52 -104
- package/dist/field-structure.js.map +7 -1
- package/dist/field-structure.test.js +587 -0
- package/dist/field-structure.test.js.map +7 -0
- package/dist/field33XUtils.js +119 -503
- package/dist/field33XUtils.js.map +7 -1
- package/dist/fields-present.js +11 -23
- package/dist/fields-present.js.map +7 -1
- package/dist/fields-present.test.js +95 -0
- package/dist/fields-present.test.js.map +7 -0
- package/dist/fix-33X.js +393 -431
- package/dist/fix-33X.js.map +7 -1
- package/dist/fix-33X.test.js +39 -0
- package/dist/fix-33X.test.js.map +7 -0
- package/dist/fix-country-codes.js +20 -50
- package/dist/fix-country-codes.js.map +7 -1
- package/dist/fix-country-codes.test.js +44 -0
- package/dist/fix-country-codes.test.js.map +7 -0
- package/dist/fix-language-codes.js +23 -53
- package/dist/fix-language-codes.js.map +7 -1
- package/dist/fix-language-codes.test.js +38 -0
- package/dist/fix-language-codes.test.js.map +7 -0
- package/dist/fixRelatorTerms.js +82 -209
- package/dist/fixRelatorTerms.js.map +7 -1
- package/dist/fixRelatorTerms.test.js +44 -0
- package/dist/fixRelatorTerms.test.js.map +7 -0
- package/dist/fixed-fields.js +21 -30
- package/dist/fixed-fields.js.map +7 -1
- package/dist/fixed-fields.test.js +87 -0
- package/dist/fixed-fields.test.js.map +7 -0
- package/dist/identical-fields.js +8 -24
- package/dist/identical-fields.js.map +7 -1
- package/dist/identical-fields.test.js +119 -0
- package/dist/identical-fields.test.js.map +7 -0
- package/dist/index.js +119 -413
- package/dist/index.js.map +7 -1
- package/dist/indicator-fixes.js +57 -95
- package/dist/indicator-fixes.js.map +7 -1
- package/dist/indicator-fixes.test.js +42 -0
- package/dist/indicator-fixes.test.js.map +7 -0
- package/dist/isbn-issn.js +66 -126
- package/dist/isbn-issn.js.map +7 -1
- package/dist/isbn-issn.test.js +398 -0
- package/dist/isbn-issn.test.js.map +7 -0
- package/dist/item-language.js +32 -65
- package/dist/item-language.js.map +7 -1
- package/dist/item-language.test.js +322 -0
- package/dist/item-language.test.js.map +7 -0
- package/dist/melindaCustomMergeFields.js +5182 -11233
- package/dist/melindaCustomMergeFields.js.map +7 -1
- package/dist/merge-fields/controlSubfields.js +75 -142
- package/dist/merge-fields/controlSubfields.js.map +7 -1
- package/dist/merge-fields/counterpartField.js +182 -379
- package/dist/merge-fields/counterpartField.js.map +7 -1
- package/dist/merge-fields/index.js +15 -49
- package/dist/merge-fields/index.js.map +7 -1
- package/dist/merge-fields/mergableIndicator.js +18 -51
- package/dist/merge-fields/mergableIndicator.js.map +7 -1
- package/dist/merge-fields/mergableTag.js +78 -30
- package/dist/merge-fields/mergableTag.js.map +7 -1
- package/dist/merge-fields/mergeConfig.js +66 -171
- package/dist/merge-fields/mergeConfig.js.map +7 -1
- package/dist/merge-fields/mergeConstraints.js +323 -1214
- package/dist/merge-fields/mergeConstraints.js.map +7 -1
- package/dist/merge-fields/mergeField.js +47 -111
- package/dist/merge-fields/mergeField.js.map +7 -1
- package/dist/merge-fields/mergeIndicator.js +64 -118
- package/dist/merge-fields/mergeIndicator.js.map +7 -1
- package/dist/merge-fields/mergeOrAddPostprocess.js +14 -38
- package/dist/merge-fields/mergeOrAddPostprocess.js.map +7 -1
- package/dist/merge-fields/mergeOrAddSubfield.js +62 -104
- package/dist/merge-fields/mergeOrAddSubfield.js.map +7 -1
- package/dist/merge-fields/mergeSubfield.js +47 -95
- package/dist/merge-fields/mergeSubfield.js.map +7 -1
- package/dist/merge-fields/removeDuplicateSubfields.js +18 -31
- package/dist/merge-fields/removeDuplicateSubfields.js.map +7 -1
- package/dist/merge-fields/worldKnowledge.js +15 -40
- package/dist/merge-fields/worldKnowledge.js.map +7 -1
- package/dist/merge-fields.test.js +44 -0
- package/dist/merge-fields.test.js.map +7 -0
- package/dist/mergeField500Lisapainokset.js +28 -57
- package/dist/mergeField500Lisapainokset.js.map +7 -1
- package/dist/mergeField500Lisapainokset.test.js +44 -0
- package/dist/mergeField500Lisapainokset.test.js.map +7 -0
- package/dist/mergeRelatorTermFields.js +33 -69
- package/dist/mergeRelatorTermFields.js.map +7 -1
- package/dist/mergeRelatorTermFields.test.js +44 -0
- package/dist/mergeRelatorTermFields.test.js.map +7 -0
- package/dist/modernize-502.js +23 -55
- package/dist/modernize-502.js.map +7 -1
- package/dist/modernize-502.test.js +38 -0
- package/dist/modernize-502.test.js.map +7 -0
- package/dist/multiple-subfield-0.js +23 -48
- package/dist/multiple-subfield-0.js.map +7 -1
- package/dist/multiple-subfield-0.test.js +44 -0
- package/dist/multiple-subfield-0.test.js.map +7 -0
- package/dist/non-breaking-space.js +11 -32
- package/dist/non-breaking-space.js.map +7 -1
- package/dist/non-breaking-space.test.js +38 -0
- package/dist/non-breaking-space.test.js.map +7 -0
- package/dist/normalize-dashes.js +18 -37
- package/dist/normalize-dashes.js.map +7 -1
- package/dist/normalize-dashes.test.js +44 -0
- package/dist/normalize-dashes.test.js.map +7 -0
- package/dist/normalize-identifiers.js +54 -140
- package/dist/normalize-identifiers.js.map +7 -1
- package/dist/normalize-identifiers.test.js +44 -0
- package/dist/normalize-identifiers.test.js.map +7 -0
- package/dist/normalize-qualifying-information.js +23 -48
- package/dist/normalize-qualifying-information.js.map +7 -1
- package/dist/normalize-qualifying-information.test.js +44 -0
- package/dist/normalize-qualifying-information.test.js.map +7 -0
- package/dist/normalize-utf8-diacritics.js +19 -105
- package/dist/normalize-utf8-diacritics.js.map +7 -1
- package/dist/normalize-utf8-diacritics.test.js +44 -0
- package/dist/normalize-utf8-diacritics.test.js.map +7 -0
- package/dist/normalizeFieldForComparison.js +67 -158
- package/dist/normalizeFieldForComparison.js.map +7 -1
- package/dist/normalizeSubfieldValueForComparison.js +37 -77
- package/dist/normalizeSubfieldValueForComparison.js.map +7 -1
- package/dist/prepublicationUtils.js +58 -111
- package/dist/prepublicationUtils.js.map +7 -1
- package/dist/punctuation/index.js +56 -72
- package/dist/punctuation/index.js.map +7 -1
- package/dist/punctuation/rules/aut.js +372 -331
- package/dist/punctuation/rules/aut.js.map +7 -1
- package/dist/punctuation/rules/bib.js +420 -373
- package/dist/punctuation/rules/bib.js.map +7 -1
- package/dist/punctuation/rules/index.js +7 -21
- package/dist/punctuation/rules/index.js.map +7 -1
- package/dist/punctuation.test.js +44 -0
- package/dist/punctuation.test.js.map +7 -0
- package/dist/punctuation2.js +251 -800
- package/dist/punctuation2.js.map +7 -1
- package/dist/punctuation2.test.js +44 -0
- package/dist/punctuation2.test.js.map +7 -0
- package/dist/reindexSubfield6OccurenceNumbers.js +61 -96
- package/dist/reindexSubfield6OccurenceNumbers.js.map +7 -1
- package/dist/reindexSubfield6OccurenceNumbers.test.js +44 -0
- package/dist/reindexSubfield6OccurenceNumbers.test.js.map +7 -0
- package/dist/removeDuplicateDataFields.js +102 -202
- package/dist/removeDuplicateDataFields.js.map +7 -1
- package/dist/removeDuplicateDataFields.test.js +44 -0
- package/dist/removeDuplicateDataFields.test.js.map +7 -0
- package/dist/removeInferiorDataFields.js +103 -227
- package/dist/removeInferiorDataFields.js.map +7 -1
- package/dist/removeInferiorDataFields.test.js +44 -0
- package/dist/removeInferiorDataFields.test.js.map +7 -0
- package/dist/resolvable-ext-references-melinda.js +25 -60
- package/dist/resolvable-ext-references-melinda.js.map +7 -1
- package/dist/resolvable-ext-references-melinda.test.js +160 -0
- package/dist/resolvable-ext-references-melinda.test.js.map +7 -0
- package/dist/resolveOrphanedSubfield6s.js +33 -64
- package/dist/resolveOrphanedSubfield6s.js.map +7 -1
- package/dist/resolveOrphanedSubfield6s.test.js +44 -0
- package/dist/resolveOrphanedSubfield6s.test.js.map +7 -0
- package/dist/sanitize-vocabulary-source-codes.js +27 -55
- package/dist/sanitize-vocabulary-source-codes.js.map +7 -1
- package/dist/sanitize-vocabulary-source-codes.test.js +45 -0
- package/dist/sanitize-vocabulary-source-codes.test.js.map +7 -0
- package/dist/sort-tags.js +13 -25
- package/dist/sort-tags.js.map +7 -1
- package/dist/sort-tags.test.js +261 -0
- package/dist/sort-tags.test.js.map +7 -0
- package/dist/sortFields.js +152 -222
- package/dist/sortFields.js.map +7 -1
- package/dist/sortFields.test.js +44 -0
- package/dist/sortFields.test.js.map +7 -0
- package/dist/sortRelatorTerms.js +30 -68
- package/dist/sortRelatorTerms.js.map +7 -1
- package/dist/sortRelatorTerms.test.js +44 -0
- package/dist/sortRelatorTerms.test.js.map +7 -0
- package/dist/sortSubfields.js +102 -255
- package/dist/sortSubfields.js.map +7 -1
- package/dist/sortSubfields.test.js +44 -0
- package/dist/sortSubfields.test.js.map +7 -0
- package/dist/stripPunctuation.js +13 -36
- package/dist/stripPunctuation.js.map +7 -1
- package/dist/stripPunctuation.test.js +44 -0
- package/dist/stripPunctuation.test.js.map +7 -0
- package/dist/subfield-exclusion.js +28 -75
- package/dist/subfield-exclusion.js.map +7 -1
- package/dist/subfield-exclusion.test.js +471 -0
- package/dist/subfield-exclusion.test.js.map +7 -0
- package/dist/subfield6Utils.js +107 -269
- package/dist/subfield6Utils.js.map +7 -1
- package/dist/subfield8Utils.js +26 -50
- package/dist/subfield8Utils.js.map +7 -1
- package/dist/subfieldValueNormalizations.js +40 -74
- package/dist/subfieldValueNormalizations.js.map +7 -1
- package/dist/subfieldValueNormalizations.test.js +45 -0
- package/dist/subfieldValueNormalizations.test.js.map +7 -0
- package/dist/sync-007-and-300.js +22 -53
- package/dist/sync-007-and-300.js.map +7 -1
- package/dist/sync-007-and-300.test.js +44 -0
- package/dist/sync-007-and-300.test.js.map +7 -0
- package/dist/translate-terms.js +67 -155
- package/dist/translate-terms.js.map +7 -1
- package/dist/translate-terms.test.js +44 -0
- package/dist/translate-terms.test.js.map +7 -0
- package/dist/typeOfDate-008.js +10 -25
- package/dist/typeOfDate-008.js.map +7 -1
- package/dist/typeOfDate-008.test.js +40 -0
- package/dist/typeOfDate-008.test.js.map +7 -0
- package/dist/unicode-decomposition.js +94 -107
- package/dist/unicode-decomposition.js.map +7 -1
- package/dist/unicode-decomposition.test.js +94 -0
- package/dist/unicode-decomposition.test.js.map +7 -0
- package/dist/update-field-540.js +30 -75
- package/dist/update-field-540.js.map +7 -1
- package/dist/update-field-540.test.js +44 -0
- package/dist/update-field-540.test.js.map +7 -0
- package/dist/urn.js +55 -128
- package/dist/urn.js.map +7 -1
- package/dist/urn.test.js +44 -0
- package/dist/urn.test.js.map +7 -0
- package/dist/utils.js +72 -126
- package/dist/utils.js.map +7 -1
- package/eslint.config.mjs +1 -2
- package/package.json +21 -93
- package/src/access-rights.js +1 -1
- package/src/{access-rights.spec.js → access-rights.test.js} +9 -10
- package/src/addMissingField041.js +1 -1
- package/src/{addMissingField336.spec.js → addMissingField041.test.js} +13 -14
- package/src/addMissingField336.js +3 -3
- package/src/{addMissingField041.spec.js → addMissingField336.test.js} +13 -14
- package/src/addMissingField337.js +2 -2
- package/src/{addMissingField337.spec.js → addMissingField337.test.js} +13 -14
- package/src/addMissingField338.js +2 -2
- package/src/{addMissingField338.spec.js → addMissingField338.test.js} +13 -14
- package/src/cyrillux-usemarcon-replacement.js +18 -18
- package/src/cyrillux-usemarcon-replacement.test.js +55 -0
- package/src/cyrillux.js +19 -12
- package/src/{cyrillux.spec.js → cyrillux.test.js} +13 -14
- package/src/disambiguateSeriesStatements.js +2 -2
- package/src/{disambiguateSeriesStatements.spec.js → disambiguateSeriesStatements.test.js} +12 -13
- package/src/double-commas.js +1 -1
- package/src/{double-commas.spec.js → double-commas.test.js} +9 -11
- package/src/duplicates-ind1.js +1 -1
- package/src/{duplicates-ind1.spec.js → duplicates-ind1.test.js} +12 -13
- package/src/{empty-fields.spec.js → empty-fields.test.js} +11 -13
- package/src/ending-punctuation.js +1 -1
- package/src/{ending-punctuation.spec.js → ending-punctuation.test.js} +172 -173
- package/src/{ending-whitespace.spec.js → ending-whitespace.test.js} +12 -13
- package/src/field-008-18-34-character-groups.js +2 -2
- package/src/{field-008-18-34-character-groups.spec.js → field-008-18-34-character-groups.test.js} +13 -13
- package/src/field-505-separators.js +3 -3
- package/src/{field-505-separators.spec.js → field-505-separators.test.js} +16 -14
- package/src/field-521-fix.js +2 -2
- package/src/{field-521-fix.spec.js → field-521-fix.test.js} +12 -13
- package/src/field-exclusion.js +1 -1
- package/src/{field-exclusion.spec.js → field-exclusion.test.js} +60 -57
- package/src/{field-structure.spec.js → field-structure.test.js} +29 -29
- package/src/{fields-present.spec.js → fields-present.test.js} +12 -15
- package/src/fix-33X.js +4 -4
- package/src/{fix-33X.spec.js → fix-33X.test.js} +13 -14
- package/src/fix-country-codes.js +1 -1
- package/src/{fix-country-codes.spec.js → fix-country-codes.test.js} +12 -13
- package/src/fix-language-codes.js +5 -5
- package/src/{fix-language-codes.spec.js → fix-language-codes.test.js} +12 -13
- package/src/fixRelatorTerms.js +5 -5
- package/src/{fixRelatorTerms.spec.js → fixRelatorTerms.test.js} +13 -13
- package/src/{fixed-fields.spec.js → fixed-fields.test.js} +11 -14
- package/src/identical-fields.js +1 -1
- package/src/{identical-fields.spec.js → identical-fields.test.js} +9 -11
- package/src/indicator-fixes.js +3 -3
- package/src/{indicator-fixes.spec.js → indicator-fixes.test.js} +9 -12
- package/src/isbn-issn.js +1 -1
- package/src/{isbn-issn.spec.js → isbn-issn.test.js} +20 -22
- package/src/{item-language.spec.js → item-language.test.js} +21 -22
- package/src/merge-fields/controlSubfields.js +1 -1
- package/src/merge-fields/counterpartField.js +8 -9
- package/src/merge-fields/index.js +1 -1
- package/src/merge-fields/mergableIndicator.js +1 -1
- package/src/merge-fields/mergeField.js +6 -6
- package/src/merge-fields/mergeIndicator.js +1 -1
- package/src/merge-fields/mergeOrAddPostprocess.js +4 -4
- package/src/merge-fields/mergeOrAddSubfield.js +2 -2
- package/src/merge-fields/mergeSubfield.js +4 -4
- package/src/merge-fields/removeDuplicateSubfields.js +2 -2
- package/src/{merge-fields.spec.js → merge-fields.test.js} +12 -13
- package/src/{mergeField500Lisapainokset.spec.js → mergeField500Lisapainokset.test.js} +12 -13
- package/src/mergeRelatorTermFields.js +5 -7
- package/src/{mergeRelatorTermFields.spec.js → mergeRelatorTermFields.test.js} +12 -13
- package/src/modernize-502.js +1 -1
- package/src/{modernize-502.spec.js → modernize-502.test.js} +12 -13
- package/src/multiple-subfield-0.js +3 -3
- package/src/{multiple-subfield-0.spec.js → multiple-subfield-0.test.js} +13 -13
- package/src/{non-breaking-space.spec.js → non-breaking-space.test.js} +12 -13
- package/src/normalize-dashes.js +2 -2
- package/src/{normalize-dashes.spec.js → normalize-dashes.test.js} +12 -13
- package/src/normalize-identifiers.js +1 -1
- package/src/{normalize-identifiers.spec.js → normalize-identifiers.test.js} +12 -13
- package/src/normalize-qualifying-information.js +2 -2
- package/src/{normalize-qualifying-information.spec.js → normalize-qualifying-information.test.js} +12 -13
- package/src/normalize-utf8-diacritics.js +2 -2
- package/src/{normalize-utf8-diacritics.spec.js → normalize-utf8-diacritics.test.js} +13 -13
- package/src/normalizeFieldForComparison.js +6 -6
- package/src/normalizeSubfieldValueForComparison.js +1 -1
- package/src/prepublicationUtils.js +4 -4
- package/src/punctuation/index.js +1 -1
- package/src/punctuation/rules/index.js +2 -2
- package/src/{punctuation.spec.js → punctuation.test.js} +12 -13
- package/src/punctuation2.js +4 -4
- package/src/{punctuation2.spec.js → punctuation2.test.js} +12 -13
- package/src/reindexSubfield6OccurenceNumbers.js +5 -7
- package/src/{reindexSubfield6OccurenceNumbers.spec.js → reindexSubfield6OccurenceNumbers.test.js} +12 -13
- package/src/removeDuplicateDataFields.js +11 -19
- package/src/{removeDuplicateDataFields.spec.js → removeDuplicateDataFields.test.js} +12 -13
- package/src/removeInferiorDataFields.js +11 -11
- package/src/{removeInferiorDataFields.spec.js → removeInferiorDataFields.test.js} +13 -13
- package/src/resolvable-ext-references-melinda.js +1 -1
- package/src/{resolvable-ext-references-melinda.spec.js → resolvable-ext-references-melinda.test.js} +42 -27
- package/src/resolveOrphanedSubfield6s.js +5 -5
- package/src/{resolveOrphanedSubfield6s.spec.js → resolveOrphanedSubfield6s.test.js} +13 -13
- package/src/sanitize-vocabulary-source-codes.js +4 -4
- package/src/{sanitize-vocabulary-source-codes.spec.js → sanitize-vocabulary-source-codes.test.js} +16 -14
- package/src/{sort-tags.spec.js → sort-tags.test.js} +9 -11
- package/src/sortFields.js +4 -4
- package/src/{sortFields.spec.js → sortFields.test.js} +12 -13
- package/src/sortRelatorTerms.js +3 -3
- package/src/{sortRelatorTerms.spec.js → sortRelatorTerms.test.js} +13 -13
- package/src/sortSubfields.js +1 -1
- package/src/{sortSubfields.spec.js → sortSubfields.test.js} +13 -13
- package/src/stripPunctuation.js +3 -3
- package/src/{stripPunctuation.spec.js → stripPunctuation.test.js} +13 -13
- package/src/subfield-exclusion.js +1 -1
- package/src/{subfield-exclusion.spec.js → subfield-exclusion.test.js} +45 -36
- package/src/subfield6Utils.js +6 -10
- package/src/subfield8Utils.js +4 -4
- package/src/subfieldValueNormalizations.js +3 -3
- package/src/{subfieldValueNormalizations.spec.js → subfieldValueNormalizations.test.js} +18 -14
- package/src/sync-007-and-300.js +2 -2
- package/src/{sync-007-and-300.spec.js → sync-007-and-300.test.js} +13 -13
- package/src/translate-terms.js +3 -3
- package/src/{translate-terms.spec.js → translate-terms.test.js} +13 -13
- package/src/{typeOfDate-008.spec.js → typeOfDate-008.test.js} +12 -13
- package/src/{unicode-decomposition.spec.js → unicode-decomposition.test.js} +10 -16
- package/src/update-field-540.js +2 -2
- package/src/{update-field-540.spec.js → update-field-540.test.js} +13 -10
- package/src/urn.js +2 -2
- package/src/{urn.spec.js → urn.test.js} +12 -13
- package/src/utils.js +3 -3
- package/test-fixtures/field-505-separators/03/expectedResult.json +3 -1
- package/test-fixtures/field-505-separators/03/record.json +3 -0
- package/test-fixtures/normalize-subfield-value/01/metadata.json +4 -1
- package/test-fixtures/normalize-subfield-value/01/record.json +3 -0
- package/test-fixtures/normalize-subfield-value/02/expectedResult.json +3 -1
- package/test-fixtures/normalize-subfield-value/02/metadata.json +2 -1
- package/test-fixtures/normalize-subfield-value/02/record.json +3 -0
- package/test-fixtures/sanitize-vocabulary-source-codes/f03/expectedResult.json +3 -1
- package/test-fixtures/sanitize-vocabulary-source-codes/f04/expectedResult.json +3 -1
- package/test-fixtures/sanitize-vocabulary-source-codes/v04/metadata.json +1 -4
- package/test-fixtures/sanitize-vocabulary-source-codes/v04/record.json +1 -1
- package/dist/access-rights.spec.js +0 -195
- package/dist/access-rights.spec.js.map +0 -1
- package/dist/addMissingField041.spec.js +0 -45
- package/dist/addMissingField041.spec.js.map +0 -1
- package/dist/addMissingField336.spec.js +0 -45
- package/dist/addMissingField336.spec.js.map +0 -1
- package/dist/addMissingField337.spec.js +0 -43
- package/dist/addMissingField337.spec.js.map +0 -1
- package/dist/addMissingField338.spec.js +0 -45
- package/dist/addMissingField338.spec.js.map +0 -1
- package/dist/cyrillux-usemarcon-replacement.spec.js +0 -45
- package/dist/cyrillux-usemarcon-replacement.spec.js.map +0 -1
- package/dist/cyrillux.spec.js +0 -46
- package/dist/cyrillux.spec.js.map +0 -1
- package/dist/disambiguateSeriesStatements.spec.js +0 -51
- package/dist/disambiguateSeriesStatements.spec.js.map +0 -1
- package/dist/double-commas.spec.js +0 -73
- package/dist/double-commas.spec.js.map +0 -1
- package/dist/duplicates-ind1.spec.js +0 -45
- package/dist/duplicates-ind1.spec.js.map +0 -1
- package/dist/empty-fields.spec.js +0 -118
- package/dist/empty-fields.spec.js.map +0 -1
- package/dist/ending-punctuation.spec.js +0 -2654
- package/dist/ending-punctuation.spec.js.map +0 -1
- package/dist/ending-whitespace.spec.js +0 -42
- package/dist/ending-whitespace.spec.js.map +0 -1
- package/dist/field-008-18-34-character-groups.spec.js +0 -51
- package/dist/field-008-18-34-character-groups.spec.js.map +0 -1
- package/dist/field-505-separators.spec.js +0 -51
- package/dist/field-505-separators.spec.js.map +0 -1
- package/dist/field-521-fix.spec.js +0 -51
- package/dist/field-521-fix.spec.js.map +0 -1
- package/dist/field-exclusion.spec.js +0 -1054
- package/dist/field-exclusion.spec.js.map +0 -1
- package/dist/field-structure.spec.js +0 -535
- package/dist/field-structure.spec.js.map +0 -1
- package/dist/fields-present.spec.js +0 -121
- package/dist/fields-present.spec.js.map +0 -1
- package/dist/fix-33X.spec.js +0 -45
- package/dist/fix-33X.spec.js.map +0 -1
- package/dist/fix-country-codes.spec.js +0 -51
- package/dist/fix-country-codes.spec.js.map +0 -1
- package/dist/fix-language-codes.spec.js +0 -44
- package/dist/fix-language-codes.spec.js.map +0 -1
- package/dist/fixRelatorTerms.spec.js +0 -51
- package/dist/fixRelatorTerms.spec.js.map +0 -1
- package/dist/fixed-fields.spec.js +0 -140
- package/dist/fixed-fields.spec.js.map +0 -1
- package/dist/identical-fields.spec.js +0 -99
- package/dist/identical-fields.spec.js.map +0 -1
- package/dist/indicator-fixes.spec.js +0 -51
- package/dist/indicator-fixes.spec.js.map +0 -1
- package/dist/isbn-issn.spec.js +0 -595
- package/dist/isbn-issn.spec.js.map +0 -1
- package/dist/item-language.spec.js +0 -306
- package/dist/item-language.spec.js.map +0 -1
- package/dist/melindaCustomMergeFields.json +0 -5120
- package/dist/merge-fields.spec.js +0 -51
- package/dist/merge-fields.spec.js.map +0 -1
- package/dist/mergeField500Lisapainokset.spec.js +0 -51
- package/dist/mergeField500Lisapainokset.spec.js.map +0 -1
- package/dist/mergeRelatorTermFields.spec.js +0 -51
- package/dist/mergeRelatorTermFields.spec.js.map +0 -1
- package/dist/modernize-502.spec.js +0 -49
- package/dist/modernize-502.spec.js.map +0 -1
- package/dist/multiple-subfield-0.spec.js +0 -51
- package/dist/multiple-subfield-0.spec.js.map +0 -1
- package/dist/non-breaking-space.spec.js +0 -42
- package/dist/non-breaking-space.spec.js.map +0 -1
- package/dist/normalize-dashes.spec.js +0 -51
- package/dist/normalize-dashes.spec.js.map +0 -1
- package/dist/normalize-identifiers.spec.js +0 -51
- package/dist/normalize-identifiers.spec.js.map +0 -1
- package/dist/normalize-qualifying-information.spec.js +0 -51
- package/dist/normalize-qualifying-information.spec.js.map +0 -1
- package/dist/normalize-utf8-diacritics.spec.js +0 -51
- package/dist/normalize-utf8-diacritics.spec.js.map +0 -1
- package/dist/punctuation.spec.js +0 -51
- package/dist/punctuation.spec.js.map +0 -1
- package/dist/punctuation2.spec.js +0 -51
- package/dist/punctuation2.spec.js.map +0 -1
- package/dist/reindexSubfield6OccurenceNumbers.spec.js +0 -51
- package/dist/reindexSubfield6OccurenceNumbers.spec.js.map +0 -1
- package/dist/removeDuplicateDataFields.spec.js +0 -51
- package/dist/removeDuplicateDataFields.spec.js.map +0 -1
- package/dist/removeInferiorDataFields.spec.js +0 -51
- package/dist/removeInferiorDataFields.spec.js.map +0 -1
- package/dist/resolvable-ext-references-melinda.spec.js +0 -166
- package/dist/resolvable-ext-references-melinda.spec.js.map +0 -1
- package/dist/resolveOrphanedSubfield6s.spec.js +0 -51
- package/dist/resolveOrphanedSubfield6s.spec.js.map +0 -1
- package/dist/sanitize-vocabulary-source-codes.spec.js +0 -51
- package/dist/sanitize-vocabulary-source-codes.spec.js.map +0 -1
- package/dist/sort-tags.spec.js +0 -207
- package/dist/sort-tags.spec.js.map +0 -1
- package/dist/sortFields.spec.js +0 -51
- package/dist/sortFields.spec.js.map +0 -1
- package/dist/sortRelatorTerms.spec.js +0 -51
- package/dist/sortRelatorTerms.spec.js.map +0 -1
- package/dist/sortSubfields.spec.js +0 -52
- package/dist/sortSubfields.spec.js.map +0 -1
- package/dist/stripPunctuation.spec.js +0 -51
- package/dist/stripPunctuation.spec.js.map +0 -1
- package/dist/subfield-exclusion.spec.js +0 -523
- package/dist/subfield-exclusion.spec.js.map +0 -1
- package/dist/subfieldValueNormalizations.spec.js +0 -51
- package/dist/subfieldValueNormalizations.spec.js.map +0 -1
- package/dist/sync-007-and-300.spec.js +0 -51
- package/dist/sync-007-and-300.spec.js.map +0 -1
- package/dist/translate-terms.spec.js +0 -51
- package/dist/translate-terms.spec.js.map +0 -1
- package/dist/typeOfDate-008.spec.js +0 -47
- package/dist/typeOfDate-008.spec.js.map +0 -1
- package/dist/unicode-decomposition.spec.js +0 -91
- package/dist/unicode-decomposition.spec.js.map +0 -1
- package/dist/update-field-540.spec.js +0 -51
- package/dist/update-field-540.spec.js.map +0 -1
- package/dist/urn.spec.js +0 -52
- package/dist/urn.spec.js.map +0 -1
- package/src/cyrillux-usemarcon-replacement.spec.js +0 -47
package/dist/normalizeFieldForComparison.js

@@ -1,103 +1,60 @@
-
-
-
-
-
-
-
-
-
-
-var _clone = _interopRequireDefault(require("clone"));
-var _punctuation = require("./punctuation2");
-var _utils = require("./utils.js");
-var _normalizeIdentifiers = require("./normalize-identifiers");
-var _debug = _interopRequireDefault(require("debug"));
-var _normalizeSubfieldValueForComparison = require("./normalizeSubfieldValueForComparison");
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
-/*
- Note that this file contains very powerful normalizations and spells that are:
- - meant for comparing similarity/mergability of two fields (clone, normalize, compare),
- - and NOT for modifying the actual field!
-
- This is mainly used by melinda-marc-record-merge-reducers. However, also removeInferiorDataFields fixer also used this.
- Thus it is here. However, most of the testing is done via merge-reducers...
-*/
-
-const debug = (0, _debug.default)('@natlibfi/melinda-marc-record-merge-reducers:normalizeFieldForComparison');
-//const debugData = debug.extend('data');
-const debugDev = debug.extend('dev');
-function isEnnakkotietoSubfieldG(subfield) {
-  if (subfield.code !== 'g') {
+import clone from "clone";
+import { fieldStripPunctuation } from "./punctuation2.js";
+import { fieldToString, isControlSubfieldCode } from "./utils.js";
+import { fieldNormalizeControlNumbers } from "./normalize-identifiers.js";
+import createDebugLogger from "debug";
+import { normalizePartData, subfieldContainsPartData } from "./normalizeSubfieldValueForComparison.js";
+const debug = createDebugLogger("@natlibfi/melinda-marc-record-merge-reducers:normalizeFieldForComparison");
+const debugDev = debug.extend("dev");
+export function isEnnakkotietoSubfieldG(subfield) {
+  if (subfield.code !== "g") {
     return false;
   }
   return subfield.value.match(/^ENNAKKOTIETO\.?$/gui);
 }
 function debugFieldComparison(oldField, newField) {
-
-
-  // We may drop certain subfields:
-  if (oldField.subfields.length === newField.subfields.length) {
-    oldField.subfields.forEach((subfield, index) => {
-      const newValue = newField.subfields[index].value;
-      if (subfield.value !== newValue) {
-        nvdebug(`NORMALIZE SUBFIELD: '${subfield.value}' => '${newValue}'`, debugDev);
-      }
-    });
-  }
-  */
-  const oldString = (0, _utils.fieldToString)(oldField);
-  const newString = (0, _utils.fieldToString)(newField);
+  const oldString = fieldToString(oldField);
+  const newString = fieldToString(newField);
   if (oldString === newString) {
     return;
   }
-  //nvdebug(`NORMALIZE FIELD:\n '${fieldToString(oldField)}' =>\n '${fieldToString(newField)}'`, debugDev);
 }
-function containsHumanName(tag =
-
-
-    if (subfieldCode === undefined || subfieldCode === 'a') {
+function containsHumanName(tag = "???", subfieldCode = void 0) {
+  if (["100", "600", "700", "800"].includes(tag)) {
+    if (subfieldCode === void 0 || subfieldCode === "a") {
       return true;
     }
   }
-  // Others?
   return false;
 }
-function containsCorporateName(tag =
-
-
-    if (subfieldCode === undefined || subfieldCode === 'a') {
+function containsCorporateName(tag = "???", subfieldCode = void 0) {
+  if (["110", "610", "710", "810"].includes(tag)) {
+    if (subfieldCode === void 0 || subfieldCode === "a") {
       return true;
     }
   }
-  // Others?
   return false;
 }
 function skipAllSubfieldNormalizations(value, subfieldCode, tag) {
-  if (isEnnakkotietoSubfieldG({
-    'code': subfieldCode,
-    value
-  })) {
+  if (isEnnakkotietoSubfieldG({ "code": subfieldCode, value })) {
     return true;
   }
-  if (tag ===
-  // A
+  if (tag === "035" && ["a", "z"].includes(subfieldCode)) {
     return true;
   }
-  if (
+  if (isControlSubfieldCode(subfieldCode)) {
     return true;
   }
   return false;
 }
 function skipSubfieldLowercase(value, subfieldCode, tag) {
-
-  if ((0, _normalizeSubfieldValueForComparison.subfieldContainsPartData)(tag, subfieldCode)) {
+  if (subfieldContainsPartData(tag, subfieldCode)) {
     return true;
   }
   return skipAllSubfieldNormalizations(value, subfieldCode, tag);
 }
 function skipAllFieldNormalizations(tag) {
-  if ([
+  if (["LOW", "SID"].includes(tag)) {
     return true;
   }
   return false;
@@ -106,12 +63,8 @@ function subfieldValueLowercase(value, subfieldCode, tag) {
   if (skipSubfieldLowercase(value, subfieldCode, tag)) {
     return value;
   }
-
-  //return value.toLowerCase();
   const newValue = value.toLowerCase();
   if (newValue !== value) {
-    //nvdebug(`SVL ${tag} $${subfieldCode} '${value}' =>`, debugDev);
-    //nvdebug(`SVL ${tag} $${subfieldCode} '${newValue}'`, debugDev);
     return newValue;
   }
   return value;
@@ -123,49 +76,43 @@ function fieldLowercase(field) {
   if (skipFieldLowercase(field)) {
     return;
   }
-  field.subfields.forEach(sf => subfieldLowercase(sf, field.tag));
-
-
-    if (skipAllFieldNormalizations(field.tag)) {
+  field.subfields.forEach((sf) => subfieldLowercase(sf, field.tag));
+  function skipFieldLowercase(field2) {
+    if (skipAllFieldNormalizations(field2.tag)) {
       return true;
     }
-
-    if (!containsHumanName(field.tag) && !containsCorporateName(field.tag) && !['240', '245', '630'].includes(field.tag)) {
+    if (!containsHumanName(field2.tag) && !containsCorporateName(field2.tag) && !["240", "245", "630"].includes(field2.tag)) {
       return true;
     }
     return false;
   }
 }
 function hack490SubfieldA(field) {
-  if (field.tag !==
+  if (field.tag !== "490") {
     return;
   }
-  field.subfields.forEach(sf => removeSarja(sf));
-
-  // NB! This won't work, if the punctuation has not been stripped beforehand!
+  field.subfields.forEach((sf) => removeSarja(sf));
   function removeSarja(subfield) {
-    if (subfield.code !==
+    if (subfield.code !== "a") {
       return;
     }
-    const tmp = subfield.value.replace(/ ?-(?:[a-z]|ä|ö)*sarja$/u,
+    const tmp = subfield.value.replace(/ ?-(?:[a-z]|ä|ö)*sarja$/u, "");
     if (tmp.length > 0) {
       subfield.value = tmp;
       return;
     }
   }
 }
-function tagAndSubfieldCodeReferToIsbn(tag, subfieldCode) {
-
-  if (subfieldCode === 'z' && ['765', '767', '770', '772', '773', '774', '776', '777', '780', '785', '786', '787'].includes(tag)) {
+export function tagAndSubfieldCodeReferToIsbn(tag, subfieldCode) {
+  if (subfieldCode === "z" && ["765", "767", "770", "772", "773", "774", "776", "777", "780", "785", "786", "787"].includes(tag)) {
     return true;
   }
-  if (tag ===
+  if (tag === "020" && subfieldCode === "a") {
     return true;
   }
   return false;
 }
 function looksLikeIsbn(value) {
-  // Does not check validity!
   if (value.match(/^(?:[0-9]-?){9}(?:[0-9]-?[0-9]-?[0-9]-?)?[0-9Xx]$/u)) {
     return true;
   }
@@ -175,111 +122,74 @@ function normalizeISBN(field) {
   if (!field.subfields) {
     return;
   }
-
-
-  const relevantSubfields = field.subfields.filter(sf => tagAndSubfieldCodeReferToIsbn(field.tag, sf.code) && looksLikeIsbn(sf.value));
-  relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf)); // eslint-disable-line array-callback-return
-
+  const relevantSubfields = field.subfields.filter((sf) => tagAndSubfieldCodeReferToIsbn(field.tag, sf.code) && looksLikeIsbn(sf.value));
+  relevantSubfields.forEach((sf) => normalizeIsbnSubfield(sf));
   function normalizeIsbnSubfield(sf) {
-
-    sf.value = sf.value.replace(
-    sf.value = sf.value.replace(/x/u, 'X');
+    sf.value = sf.value.replace(/-/ug, "");
+    sf.value = sf.value.replace(/x/u, "X");
   }
 }
 function fieldSpecificHacks(field) {
-  normalizeISBN(field);
+  normalizeISBN(field);
   hack490SubfieldA(field);
 }
-function fieldTrimSubfieldValues(field) {
-  field.subfields?.forEach(sf => {
-    sf.value = sf.value.replace(/^[ \t\n]+/u,
-    sf.value = sf.value.replace(/[ \t\n]+$/u,
-    sf.value = sf.value.replace(/[ \t\n]+/gu,
+export function fieldTrimSubfieldValues(field) {
+  field.subfields?.forEach((sf) => {
+    sf.value = sf.value.replace(/^[ \t\n]+/u, "");
+    sf.value = sf.value.replace(/[ \t\n]+$/u, "");
+    sf.value = sf.value.replace(/[ \t\n]+/gu, " ");
   });
 }
 function fieldRemoveDecomposedDiacritics(field) {
-
-  // allows us to compare authors and avoid duplicate fields.
-  field.subfields.forEach(sf => {
+  field.subfields.forEach((sf) => {
     sf.value = removeDecomposedDiacritics(sf.value);
   });
 }
-function removeDecomposedDiacritics(value =
-
-  // NB #2: Finnish letters 'å', 'ä', 'ö', 'Å', Ä', and 'Ö' should be handled (=precomposed) before calling this. (= keep them as is)
-  // NB #3: Calling our very own fixComposition() before this function handles both #1 and #2.
-  return String(value).replace(/\p{Diacritic}/gu, '');
+function removeDecomposedDiacritics(value = "") {
+  return String(value).replace(/\p{Diacritic}/gu, "");
 }
 function normalizeSubfieldValue(value, subfieldCode, tag) {
-  // NB! For comparison of values only
-  /* eslint-disable */
   value = subfieldValueLowercase(value, subfieldCode, tag);
-
-
-
-
-
-  if (['130', '730'].includes(tag) && subfieldCode === 'a') {
-    value = value.replace(' : ', ', '); // "Halloween ends (elokuva, 2022)" vs "Halloween ends (elokuva : 2023)"
+  value = normalizePartData(value, subfieldCode, tag);
+  value = value.replace(/^\[([^[\]]+)\]/gu, "$1");
+  if (["130", "730"].includes(tag) && subfieldCode === "a") {
+    value = value.replace(" : ", ", ");
   }
-  /* eslint-enable */
-
-  // Not going to do these in the foreseeable future, but keeping them here for discussion:
-  // Possible normalizations include but are not limited to:
-  // ø => ö? Might be language dependent: 041 $a fin => ö, 041 $a eng => o?
-  // Ø => Ö?
-  // ß => ss
-  // þ => th (NB! Both upper and lower case)
-  // ...
-  // Probably nots:
-  // ü => y (probably not, though this correlates with Finnish letter-to-sound rules)
-  // w => v (OK for Finnish sorting in certain cases, but we are not here, are we?)
-  // I guess we should use decomposed values in code here. (Not sure what composition my examples above use.)
   return value;
 }
-function cloneAndRemovePunctuation(field) {
-  const clonedField = (
+export function cloneAndRemovePunctuation(field) {
+  const clonedField = clone(field);
   if (fieldSkipNormalization(field)) {
     return clonedField;
   }
-
+  fieldStripPunctuation(clonedField);
   fieldTrimSubfieldValues(clonedField);
-  debugDev(
+  debugDev("PUNC");
   debugFieldComparison(field, clonedField);
   return clonedField;
 }
 function removeCharsThatDontCarryMeaning(value, tag, subfieldCode) {
-  if (tag ===
+  if (tag === "080") {
     return value;
   }
-
-
-
-  // MRA-273: Handle X00$a name initials.
-  // NB #1: that we remove spaces for comparison (as it simpler), though actually space should be used. Doesn't matter as this is comparison only.
-  // NB #2: we might/should eventually write a validator/fixer that adds those spaces. After that point, this expection should become obsolete.
-  if (subfieldCode === 'a' && ['100', '400', '600', '700', '800'].includes(tag)) {
-    // 400 is used in auth records. It's not a bib field at all.
-    value = value.replace(/([A-Z]|Å|Ä|Ö)\. +/ugi, '$1.');
+  value = value.replace(/['‘’"„“”«»]/gu, "");
+  if (subfieldCode === "a" && ["100", "400", "600", "700", "800"].includes(tag)) {
+    value = value.replace(/([A-Z]|Å|Ä|Ö)\. +/ugi, "$1.");
   }
   return value;
 }
 function normalizeField(field) {
-
-  (0, _punctuation.fieldStripPunctuation)(field);
+  fieldStripPunctuation(field);
   fieldLowercase(field);
-
+  fieldNormalizeControlNumbers(field);
   return field;
 }
-function cloneAndNormalizeFieldForComparison(field) {
-
-  // Some of the normalizations might be considered a bit overkill for other purposes.
-  const clonedField = (0, _clone.default)(field);
+export function cloneAndNormalizeFieldForComparison(field) {
+  const clonedField = clone(field);
   if (fieldSkipNormalization(field)) {
     return clonedField;
   }
-  clonedField.subfields.forEach(sf => {
-    // Do this for all fields or some fields?
+  clonedField.subfields.forEach((sf) => {
     sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag);
     sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);
   });
@@ -287,14 +197,13 @@ function cloneAndNormalizeFieldForComparison(field) {
   fieldRemoveDecomposedDiacritics(clonedField);
   fieldSpecificHacks(clonedField);
   fieldTrimSubfieldValues(clonedField);
-  debugFieldComparison(field, clonedField);
-
+  debugFieldComparison(field, clonedField);
   return clonedField;
 }
 function fieldSkipNormalization(field) {
-  if (!field.subfields || [
+  if (!field.subfields || ["018", "066", "080", "083"].includes(field.tag)) {
     return true;
   }
   return false;
 }
-//# sourceMappingURL=normalizeFieldForComparison.js.map
+//# sourceMappingURL=normalizeFieldForComparison.js.map
package/dist/normalizeFieldForComparison.js.map

@@ -1 +1,7 @@
-{"version":3,"file":"normalizeFieldForComparison.js","names":[…],"sources":["../src/normalizeFieldForComparison.js"],"sourcesContent":[…],"mappings":"…"}
SAASG,6BAA6BA,CAACzB,GAAG,EAAEC,YAAY,EAAE;EAC/D;EACA,IAAIA,YAAY,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACE,QAAQ,CAACH,GAAG,CAAC,EAAE;IAC9H,OAAO,IAAI;EACb;EACA,IAAIA,GAAG,KAAK,KAAK,IAAIC,YAAY,KAAK,GAAG,EAAE;IACzC,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAEA,SAASyB,aAAaA,CAACnC,KAAK,EAAE;EAC5B;EACA,IAAIA,KAAK,CAACC,KAAK,CAAC,oDAAoD,CAAC,EAAE;IACrE,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd;AAEA,SAASmC,aAAaA,CAACX,KAAK,EAAE;EAC5B,IAAI,CAACA,KAAK,CAACE,SAAS,EAAE;IACpB;EACF;;EAEA;EACA,MAAMU,iBAAiB,GAAGZ,KAAK,CAACE,SAAS,CAACW,MAAM,CAACf,EAAE,IAAIW,6BAA6B,CAACT,KAAK,CAAChB,GAAG,EAAEc,EAAE,CAACxB,IAAI,CAAC,IAAIoC,aAAa,CAACZ,EAAE,CAACvB,KAAK,CAAC,CAAC;EACpIqC,iBAAiB,CAACT,OAAO,CAACL,EAAE,IAAIgB,qBAAqB,CAAChB,EAAE,CAAC,CAAC,CAAC,CAAC;;EAE5D,SAASgB,qBAAqBA,CAAChB,EAAE,EAAE;IACjC;IACAA,EAAE,CAACvB,KAAK,GAAGuB,EAAE,CAACvB,KAAK,CAACgC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;IACtCT,EAAE,CAACvB,KAAK,GAAGuB,EAAE,CAACvB,KAAK,CAACgC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC;EACxC;AAEF;AAEA,SAASQ,kBAAkBA,CAACf,KAAK,EAAE;EACjCW,aAAa,CAACX,KAAK,CAAC,CAAC,CAAC;EACtBI,gBAAgB,CAACJ,KAAK,CAAC;AACzB;AAEO,SAASgB,uBAAuBA,CAAChB,KAAK,EAAE;EAC7CA,KAAK,CAACE,SAAS,EAAEC,OAAO,CAAEL,EAAE,IAAK;IAC/BA,EAAE,CAACvB,KAAK,GAAGuB,EAAE,CAACvB,KAAK,CAACgC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;IAC7CT,EAAE,CAACvB,KAAK,GAAGuB,EAAE,CAACvB,KAAK,CAACgC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC;IAC7CT,EAAE,CAACvB,KAAK,GAAGuB,EAAE,CAACvB,KAAK,CAACgC,OAAO,CAAC,YAAY,EAAE,GAAG,CAAC;EAChD,CAAC,CAAC;AACJ;AAEA,SAASU,+BAA+BA,CAACjB,KAAK,EAAE;EAC9C;EACA;EACAA,KAAK,CAACE,SAAS,CAACC,OAAO,CAAEL,EAAE,IAAK;IAC9BA,EAAE,CAACvB,KAAK,GAAG2C,0BAA0B,CAACpB,EAAE,CAACvB,KAAK,CAAC;EACjD,CAAC,CAAC;AACJ;AAEA,SAAS2C,0BAA0BA,CAAC3C,KAAK,GAAG,EAAE,EAAE;EAC9C;EACA;EACA;EACA,OAAO4C,MAAM,CAAC5C,KAAK,CAAC,CAACgC,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;AACrD;AAEA,SAASa,sBAAsBA,CAAC7C,KAAK,EAAEU,YAAY,EAAED,GAAG,EAAE;EACxD;EACA;EACAT,KAAK,GAAGmB,sBAAsB,CAACnB,KAAK,EAAEU,YAAY,EAAED,GAAG,CAAC;;EAExD;EACAT,KAAK,GAAG,IAAA8C,sDAAiB,EAAC9C,KAAK,EAAEU,YAAY,EAAED,GAAG,CAAC;EACnDT,KAAK,GAAGA,KAAK,CAACgC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,CAAC,CAAC;;EAEjD,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAACpB,QAAQ,CAACH,GAAG,CAAC,IAAIC,YAAY,KAAK,GAAG,EAAE;IACxDV,KAAK,GAAGA,KAAK,CAACgC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,CAAC;EACtC;EACA;;EAEA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA;EACA,OAAOhC,KAAK;AACd;AAEO,SAAS+C,yBAAyBA,CAACtB,KAAK,EAAE;EAC/C,MAAMuB,WAAW,GAAG,IAAAC,cAAK,EAACxB,KAAK,CAAC;EAChC,IAAIyB,sBAAsB,CAACzB,KAAK,CAAC,EAAE;IACjC,OAAOuB,WAAW;EACpB;EACA,IAAAG,kCAAqB,EAACH,WAAW,CAAC;EAClCP,uBAAuB,CAACO,WAAW,CAAC;EACpCrD,QAAQ,CAAC,MAAM,CAAC;EAChBO,oBAAoB,CAACuB,KAAK,EAAEuB,WAAW,CAAC;EAExC,OAAOA,WAAW;AACpB;AAEA,SAASI,+BAA+BA,CAACpD,KAAK,EAAES,GAAG,EAAEC,YAAY,EAAE;EACjE,IAAID,GAAG,KAAK,KAAK,EAAE;IACjB,OAAOT,KAAK;EACd;;EAEA;EACAA,KAAK,GAAGA,KAAK,CAACgC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC,CAAC;EAC5C;EACA;EACA;EACA,IAAItB,YAAY,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACE,QAAQ,CAACH,GAAG,CAAC,EAAE;IAAE;IAC/ET,KAAK,GAAGA,KAAK,CAACgC,OAAO,CAAC,sBAAsB,EAAE,KAAK,CAAC;EACtD;EAEA,OAAOhC,KAAK;AACd;AAEA,SAASqD,cAAcA,CAAC5B,KAAK,EAAE;EAC7B;EACA,IAAA0B,kCAAqB,EAAC1B,KAAK,CAAC;EAC5BD,cAAc,CAACC,KAAK,CAAC;EACrB,IAAA6B,kDAA4B,EAAC7B,KAAK,CAAC,CAAC,CAAC;EACrC,OAAOA,KAAK;AACd;AAEO,SAAS8B,mCAAmCA,CAAC9B,KAAK,EAAE;EACzD;EACA;EACA,MAAMuB,WAAW,GAAG,IAAAC,cAAK,EAACxB,KAAK,CAAC;EAChC,IAAIyB,sBAAsB,CAACzB,KAAK,CAAC,EAAE;IACjC,OAAOuB,WAAW;EACpB;EACAA,WAAW,CAACrB,SAAS,CAACC,OAAO,CAAEL,EAAE,IAAK;IAAE;IACtCA,EAAE,CAACvB,KAAK,GAAG6C,sBAAsB,CAACtB,EAAE,CAACv
B,KAAK,EAAEuB,EAAE,CAACxB,IAAI,EAAE0B,KAAK,CAAChB,GAAG,CAAC;IAC/Dc,EAAE,CAACvB,KAAK,GAAGoD,+BAA+B,CAAC7B,EAAE,CAACvB,KAAK,EAAEyB,KAAK,CAAChB,GAAG,EAAEc,EAAE,CAACxB,IAAI,CAAC;EAC1E,CAAC,CAAC;EAEFsD,cAAc,CAACL,WAAW,CAAC;EAC3BN,+BAA+B,CAACM,WAAW,CAAC;EAC5CR,kBAAkB,CAACQ,WAAW,CAAC;EAC/BP,uBAAuB,CAACO,WAAW,CAAC;EAGpC9C,oBAAoB,CAACuB,KAAK,EAAEuB,WAAW,CAAC,CAAC,CAAC;;EAE1C,OAAOA,WAAW;AACpB;AAEA,SAASE,sBAAsBA,CAACzB,KAAK,EAAE;EACrC,IAAI,CAACA,KAAK,CAACE,SAAS,IAAI,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAACf,QAAQ,CAACa,KAAK,CAAChB,GAAG,CAAC,EAAE;IACxE,OAAO,IAAI;EACb;EACA,OAAO,KAAK;AACd","ignoreList":[]}
+{
+"version": 3,
+"sources": ["../src/normalizeFieldForComparison.js"],
"sourcesContent": ["/*\n Note that this file contains very powerful normalizations and spells that are:\n - meant for comparing similarity/mergability of two fields (clone, normalize, compare),\n - and NOT for modifying the actual field!\n\n This is mainly used by melinda-marc-record-merge-reducers. However, also removeInferiorDataFields fixer also used this.\n Thus it is here. However, most of the testing is done via merge-reducers...\n*/\nimport clone from 'clone';\nimport {fieldStripPunctuation} from './punctuation2.js';\nimport {fieldToString, isControlSubfieldCode} from './utils.js';\n\nimport {fieldNormalizeControlNumbers/*, normalizeControlSubfieldValue*/} from './normalize-identifiers.js';\nimport createDebugLogger from 'debug';\nimport {normalizePartData, subfieldContainsPartData} from './normalizeSubfieldValueForComparison.js';\n\nconst debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:normalizeFieldForComparison');\n//const debugData = debug.extend('data');\nconst debugDev = debug.extend('dev');\n\nexport function isEnnakkotietoSubfieldG(subfield) {\n if (subfield.code !== 'g') {\n return false;\n }\n return subfield.value.match(/^ENNAKKOTIETO\\.?$/gui);\n}\n\nfunction debugFieldComparison(oldField, newField) { // NB: Debug-only function!\n /*\n // We may drop certain subfields:\n if (oldField.subfields.length === newField.subfields.length) {\n oldField.subfields.forEach((subfield, index) => {\n const newValue = newField.subfields[index].value;\n if (subfield.value !== newValue) {\n nvdebug(`NORMALIZE SUBFIELD: '${subfield.value}' => '${newValue}'`, debugDev);\n }\n });\n }\n */\n const oldString = fieldToString(oldField);\n const newString = fieldToString(newField);\n if (oldString === newString) {\n return;\n }\n //nvdebug(`NORMALIZE FIELD:\\n '${fieldToString(oldField)}' =>\\n '${fieldToString(newField)}'`, debugDev);\n}\n\nfunction containsHumanName(tag = '???', subfieldCode = undefined) {\n // NB! This set is for bibs! Auth has 400... What else...\n if (['100', '600', '700', '800'].includes(tag)) {\n if (subfieldCode === undefined || subfieldCode === 'a') {\n return true;\n }\n }\n // Others?\n return false;\n}\n\nfunction containsCorporateName(tag = '???', subfieldCode = undefined) {\n // NB! This set is for bibs! Auth has 400... 
What else...\n if (['110', '610', '710', '810'].includes(tag)) {\n if (subfieldCode === undefined || subfieldCode === 'a') {\n return true;\n }\n }\n // Others?\n return false;\n}\n\nfunction skipAllSubfieldNormalizations(value, subfieldCode, tag) {\n\n if (isEnnakkotietoSubfieldG({'code': subfieldCode, value})) {\n return true;\n }\n\n if (tag === '035' && ['a', 'z'].includes(subfieldCode)) { // A\n return true;\n }\n\n if (isControlSubfieldCode(subfieldCode)) {\n return true;\n }\n return false;\n}\n\nfunction skipSubfieldLowercase(value, subfieldCode, tag) {\n // These may contain Roman Numerals...\n if (subfieldContainsPartData(tag, subfieldCode)) {\n return true;\n }\n\n return skipAllSubfieldNormalizations(value, subfieldCode, tag);\n}\n\nfunction skipAllFieldNormalizations(tag) {\n if (['LOW', 'SID'].includes(tag)) {\n return true;\n }\n return false;\n}\n\n\nfunction subfieldValueLowercase(value, subfieldCode, tag) {\n if (skipSubfieldLowercase(value, subfieldCode, tag)) {\n return value;\n }\n\n //return value.toLowerCase();\n const newValue = value.toLowerCase();\n if (newValue !== value) {\n //nvdebug(`SVL ${tag} $${subfieldCode} '${value}' =>`, debugDev);\n //nvdebug(`SVL ${tag} $${subfieldCode} '${newValue}'`, debugDev);\n return newValue;\n }\n return value;\n}\n\nfunction subfieldLowercase(sf, tag) {\n sf.value = subfieldValueLowercase(sf.value, sf.code, tag);\n}\n\nfunction fieldLowercase(field) {\n if (skipFieldLowercase(field)) {\n return;\n }\n\n field.subfields.forEach(sf => subfieldLowercase(sf, field.tag));\n\n function skipFieldLowercase(field) {\n if (skipAllFieldNormalizations(field.tag)) {\n return true;\n }\n // Skip non-interesting fields\n if (!containsHumanName(field.tag) && !containsCorporateName(field.tag) && !['240', '245', '630'].includes(field.tag)) {\n return true;\n }\n\n return false;\n }\n}\n\n\nfunction hack490SubfieldA(field) {\n if (field.tag !== '490') {\n return;\n }\n field.subfields.forEach(sf => removeSarja(sf));\n\n // NB! This won't work, if the punctuation has not been stripped beforehand!\n function removeSarja(subfield) {\n if (subfield.code !== 'a') {\n return;\n }\n const tmp = subfield.value.replace(/ ?-(?:[a-z]|\u00E4|\u00F6)*sarja$/u, '');\n if (tmp.length > 0) {\n subfield.value = tmp;\n return;\n }\n }\n}\n\nexport function tagAndSubfieldCodeReferToIsbn(tag, subfieldCode) {\n // NB! We don't do this to 020$z!\n if (subfieldCode === 'z' && ['765', '767', '770', '772', '773', '774', '776', '777', '780', '785', '786', '787'].includes(tag)) {\n return true;\n }\n if (tag === '020' && subfieldCode === 'a') {\n return true;\n }\n return false;\n}\n\nfunction looksLikeIsbn(value) {\n // Does not check validity!\n if (value.match(/^(?:[0-9]-?){9}(?:[0-9]-?[0-9]-?[0-9]-?)?[0-9Xx]$/u)) {\n return true;\n }\n return false;\n}\n\nfunction normalizeISBN(field) {\n if (!field.subfields) {\n return;\n }\n\n //nvdebug(`ISBN-field? ${fieldToString(field)}`);\n const relevantSubfields = field.subfields.filter(sf => tagAndSubfieldCodeReferToIsbn(field.tag, sf.code) && looksLikeIsbn(sf.value));\n relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf));\n\n function normalizeIsbnSubfield(sf) {\n //nvdebug(` ISBN-subfield? 
${subfieldToString(sf)}`);\n sf.value = sf.value.replace(/-/ug, '');\n sf.value = sf.value.replace(/x/u, 'X');\n }\n\n}\n\nfunction fieldSpecificHacks(field) {\n normalizeISBN(field); // 020$a, not $z!\n hack490SubfieldA(field);\n}\n\nexport function fieldTrimSubfieldValues(field) {\n field.subfields?.forEach((sf) => {\n sf.value = sf.value.replace(/^[ \\t\\n]+/u, '');\n sf.value = sf.value.replace(/[ \\t\\n]+$/u, '');\n sf.value = sf.value.replace(/[ \\t\\n]+/gu, ' ');\n });\n}\n\nfunction fieldRemoveDecomposedDiacritics(field) {\n // Raison d'\u00EAtre/motivation: \"Sir\u00E9n\" and diacriticless \"Siren\" might refer to a same surname, so this normalization\n // allows us to compare authors and avoid duplicate fields.\n field.subfields.forEach((sf) => {\n sf.value = removeDecomposedDiacritics(sf.value);\n });\n}\n\nfunction removeDecomposedDiacritics(value = '') {\n // NB #1: Does nothing to precomposed letters. Do String.normalize('NFD') first, if you want to handle them.\n // NB #2: Finnish letters '\u00E5', '\u00E4', '\u00F6', '\u00C5', \u00C4', and '\u00D6' should be handled (=precomposed) before calling this. (= keep them as is)\n // NB #3: Calling our very own fixComposition() before this function handles both #1 and #2.\n return String(value).replace(/\\p{Diacritic}/gu, '');\n}\n\nfunction normalizeSubfieldValue(value, subfieldCode, tag) {\n // NB! For comparison of values only\n /* eslint-disable */\n value = subfieldValueLowercase(value, subfieldCode, tag);\n\n // Normalize: s. = sivut = pp.\n value = normalizePartData(value, subfieldCode, tag);\n value = value.replace(/^\\[([^[\\]]+)\\]/gu, '$1'); // eslint-disable-line functional/immutable-data\n\n if (['130', '730'].includes(tag) && subfieldCode === 'a') {\n value = value.replace(' : ', ', '); // \"Halloween ends (elokuva, 2022)\" vs \"Halloween ends (elokuva : 2023)\"\n }\n /* eslint-enable */\n\n // Not going to do these in the foreseeable future, but keeping them here for discussion:\n // Possible normalizations include but are not limited to:\n // \u00F8 => \u00F6? Might be language dependent: 041 $a fin => \u00F6, 041 $a eng => o?\n // \u00D8 => \u00D6?\n // \u00DF => ss\n // \u00FE => th (NB! Both upper and lower case)\n // ...\n // Probably nots:\n // \u00FC => y (probably not, though this correlates with Finnish letter-to-sound rules)\n // w => v (OK for Finnish sorting in certain cases, but we are not here, are we?)\n // I guess we should use decomposed values in code here. (Not sure what composition my examples above use.)\n return value;\n}\n\nexport function cloneAndRemovePunctuation(field) {\n const clonedField = clone(field);\n if (fieldSkipNormalization(field)) {\n return clonedField;\n }\n fieldStripPunctuation(clonedField);\n fieldTrimSubfieldValues(clonedField);\n debugDev('PUNC');\n debugFieldComparison(field, clonedField);\n\n return clonedField;\n}\n\nfunction removeCharsThatDontCarryMeaning(value, tag, subfieldCode) {\n if (tag === '080') {\n return value;\n }\n\n // 3\" refers to inches, but as this is for comparison only we don't mind...\n value = value.replace(/['\u2018\u2019\"\u201E\u201C\u201D\u00AB\u00BB]/gu, ''); // MET-570 et al. Subset of https://hexdocs.pm/ex_unicode/Unicode.Category.QuoteMarks.html\n // MRA-273: Handle X00$a name initials.\n // NB #1: that we remove spaces for comparison (as it simpler), though actually space should be used. Doesn't matter as this is comparison only.\n // NB #2: we might/should eventually write a validator/fixer that adds those spaces. 
After that point, this expection should become obsolete.\n if (subfieldCode === 'a' && ['100', '400', '600', '700', '800'].includes(tag)) { // 400 is used in auth records. It's not a bib field at all.\n value = value.replace(/([A-Z]|\u00C5|\u00C4|\u00D6)\\. +/ugi, '$1.');\n }\n\n return value;\n}\n\nfunction normalizeField(field) {\n //sf.value = removeDecomposedDiacritics(sf.value);\n fieldStripPunctuation(field);\n fieldLowercase(field);\n fieldNormalizeControlNumbers(field); // FIN11 vs FI-MELINDA etc.\n return field;\n}\n\nexport function cloneAndNormalizeFieldForComparison(field) {\n // NB! This new field is for comparison purposes only.\n // Some of the normalizations might be considered a bit overkill for other purposes.\n const clonedField = clone(field);\n if (fieldSkipNormalization(field)) {\n return clonedField;\n }\n clonedField.subfields.forEach((sf) => { // Do this for all fields or some fields?\n sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag);\n sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);\n });\n\n normalizeField(clonedField);\n fieldRemoveDecomposedDiacritics(clonedField);\n fieldSpecificHacks(clonedField);\n fieldTrimSubfieldValues(clonedField);\n\n\n debugFieldComparison(field, clonedField); // For debugging purposes only\n\n return clonedField;\n}\n\nfunction fieldSkipNormalization(field) {\n if (!field.subfields || ['018', '066', '080', '083'].includes(field.tag)) {\n return true;\n }\n return false;\n}\n"],
"mappings": "AAQA,OAAO,WAAW;AAClB,SAAQ,6BAA4B;AACpC,SAAQ,eAAe,6BAA4B;AAEnD,SAAQ,oCAAsE;AAC9E,OAAO,uBAAuB;AAC9B,SAAQ,mBAAmB,gCAA+B;AAE1D,MAAM,QAAQ,kBAAkB,0EAA0E;AAE1G,MAAM,WAAW,MAAM,OAAO,KAAK;AAE5B,gBAAS,wBAAwB,UAAU;AAChD,MAAI,SAAS,SAAS,KAAK;AACzB,WAAO;AAAA,EACT;AACA,SAAO,SAAS,MAAM,MAAM,sBAAsB;AACpD;AAEA,SAAS,qBAAqB,UAAU,UAAU;AAYhD,QAAM,YAAY,cAAc,QAAQ;AACxC,QAAM,YAAY,cAAc,QAAQ;AACxC,MAAI,cAAc,WAAW;AAC3B;AAAA,EACF;AAEF;AAEA,SAAS,kBAAkB,MAAM,OAAO,eAAe,QAAW;AAEhE,MAAI,CAAC,OAAO,OAAO,OAAO,KAAK,EAAE,SAAS,GAAG,GAAG;AAC9C,QAAI,iBAAiB,UAAa,iBAAiB,KAAK;AACtD,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,sBAAsB,MAAM,OAAO,eAAe,QAAW;AAEpE,MAAI,CAAC,OAAO,OAAO,OAAO,KAAK,EAAE,SAAS,GAAG,GAAG;AAC9C,QAAI,iBAAiB,UAAa,iBAAiB,KAAK;AACtD,aAAO;AAAA,IACT;AAAA,EACF;AAEA,SAAO;AACT;AAEA,SAAS,8BAA8B,OAAO,cAAc,KAAK;AAE/D,MAAI,wBAAwB,EAAC,QAAQ,cAAc,MAAK,CAAC,GAAG;AAC1D,WAAO;AAAA,EACT;AAEA,MAAI,QAAQ,SAAS,CAAC,KAAK,GAAG,EAAE,SAAS,YAAY,GAAG;AACtD,WAAO;AAAA,EACT;AAEA,MAAI,sBAAsB,YAAY,GAAG;AACvC,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAEA,SAAS,sBAAsB,OAAO,cAAc,KAAK;AAEvD,MAAI,yBAAyB,KAAK,YAAY,GAAG;AAC/C,WAAO;AAAA,EACT;AAEA,SAAO,8BAA8B,OAAO,cAAc,GAAG;AAC/D;AAEA,SAAS,2BAA2B,KAAK;AACvC,MAAI,CAAC,OAAO,KAAK,EAAE,SAAS,GAAG,GAAG;AAChC,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAGA,SAAS,uBAAuB,OAAO,cAAc,KAAK;AACxD,MAAI,sBAAsB,OAAO,cAAc,GAAG,GAAG;AACnD,WAAO;AAAA,EACT;AAGA,QAAM,WAAW,MAAM,YAAY;AACnC,MAAI,aAAa,OAAO;AAGtB,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAEA,SAAS,kBAAkB,IAAI,KAAK;AAClC,KAAG,QAAQ,uBAAuB,GAAG,OAAO,GAAG,MAAM,GAAG;AAC1D;AAEA,SAAS,eAAe,OAAO;AAC7B,MAAI,mBAAmB,KAAK,GAAG;AAC7B;AAAA,EACF;AAEA,QAAM,UAAU,QAAQ,QAAM,kBAAkB,IAAI,MAAM,GAAG,CAAC;AAE9D,WAAS,mBAAmBA,QAAO;AACjC,QAAI,2BAA2BA,OAAM,GAAG,GAAG;AACzC,aAAO;AAAA,IACT;AAEA,QAAI,CAAC,kBAAkBA,OAAM,GAAG,KAAK,CAAC,sBAAsBA,OAAM,GAAG,KAAK,CAAC,CAAC,OAAO,OAAO,KAAK,EAAE,SAASA,OAAM,GAAG,GAAG;AACpH,aAAO;AAAA,IACT;AAEA,WAAO;AAAA,EACT;AACF;AAGA,SAAS,iBAAiB,OAAO;AAC/B,MAAI,MAAM,QAAQ,OAAO;AACvB;AAAA,EACF;AACA,QAAM,UAAU,QAAQ,QAAM,YAAY,EAAE,CAAC;AAG7C,WAAS,YAAY,UAAU;AAC7B,QAAI,SAAS,SAAS,KAAK;AACzB;AAAA,IACF;AACA,UAAM,MAAM,SAAS,MAAM,QAAQ,4BAA4B,EAAE;AACjE,QAAI,IAAI,SAAS,GAAG;AAClB,eAAS,QAAQ;AACjB;AAAA,IACF;AAAA,EACF;AACF;AAEO,gBAAS,8BAA8B,KAAK,cAAc;AAE/D,MAAI,iBAAiB,OAAO,CAAC,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,OAAO,KAAK,EAAE,SAAS,GAAG,GAAG;AAC9H,WAAO;AAAA,EACT;AACA,MAAI,QAAQ,SAAS,iBAAiB,KAAK;AACzC,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAEA,SAAS,cAAc,OAAO;AAE5B,MAAI,MAAM,MAAM,oDAAoD,GAAG;AACrE,WAAO;AAAA,EACT;AACA,SAAO;AACT;AAEA,SAAS,cAAc,OAAO;AAC5B,MAAI,CAAC,MAAM,WAAW;AACpB;AAAA,EACF;AAGA,QAAM,oBAAoB,MAAM,UAAU,OAAO,QAAM,8BAA8B,MAAM,KAAK,GAAG,IAAI,KAAK,cAAc,GAAG,KAAK,CAAC;AACnI,oBAAkB,QAAQ,QAAM,sBAAsB,EAAE,CAAC;AAEzD,WAAS,sBAAsB,IAAI;AAEjC,OAAG,QAAQ,GAAG,MAAM,QAAQ,OAAO,EAAE;AACrC,OAAG,QAAQ,GAAG,MAAM,QAAQ,MAAM,GAAG;AAAA,EACvC;AAEF;AAEA,SAAS,mBAAmB,OAAO;AACjC,gBAAc,KAAK;AACnB,mBAAiB,KAAK;AACxB;AAEO,gBAAS,wBAAwB,OAAO;AAC7C,QAAM,WAAW,QAAQ,CAAC,OAAO;AAC/B,OAAG,QAAQ,GAAG,MAAM,QAAQ,cAAc,EAAE;AAC5C,OAAG,QAAQ,GAAG,MAAM,QAAQ,cAAc,EAAE;AAC5C,OAAG,QAAQ,GAAG,MAAM,QAAQ,cAAc,GAAG;AAAA,EAC/C,CAAC;AACH;AAEA,SAAS,gCAAgC,OAAO;AAG9C,QAAM,UAAU,QAAQ,CAAC,OAAO;AAC9B,OAAG,QAAQ,2BAA2B,GAAG,KAAK;AAAA,EAChD,CAAC;AACH;AAEA,SAAS,2BAA2B,QAAQ,IAAI;AAI9C,SAAO,OAAO,KAAK,EAAE,QAAQ,mBAAmB,EAAE;AACpD;AAEA,SAAS,uBAAuB,OAAO,cAAc,KAAK;AAGxD,UAAQ,uBAAuB,OAAO,cAAc,GAAG;AAGvD,UAAQ,kBAAkB,OAAO,cAAc,GAAG;AAClD,UAAQ,MAAM,QAAQ,oBAAoB,IAAI;AAE9C,MAAI,CAAC,OAAO,KAAK,EAAE,SAAS,GAAG,KAAK,iBAAiB,KAAK;AACxD,YAAQ,MAAM,QAAQ,OAAO,IAAI;AAAA,EACnC;AAcA,SAAO;AACT;AAEO,gBAAS,0BAA0B,OAAO;AAC/C,QAAM,cAAc,MAAM,KAAK;AAC/B,MAAI,uBAAuB,KAAK,GAAG;AACjC,WAAO;AAAA,EACT;AACA,wBAAs
B,WAAW;AACjC,0BAAwB,WAAW;AACnC,WAAS,MAAM;AACf,uBAAqB,OAAO,WAAW;AAEvC,SAAO;AACT;AAEA,SAAS,gCAAgC,OAAO,KAAK,cAAc;AACjE,MAAI,QAAQ,OAAO;AACjB,WAAO;AAAA,EACT;AAGA,UAAQ,MAAM,QAAQ,iBAAiB,EAAE;AAIzC,MAAI,iBAAiB,OAAO,CAAC,OAAO,OAAO,OAAO,OAAO,KAAK,EAAE,SAAS,GAAG,GAAG;AAC7E,YAAQ,MAAM,QAAQ,wBAAwB,KAAK;AAAA,EACrD;AAEA,SAAO;AACT;AAEA,SAAS,eAAe,OAAO;AAE7B,wBAAsB,KAAK;AAC3B,iBAAe,KAAK;AACpB,+BAA6B,KAAK;AAClC,SAAO;AACT;AAEO,gBAAS,oCAAoC,OAAO;AAGzD,QAAM,cAAc,MAAM,KAAK;AAC/B,MAAI,uBAAuB,KAAK,GAAG;AACjC,WAAO;AAAA,EACT;AACA,cAAY,UAAU,QAAQ,CAAC,OAAO;AACpC,OAAG,QAAQ,uBAAuB,GAAG,OAAO,GAAG,MAAM,MAAM,GAAG;AAC9D,OAAG,QAAQ,gCAAgC,GAAG,OAAO,MAAM,KAAK,GAAG,IAAI;AAAA,EACzE,CAAC;AAED,iBAAe,WAAW;AAC1B,kCAAgC,WAAW;AAC3C,qBAAmB,WAAW;AAC9B,0BAAwB,WAAW;AAGnC,uBAAqB,OAAO,WAAW;AAEvC,SAAO;AACT;AAEA,SAAS,uBAAuB,OAAO;AACrC,MAAI,CAAC,MAAM,aAAa,CAAC,OAAO,OAAO,OAAO,KAAK,EAAE,SAAS,MAAM,GAAG,GAAG;AACxE,WAAO;AAAA,EACT;AACA,SAAO;AACT;",
+"names": ["field"]
+}
@@ -1,140 +1,100 @@
-
-
-
-
-
-
-exports.partsAgree = partsAgree;
-exports.subfieldContainsPartData = subfieldContainsPartData;
-var _utils = require("./utils");
-var _debug = _interopRequireDefault(require("debug"));
-function _interopRequireDefault(e) { return e && e.__esModule ? e : { default: e }; }
-// Normalizes at least 490$v and 773$g which contain information such as "Raita 5" vs "5", and "Osa 3" vs "Osa III".
-
-const debug = (0, _debug.default)('@natlibfi/melinda-marc-record-merge-reducers:normalizeSubfieldValueForComparison');
-//const debugData = debug.extend('data');
-const debugDev = debug.extend('dev');
-function subfieldContainsPartData(tag, subfieldCode) {
-  // NB! Used by reducers' mergeSubield.js
-  if (subfieldCode === 'v' && ['490', '800', '810', '811', '830'].includes(tag)) {
+import { nvdebug } from "./utils.js";
+import createDebugLogger from "debug";
+const debug = createDebugLogger("@natlibfi/melinda-marc-record-merge-reducers:normalizeSubfieldValueForComparison");
+const debugDev = debug.extend("dev");
+export function subfieldContainsPartData(tag, subfieldCode) {
+  if (subfieldCode === "v" && ["490", "800", "810", "811", "830"].includes(tag)) {
     return true;
   }
-  if (tag ===
+  if (tag === "773" && subfieldCode === "g") {
     return true;
   }
   return false;
 }
 function splitPartData(originalValue) {
-
-  // Remove punctuation and brackets:
-  const value = originalValue.replace(/[-.,:; ]+$/ui, '').replace(/^\[([0-9]+)\]$/ui, '$1');
+  const value = originalValue.replace(/[-.,:; ]+$/ui, "").replace(/^\[([0-9]+)\]$/ui, "$1");
   const [year, rest] = extractYear(value);
-  const splitPoint = rest.lastIndexOf(
+  const splitPoint = rest.lastIndexOf(" ");
   if (splitPoint === -1) {
-    return [
+    return [void 0, year, rest];
   }
   const lhs = rest.substr(0, splitPoint);
   const rhs = rest.substr(splitPoint + 1);
   return [lhs, year, rhs];
-  function extractYear(
-
-
-    // "2023, 3" => ["2023", "3"]
-    if (value.match(/^(?:1[89][0-9][0-9]|20[012][0-9]), (?:nro |n:o)?[1-9][0-9]{0,2}$/ui)) {
-      return [value.substr(0, 4), value.substr(6)];
+  function extractYear(value2) {
+    if (value2.match(/^(?:1[89][0-9][0-9]|20[012][0-9]), (?:nro |n:o)?[1-9][0-9]{0,2}$/ui)) {
+      return [value2.substr(0, 4), value2.substr(6)];
     }
-
-
-      return [value.substr(0, 4), value.substr(5)];
+    if (value2.match(/^(?:1[89][0-9][0-9]|20[012][0-9])[/:][1-9][0-9]{0,2}$/u)) {
+      return [value2.substr(0, 4), value2.substr(5)];
     }
-
-
-
-      return [value.substr(len - 4), value.substr(0, len - 5)];
+    if (value2.match(/^[^0-9]*[1-9][0-9]{0,2}\/(?:1[89][0-9][0-9]|20[012][0-9])$/u)) {
+      const len = value2.length;
+      return [value2.substr(len - 4), value2.substr(0, len - 5)];
     }
-    return [
+    return [void 0, value2];
   }
 }
 function normalizePartType(originalValue) {
-  if (originalValue ===
-    return
+  if (originalValue === void 0) {
+    return void 0;
   }
   const value = originalValue.toLowerCase();
-
-
-  if (['n:o', 'no', 'nr', 'nro', 'number', 'numero', 'nummer'].includes(value)) {
-    return 'numero';
+  if (["n:o", "no", "nr", "nro", "number", "numero", "nummer"].includes(value)) {
+    return "numero";
   }
-  if ([
-    return
+  if (["band", "bd", "h\xE4fte", "nide", "osa", "part", "teil", "vol", "vol.", "volume"].includes(value)) {
+    return "osa";
   }
-  if ([
-    return
+  if (["p.", "page", "pages", "pp.", "s.", "sidor", "sivu", "sivut"].includes(value)) {
+    return "sivu";
   }
   return value;
 }
-const romanNumbers = {
-  'I': '1',
-  'II': '2',
-  'III': '3',
-  'IV': '4',
-  'V': '5',
-  'VI': '6',
-  'X': '10'
-};
+const romanNumbers = { "I": "1", "II": "2", "III": "3", "IV": "4", "V": "5", "VI": "6", "X": "10" };
 function normalizePartNumber(value) {
-  // Should we handle all Roman numbers or some range of them?
-  // There's probably a library for our purposes..
   if (value in romanNumbers) {
     const arabicValue = romanNumbers[value];
-
+    nvdebug(` MAP ${value} to ${arabicValue}`, debugDev);
     return arabicValue;
   }
   return value.toLowerCase();
 }
 function splitAndNormalizePartData(value) {
-  // This is just a stub. Does not handle eg. "Levy 2, raita 15"
   const [partType, partYear, partNumber] = splitPartData(value);
-  //nvdebug(` LHS: '${lhs}'`, debugDev);
-  //nvdebug(` RHS: '${rhs}'`, debugDev);
   return [normalizePartType(partType), partYear, normalizePartNumber(partNumber)];
 }
-function partsAgree(value1, value2, tag, subfieldCode) {
-  // Note, that parts can not be normalized away, as "2" can agree with "Part 2" and "Raita 2" and "Volume 2"...
-  // NB! Used by reducers' mergeSubield.js
+export function partsAgree(value1, value2, tag, subfieldCode) {
   if (!subfieldContainsPartData(tag, subfieldCode)) {
     return false;
   }
   const [partType1, partYear1, partNumber1] = splitAndNormalizePartData(value1);
   const [partType2, partYear2, partNumber2] = splitAndNormalizePartData(value2);
-  //nvdebug(`P1: ${partType1} | ${partYear1} | ${partNumber1}`);
-  //nvdebug(`P2: ${partType2} | ${partYear2} | ${partNumber2}`);
   if (partNumber1 !== partNumber2) {
     return false;
   }
-  if (partType1 !==
+  if (partType1 !== void 0 && partType2 !== void 0 && partType1 !== partType2) {
     return false;
   }
-  if (partYear1 !==
+  if (partYear1 !== void 0 && partYear2 !== void 0 && partYear1 !== partYear2) {
     return false;
   }
   return true;
 }
-function normalizePartData(value, subfieldCode, tag) {
-  // This is for normalizing values for equality comparison only!
+export function normalizePartData(value, subfieldCode, tag) {
   if (!subfieldContainsPartData(tag, subfieldCode)) {
     return value;
   }
   const [partType, partYear, partNumber] = splitAndNormalizePartData(value);
-  if (partType ===
-    if (partYear ===
+  if (partType === void 0) {
+    if (partYear === void 0) {
       return partNumber;
     }
     return `${partNumber}/${partYear}`;
   }
-  if (partYear ===
+  if (partYear === void 0) {
     return `${partType} ${partNumber}`;
   }
   return `${partType} ${partNumber}/${partYear}`;
 }
-//# sourceMappingURL=normalizeSubfieldValueForComparison.js.map
+//# sourceMappingURL=normalizeSubfieldValueForComparison.js.map
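
For orientation only, a minimal usage sketch of the part-data helpers exported by the rebuilt ESM module above. The import path is an assumption (loading the compiled file under dist/ directly, which may not be a documented entry point); the expected values follow from the logic shown in this hunk.

// Hypothetical sketch; the dist/ import path is an assumption, not a documented API.
import {normalizePartData, partsAgree} from './dist/normalizeSubfieldValueForComparison.js';

// 490$v part statements "Osa 3" and "Osa III" normalize to the same comparison string:
console.log(normalizePartData('Osa 3', 'v', '490'));   // 'osa 3'
console.log(normalizePartData('Osa III', 'v', '490')); // 'osa 3'
// A bare number keeps its short form, so string equality alone would miss the match:
console.log(normalizePartData('3', 'v', '490'));       // '3'
// partsAgree() tolerates a missing part type and still reports agreement:
console.log(partsAgree('Osa 3', '3', '490', 'v'));     // true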