npm - @natlibfi/marc-record-validators-melinda - Versions diffs - 12.0.0-alpha.1 → 12.0.0-alpha.12 - Mend

@natlibfi/marc-record-validators-melinda 12.0.0-alpha.1 → 12.0.0-alpha.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

package/.github/workflows/{melinda-node-tests.yml → melinda-node-tests-and-publish.yml} +36 -11
package/dist/access-rights.test.js +1 -1
package/dist/access-rights.test.js.map +1 -1
package/dist/addMissingField337.test.js +1 -1
package/dist/addMissingField337.test.js.map +1 -1
package/dist/addMissingField338.test.js +1 -1
package/dist/addMissingField338.test.js.map +1 -1
package/dist/cyrillux-usemarcon-replacement.test.js +4 -7
package/dist/cyrillux-usemarcon-replacement.test.js.map +2 -2
package/dist/cyrillux.test.js +1 -1
package/dist/cyrillux.test.js.map +1 -1
package/dist/double-commas.test.js +1 -1
package/dist/double-commas.test.js.map +1 -1
package/dist/empty-fields.test.js +1 -1
package/dist/empty-fields.test.js.map +1 -1
package/dist/ending-punctuation-conf.js +6 -4
package/dist/ending-punctuation-conf.js.map +2 -2
package/dist/ending-punctuation.js +88 -18
package/dist/ending-punctuation.js.map +3 -3
package/dist/ending-punctuation.test.js +198 -103
package/dist/ending-punctuation.test.js.map +2 -2
package/dist/field-008-18-34-character-groups.test.js +1 -1
package/dist/field-008-18-34-character-groups.test.js.map +1 -1
package/dist/field-structure.test.js +1 -1
package/dist/field-structure.test.js.map +1 -1
package/dist/index.js +122 -59
package/dist/index.js.map +2 -2
package/dist/indicator-fixes.js +11 -1
package/dist/indicator-fixes.js.map +2 -2
package/dist/isbn-issn.js +8 -5
package/dist/isbn-issn.js.map +2 -2
package/dist/melindaCustomMergeFields.js +1 -1
package/dist/melindaCustomMergeFields.js.map +2 -2
package/dist/merge-fields/counterpartField.js +5 -0
package/dist/merge-fields/counterpartField.js.map +2 -2
package/dist/merge-fields/dataProvenance.js +29 -0
package/dist/merge-fields/dataProvenance.js.map +7 -0
package/dist/merge-fields/index.js +11 -2
package/dist/merge-fields/index.js.map +2 -2
package/dist/merge-fields/mergeField.js +1 -1
package/dist/merge-fields/mergeField.js.map +2 -2
package/dist/merge-fields.test.js +4 -2
package/dist/merge-fields.test.js.map +2 -2
package/dist/mergeField500Lisapainokset.js +1 -1
package/dist/mergeField500Lisapainokset.js.map +2 -2
package/dist/normalizeFieldForComparison.js +24 -0
package/dist/normalizeFieldForComparison.js.map +2 -2
package/dist/punctuation2.js +11 -5
package/dist/punctuation2.js.map +2 -2
package/dist/removeInferiorDataFields.js +2 -1
package/dist/removeInferiorDataFields.js.map +2 -2
package/dist/resolveOrphanedSubfield6s.js +1 -1
package/dist/resolveOrphanedSubfield6s.js.map +2 -2
package/dist/sortSubfields.js +5 -5
package/dist/sortSubfields.js.map +2 -2
package/dist/translate-terms.test.js +12 -2
package/dist/translate-terms.test.js.map +2 -2
package/dist/utils.js +9 -3
package/dist/utils.js.map +2 -2
package/package.json +22 -23
package/src/access-rights.test.js +1 -1
package/src/addMissingField337.test.js +1 -1
package/src/addMissingField338.test.js +1 -1
package/src/cyrillux-usemarcon-replacement.test.js +4 -9
package/src/cyrillux.test.js +1 -1
package/src/double-commas.test.js +1 -1
package/src/empty-fields.test.js +1 -1
package/src/ending-punctuation-conf.js +6 -5
package/src/ending-punctuation.js +115 -24
package/src/ending-punctuation.test.js +187 -104
package/src/field-008-18-34-character-groups.test.js +1 -1
package/src/field-structure.test.js +1 -1
package/src/index.js +132 -59
package/src/indicator-fixes.js +14 -1
package/src/isbn-issn.js +11 -6
package/src/melindaCustomMergeFields.js +1 -1
package/src/merge-fields/counterpartField.js +6 -0
package/src/merge-fields/dataProvenance.js +41 -0
package/src/merge-fields/index.js +11 -2
package/src/merge-fields/mergeField.js +2 -2
package/src/merge-fields.test.js +6 -2
package/src/mergeField500Lisapainokset.js +1 -1
package/src/normalizeFieldForComparison.js +26 -0
package/src/punctuation2.js +14 -5
package/src/removeInferiorDataFields.js +4 -1
package/src/resolveOrphanedSubfield6s.js +1 -1
package/src/sortSubfields.js +7 -5
package/src/translate-terms.test.js +25 -2
package/src/utils.js +19 -3
package/test-fixtures/indicator-fixes/10/expectedResult.json +11 -0
package/test-fixtures/indicator-fixes/10/metadata.json +4 -0
package/test-fixtures/indicator-fixes/10/record.json +11 -0
package/test-fixtures/merge-fields/f05/expectedResult.json +24 -0
package/test-fixtures/merge-fields/f05/metadata.json +6 -0
package/test-fixtures/merge-fields/f05/record.json +30 -0
package/test-fixtures/remove-inferior-datafields/f16/expectedResult.json +12 -0
package/test-fixtures/remove-inferior-datafields/f16/metadata.json +5 -0
package/test-fixtures/remove-inferior-datafields/f16/record.json +14 -0
package/test-fixtures/translate-terms-data.js +42 -0
package/src/melindaCustomMergeFields.json +0 -5120

package/src/field-008-18-34-character-groups.test.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import assert from 'node:assert';
 import {MarcRecord} from '@natlibfi/marc-record';
-import validatorFactory from '../src/field-008-18-34-character-groups.js';
+import validatorFactory from './field-008-18-34-character-groups.js';
 import {READERS} from '@natlibfi/fixura';
 import generateTests from '@natlibfi/fixugen';
 import createDebugLogger from 'debug';

package/src/field-structure.test.js CHANGED Viewed

@@ -1,7 +1,7 @@
 import assert from 'node:assert';
 import {describe, it} from 'node:test';
 import {MarcRecord} from '@natlibfi/marc-record';
-import validatorFactory from '../src/field-structure.js';
+import validatorFactory from './field-structure.js';
 // Factory validation

package/src/index.js CHANGED Viewed

@@ -1,62 +1,85 @@
-import AccessRights from './access-rights';
-import AddMissingField041 from './addMissingField041';
-import AddMissingField336 from './addMissingField336';
-import AddMissingField337 from './addMissingField337';
-import AddMissingField338 from './addMissingField338';
-import Cyrillux from './cyrillux';
-import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement';
-import DisambiguateSeriesStatements from './disambiguateSeriesStatements';
-import DoubleCommas from './double-commas';
-import DuplicatesInd1 from './duplicates-ind1';
-import EmptyFields from './empty-fields';
-import EndingPunctuation from './ending-punctuation';
-import EndingWhitespace from './ending-whitespace';
-import Field008CharacterGroups from './field-008-18-34-character-groups';
-import Field505Separators from './field-505-separators';
-import Field521Fix from './field-521-fix';
-import FieldExclusion from './field-exclusion';
-import FieldStructure from './field-structure';
-import FieldsPresent from './fields-present';
-import Fix33X from './fix-33X';
-import FixCountryCodes from './fix-country-codes';
-import FixLanguageCodes from './fix-language-codes';
-import FixRelatorTerms from './fixRelatorTerms';
-import FixedFields from './fixed-fields';
-import IdenticalFields from './identical-fields';
-import IndicatorFixes from './indicator-fixes';
-import IsbnIssn from './isbn-issn';
-import ItemLanguage from './item-language';
-import MergeField500Lisapainokset from './mergeField500Lisapainokset';
-import MergeFields from './merge-fields/';
-import MergeRelatorTermFields from './mergeRelatorTermFields';
-import Modernize502 from './modernize-502';
-import MultipleSubfield0s from './multiple-subfield-0';
-import NonBreakingSpace from './non-breaking-space';
-import NormalizeDashes from './normalize-dashes';
-import NormalizeIdentifiers from './normalize-identifiers';
-import NormalizeQualifyingInformation from './normalize-qualifying-information';
-import NormalizeUTF8Diacritics from './normalize-utf8-diacritics';
-import Punctuation from './punctuation/';
-import Punctuation2 from './punctuation2';
-import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers';
-import RemoveDuplicateDataFields from './removeDuplicateDataFields';
-import RemoveInferiorDataFields from './removeInferiorDataFields';
-import ResolvableExtReferences from './resolvable-ext-references-melinda';
-import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s';
-import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes';
-import SortFields from './sortFields';
-import SortRelatorTerms from './sortRelatorTerms';
-import SortSubfields from './sortSubfields';
-import SortTags from './sort-tags';
+import AccessRights from './access-rights.js';
+import AddMissingField041 from './addMissingField041.js';
+import AddMissingField336 from './addMissingField336.js';
+import AddMissingField337 from './addMissingField337.js';
+import AddMissingField338 from './addMissingField338.js';
+import Cyrillux from './cyrillux.js';
+import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement.js';
+import DisambiguateSeriesStatements from './disambiguateSeriesStatements.js';
+import DoubleCommas from './double-commas.js';
+import DuplicatesInd1 from './duplicates-ind1.js';
+import EmptyFields from './empty-fields.js';
+import EndingPunctuation from './ending-punctuation.js';
+import EndingWhitespace from './ending-whitespace.js';
+import Field008CharacterGroups from './field-008-18-34-character-groups.js';
+import Field505Separators from './field-505-separators.js';
+import Field521Fix from './field-521-fix.js';
+import FieldExclusion from './field-exclusion.js';
+import FieldStructure from './field-structure.js';
+import FieldsPresent from './fields-present.js';
+import Fix33X from './fix-33X.js';
+import FixCountryCodes from './fix-country-codes.js';
+import FixLanguageCodes from './fix-language-codes.js';
+import FixRelatorTerms from './fixRelatorTerms.js';
+import FixedFields from './fixed-fields.js';
+import IdenticalFields from './identical-fields.js';
+import IndicatorFixes from './indicator-fixes.js';
+import IsbnIssn from './isbn-issn.js';
+import ItemLanguage from './item-language.js';
+import MergeField500Lisapainokset from './mergeField500Lisapainokset.js';
+import MergeFields from './merge-fields/index.js';
+import MergeRelatorTermFields from './mergeRelatorTermFields.js';
+import Modernize502 from './modernize-502.js';
+import MultipleSubfield0s from './multiple-subfield-0.js';
+import NonBreakingSpace from './non-breaking-space.js';
+import NormalizeDashes from './normalize-dashes.js';
+import NormalizeIdentifiers from './normalize-identifiers.js';
+import NormalizeQualifyingInformation from './normalize-qualifying-information.js';
+import NormalizeUTF8Diacritics from './normalize-utf8-diacritics.js';
+import Punctuation from './punctuation/index.js';
+import Punctuation2 from './punctuation2.js';
+import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers.js';
+import RemoveDuplicateDataFields from './removeDuplicateDataFields.js';
+import RemoveInferiorDataFields from './removeInferiorDataFields.js';
+import ResolvableExtReferences from './resolvable-ext-references-melinda.js';
+import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s.js';
+import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes.js';
+import SortFields from './sortFields.js';
+import SortRelatorTerms from './sortRelatorTerms.js';
+import SortSubfields from './sortSubfields.js';
+import SortTags from './sort-tags.js';
 // import StripPunctuation from './stripPunctuation'; // Can we add this here? Should be used very cautiously!
-import SubfieldValueNormalizations from './subfieldValueNormalizations';
-import SubfieldExclusion from './subfield-exclusion';
-import Sync007And300 from './sync-007-and-300';
-import TranslateTerms from './translate-terms';
-import TypeOfDateF008 from './typeOfDate-008';
-import UnicodeDecomposition from './unicode-decomposition';
-import UpdateField540 from './update-field-540';
-import Urn from './urn';
+import SubfieldValueNormalizations from './subfieldValueNormalizations.js';
+import SubfieldExclusion from './subfield-exclusion.js';
+import Sync007And300 from './sync-007-and-300.js';
+import TranslateTerms from './translate-terms.js';
+import TypeOfDateF008 from './typeOfDate-008.js';
+import UnicodeDecomposition from './unicode-decomposition.js';
+import UpdateField540 from './update-field-540.js';
+import Urn from './urn.js';
+import {getCounterpart} from './merge-fields/counterpartField.js';
+import {postprocessRecords} from './merge-fields/mergeOrAddPostprocess.js';
+import {mergeField} from './merge-fields/mergeField.js';
+import {fieldGetOccurrenceNumberPairs, fieldGetUnambiguousOccurrenceNumber, fieldToNormalizedString, fieldsToNormalizedString, get6s,
+  isValidSubfield6, recordGetMaxSubfield6OccurrenceNumberAsInteger,
+  intToOccurrenceNumberString, resetSubfield6Tag, subfield6ResetOccurrenceNumber, subfield6GetOccurrenceNumber,
+  subfield6GetOccurrenceNumberAsInteger} from './subfield6Utils.js';
+import {getSubfield8LinkingNumber, isValidSubfield8, recordGetAllSubfield8LinkingNumbers, recordGetFieldsWithSubfield8LinkingNumber} from './subfield8Utils.js';
+import {recordFixRelatorTerms} from './fixRelatorTerms.js';
+import {fieldTrimSubfieldValues} from './normalizeFieldForComparison.js';
+import {baseHasEqualOrHigherEncodingLevel, deleteAllPrepublicationNotesFromField500InNonPubRecord, encodingLevelIsBetterThanPrepublication, getEncodingLevel, isEnnakkotietoField, isEnnakkotietoSubfield} from './prepublicationUtils.js';
+import {melindaFieldSpecs} from '@natlibfi/marc-record-validators-melinda/dist/melindaCustomMergeFields.js';
+import {cloneAndRemovePunctuation} from './normalizeFieldForComparison.js';
+import {removeWorsePrepubField500s, removeWorsePrepubField594s} from './prepublicationUtils.js';
+import {fieldFixPunctuation} from './punctuation2.js';
+import {recordResetSubfield6OccurrenceNumbers} from './reindexSubfield6OccurenceNumbers.js';
+import {sortAdjacentSubfields} from './sortSubfields.js';
+import {fieldsToString} from './utils.js';
 export {
   AccessRights,
@@ -116,5 +139,55 @@ export {
   UpdateField540,
   Urn,
   SortFields, // Keep this penultimate
-  MergeFields // Run this last *iff* you want to use this at all
+  MergeFields, // Run this last *iff* you want to use this at all
+  // Functions for processing record... These should probably go to some other project.
+  // Too specific for marc-record-js though...
+  // 1. generic low level stuff
+  getEncodingLevel,
+  // 2. text normalizations (eg. for similarity comparisons, field merge)
+  cloneAndRemovePunctuation,
+  fieldFixPunctuation,
+  fieldToNormalizedString,
+  fieldTrimSubfieldValues,
+  fieldsToNormalizedString,
+  fieldsToString,
+  recordFixRelatorTerms,
+  sortAdjacentSubfields,
+  // 3. prepublication stuff
+  baseHasEqualOrHigherEncodingLevel,
+  deleteAllPrepublicationNotesFromField500InNonPubRecord,
+  encodingLevelIsBetterThanPrepublication,
+  isEnnakkotietoField,
+  isEnnakkotietoSubfield,
+  removeWorsePrepubField500s,
+  removeWorsePrepubField594s,
+  // 4. subfield $6 related functions
+  fieldGetOccurrenceNumberPairs,
+  get6s,
+  fieldGetUnambiguousOccurrenceNumber,
+  intToOccurrenceNumberString,
+  isValidSubfield6,
+  recordGetMaxSubfield6OccurrenceNumberAsInteger,
+  recordResetSubfield6OccurrenceNumbers,
+  resetSubfield6Tag,
+  subfield6ResetOccurrenceNumber,
+  subfield6GetOccurrenceNumber,
+  subfield6GetOccurrenceNumberAsInteger,
+  // 5. subfield $8 related functions
+  getSubfield8LinkingNumber,
+  isValidSubfield8,
+  recordGetAllSubfield8LinkingNumbers,
+  recordGetFieldsWithSubfield8LinkingNumber,
+  // 6. merge, other
+  getCounterpart, // field merge: finds a similar field with which a field can merge
+  melindaFieldSpecs, // contains information about the legal fields and subfields, and their repeatability
+  mergeField,
+  postprocessRecords // clean-up function that cleans up both base and source record (which may be the same)
 };

package/src/indicator-fixes.js CHANGED Viewed

@@ -171,6 +171,18 @@ function normalize245Indicator1(field, record) {
   field.ind1 = field1XX.length === 0 ? '0' : '1';
 }
+/**
+ * Sets indicator 1 of a 520 field to '8' ("no display constant generated")
+ * when its single $a holds one of the generic abstract/summary labels
+ * (part of MELKEHITYS-2579). The field is modified in place; fields with
+ * any other tag are left untouched.
+ *
+ * @param {Object} field - MARC field with `tag`, `ind1` and `subfields`.
+ * @returns {void}
+ */
+function noDisplayConstantGenerated520Indicator1(field) {
+  if (field.tag !== '520') {
+    return;
+  }
+  const genericLabels = ['Abstract.', 'Abstrakt.', 'Abstrakti.', 'English Summary.', 'Sammandrag.', 'Tiivistelmä.'];
+  const as = field.subfields.filter(sf => sf.code === 'a');
+  // Inspect the $a itself: the first subfield of the field is not necessarily $a (it may be e.g. $6 or $8).
+  if (as.length === 1 && genericLabels.includes(as[0].value)) {
+    field.ind1 = '8';
+  }
+}
 function normalize776Indicator2(field) {
   if (field.tag !== '776') {
     return;
@@ -229,7 +241,7 @@ function getLanguages(record) {
 }
-export function recordNormalizeIndicators(record) {
+function recordNormalizeIndicators(record) {
   recordNormalize490(record);
   // Language is used to handle non-filing indicators
@@ -242,6 +254,7 @@ export function recordNormalizeIndicators(record) {
 function fieldNormalizeIndicators(field, record, languages) {
   normalize084Indicator1(field);
   normalize245Indicator1(field, record);
+  noDisplayConstantGenerated520Indicator1(field);
   normalizeNonFilingIndicator1(field, languages);
   normalizeNonFilingIndicator2(field, languages);
   normalize776Indicator2(field);

package/src/isbn-issn.js CHANGED Viewed

@@ -1,5 +1,5 @@
 import ISBN from 'isbn3';
-import validateISSN from '@natlibfi/issn-verify';
+import {issn as validateISSN} from '@natlibfi/issn-verify';
 // handleInvalid: move invalid 020$a to 020$z, and invalid 022$a to 022$y
 export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
@@ -29,8 +29,14 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
   function invalidISBN(isbn) {
     const isbnOnly = getFirstWord(isbn);
-    const auditedIsbn = ISBN.audit(isbnOnly);
-    return !auditedIsbn.validIsbn;
+    try {
+      const auditedIsbn = ISBN.audit(isbnOnly);
+      return !auditedIsbn.validIsbn;
+    }
+    catch {
+      return true;
+    }
   }
   function invalidSubfield(subfield) {
@@ -238,9 +244,8 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
       function normalizeIsbnValue(value) {
         const trimmedValue = getFirstWord(value);
         //const trimmedValue = trimISBN(value); // NB! This might lose information that should be stored in $q...
-        const auditResult = ISBN.audit(trimmedValue);
-        if (!auditResult.validIsbn) {
-          return undefined;
+        if (invalidISBN(trimmedValue)) {
+          return undefined; // should this return value (= nothing normalized), not undefined?
         }
         const numbersOnly = trimmedValue.replace(/[^0-9Xx]+/ug, '');
         const parsedIsbn = ISBN.parse(trimmedValue);

package/src/melindaCustomMergeFields.js CHANGED Viewed

@@ -1,4 +1,4 @@
-export const melindaCustomMergeFields = {'fields':
+export const melindaFieldSpecs = {'fields':
   [
     {
       'tag': 'leader',

package/src/merge-fields/counterpartField.js CHANGED Viewed

@@ -11,6 +11,7 @@ import {controlSubfieldsPermitMerge} from './controlSubfields.js';
 import {mergableIndicator1, mergableIndicator2} from './mergableIndicator.js';
 import {partsAgree} from '../normalizeSubfieldValueForComparison.js';
 import {normalizeForSamenessCheck, valueCarriesMeaning} from './worldKnowledge.js';
+import { provenanceSubfieldsPermitMerge } from './dataProvenance.js';
 const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:mergeField:counterpart');
 //const debugData = debug.extend('data');
@@ -375,6 +376,11 @@ function syntacticallyMergablePair(baseField, sourceField, config) {
     return false;
   }
+  if (!provenanceSubfieldsPermitMerge(baseField, sourceField)) {
+    nvdebug('non-mergable (reason: data provenance subfield)', debugDev);
+    return false;
+  }
   // NB! field1.tag and field2.tag might differ (1XX vs 7XX). Therefore required subfields might theoretically differ as well.
   // Note: Theoretically 260 $efg vs 264 with IND2=3 has already been handled by the preprocessor.
   // Thus check both:

package/src/merge-fields/dataProvenance.js ADDED Viewed

@@ -0,0 +1,41 @@
+// See https://www.loc.gov/marc/bibliographic/bdapndxj.html for details
+import {subfieldArraysContainSameData} from "../utils.js";
+/**
+ * Maps a MARC tag to the code of the subfield that carries data provenance
+ * in fields with that tag.
+ *
+ * @param {string} tag - MARC field tag, e.g. '245'.
+ * @returns {string|undefined} 'y', 'e', 'l' or '7'; undefined for tags starting with '00'.
+ */
+export function tagToDataProvenanceSubfieldCode(tag) {
+    if (['533', '800', '810', '811', '830'].includes(tag)) {
+        return 'y';
+    }
+    if (tag === '856' || tag === '857') {
+        return 'e';
+    }
+    // 76X, 77X and 78X
+    if (/^7[678]/u.test(tag)) {
+        return 'l';
+    }
+    if (tag.startsWith('00')) {
+        return undefined;
+    }
+    return '7';
+}
+/**
+ * Tells whether the data provenance subfields of two fields allow the fields to be merged.
+ * The provenance subfield code is chosen by the base field's tag, and the two fields'
+ * provenance subfields are compared with subfieldArraysContainSameData().
+ *
+ * @param {Object} baseField - field in the base record.
+ * @param {Object} sourceField - candidate field to merge into the base field.
+ * @returns {boolean} true when the provenance subfields do not prevent the merge.
+ */
+export function provenanceSubfieldsPermitMerge(baseField, sourceField) {
+    const provenanceSubfieldCode = tagToDataProvenanceSubfieldCode(baseField.tag);
+    // A field without subfields has no provenance data to compare.
+    // NOTE(review): the source-side guard mirrors the base-side one instead of throwing; confirm that permitting is the wanted outcome.
+    if (!baseField.subfields || !sourceField.subfields) {
+        return true;
+    }
+    if (provenanceSubfieldCode === undefined) {
+        return false;
+    }
+    const baseProvenanceSubfields = baseField.subfields.filter(sf => sf.code === provenanceSubfieldCode);
+    const sourceProvenanceSubfields = sourceField.subfields.filter(sf => sf.code === provenanceSubfieldCode);
+    // Currently we just compare two arrays. Later on we might do something more sophisticated with specific $7 data provenance category/relationship codes,
+    // or actual values.
+    return subfieldArraysContainSameData(baseProvenanceSubfields, sourceProvenanceSubfields);
+}

package/src/merge-fields/index.js CHANGED Viewed

@@ -18,16 +18,25 @@ import {mergeConfig as defaultConfig} from './mergeConfig.js';
 //const defaultConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '..', '..', 'src', 'merge-fields', 'config.json'), 'utf8'));
-export default function () {
+export default function (defaultTagPattern = undefined) {
   return {
     description, validate, fix
   };
+  // Picks the tag pattern in priority order: config.tagPattern, then the factory's
+  // defaultTagPattern (used by tests), then the built-in pattern.
+  function getTagPattern(config) {
+    return config?.tagPattern || defaultTagPattern || '^[1678](?:00|10|11|30)$';
+  }
   function mergeFieldsWithinRecord(record, config) {
     //const candFields = record.fields.toReversed(); // Node 20+ only! Filter via config?
-    const fields = config && config.tagPattern ? record.get(config.tagPattern) : record.get(/^[1678](?:00|10|11|30)$/u);
+    const fields = record.get(getTagPattern(config)); // config && config.tagPattern ? record.get(config.tagPattern) : record.get(/^[1678](?:00|10|11|30)$/u);
     fields.reverse();
     const mergedField = fields.find(f => mergeField(record, record, f, config));

package/src/merge-fields/mergeField.js CHANGED Viewed

@@ -106,8 +106,8 @@ function skipMergeField(baseRecord, sourceField, config) {
     return true;
   }
-  // Skip duplicate field:
-  if (baseRecord.fields.some(baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField))) {
+  // Skip duplicate field when merging two records (NB! internal merge merges/removes the duplicate field):
+  if (!baseRecord.internalMerge && baseRecord.fields.some(baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField))) {
     nvdebug(`skipMergeField(): field '${fieldToString(sourceField)}' already exists! No merge required!`, debugDev);
     sourceField.deleted = 1;
     return true;

package/src/merge-fields.test.js CHANGED Viewed

@@ -4,6 +4,7 @@ import validatorFactory from './merge-fields/index.js';
 import {READERS} from '@natlibfi/fixura';
 import generateTests from '@natlibfi/fixugen';
 import createDebugLogger from 'debug';
+import { nvdebug } from './utils.js';
 generateTests({
   callback,
@@ -29,17 +30,20 @@ async function testValidatorFactory() {
   assert.equal(typeof validator.validate, 'function');
 }
-async function callback({getFixture, enabled = true, fix = false}) {
+async function callback({getFixture, enabled = true, fix = false, tagPattern = false}) {
   if (enabled === false) {
     debug('TEST SKIPPED!');
     return;
   }
-  const validator = await validatorFactory();
+  nvdebug(`TAG PATTERN: ${tagPattern}`);
+  const validator = await validatorFactory(tagPattern);
   const record = new MarcRecord(getFixture('record.json'));
   const expectedResult = getFixture('expectedResult.json');
   // console.log(expectedResult); // eslint-disable-line
+  // NB! This validator will only use tags matching /^[1678](?:00|10|11|30)$/ unless tagPattern is specified!
   if (!fix) {
     const result = await validator.validate(record);
     assert.deepEqual(result, expectedResult);

package/src/mergeField500Lisapainokset.js CHANGED Viewed

@@ -127,7 +127,7 @@ function extractAllPrintData(relevantFields) {
 }
-export function mergeLisapainokset(record) {
+function mergeLisapainokset(record) {
   const relevantFields = getRelevantFields(record);
   if (relevantFields.length < 2) {
     return;

package/src/normalizeFieldForComparison.js CHANGED Viewed

@@ -19,6 +19,9 @@ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:no
 const debugDev = debug.extend('dev');
 export function isEnnakkotietoSubfieldG(subfield) {
+  if (valuelessSubfield(subfield)) {
+    return false;
+  }
   if (subfield.code !== 'g') {
     return false;
   }
@@ -116,6 +119,9 @@ function subfieldValueLowercase(value, subfieldCode, tag) {
 }
 function subfieldLowercase(sf, tag) {
+  if (valuelessSubfield(sf)) {
+    return;
+  }
   sf.value = subfieldValueLowercase(sf.value, sf.code, tag);
 }
@@ -148,6 +154,10 @@ function hack490SubfieldA(field) {
   // NB! This won't work, if the punctuation has not been stripped beforehand!
   function removeSarja(subfield) {
+    if (valuelessSubfield(subfield)) {
+      return;
+    }
     if (subfield.code !== 'a') {
       return;
     }
@@ -188,6 +198,9 @@ function normalizeISBN(field) {
   relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf));
   function normalizeIsbnSubfield(sf) {
+    if (valuelessSubfield(sf)) {
+      return;
+    }
     //nvdebug(` ISBN-subfield? ${subfieldToString(sf)}`);
     sf.value = sf.value.replace(/-/ug, '');
     sf.value = sf.value.replace(/x/u, 'X');
@@ -202,6 +215,9 @@ function fieldSpecificHacks(field) {
 export function fieldTrimSubfieldValues(field) {
   field.subfields?.forEach((sf) => {
+    if (valuelessSubfield(sf)) {
+      return;
+    }
     sf.value = sf.value.replace(/^[ \t\n]+/u, '');
     sf.value = sf.value.replace(/[ \t\n]+$/u, '');
     sf.value = sf.value.replace(/[ \t\n]+/gu, ' ');
@@ -212,6 +228,9 @@ function fieldRemoveDecomposedDiacritics(field) {
   // Raison d'être/motivation: "Sirén" and diacriticless "Siren" might refer to a same surname, so this normalization
   // allows us to compare authors and avoid duplicate fields.
   field.subfields.forEach((sf) => {
+    if (valuelessSubfield(sf)) {
+        return;
+    }
     sf.value = removeDecomposedDiacritics(sf.value);
   });
 }
@@ -297,6 +316,9 @@ export function cloneAndNormalizeFieldForComparison(field) {
     return clonedField;
   }
   clonedField.subfields.forEach((sf) => { // Do this for all fields or some fields?
+    if (valuelessSubfield(sf)) {
+      return;
+    }
     sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag);
     sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);
   });
@@ -318,3 +340,7 @@ function fieldSkipNormalization(field) {
   }
   return false;
 }
+// True when the subfield has no value at all (sf.value is undefined).
+function valuelessSubfield(sf) {
+  const {value} = sf;
+  return typeof value === 'undefined';
+}

package/src/punctuation2.js CHANGED Viewed

@@ -10,9 +10,10 @@
 *          (They are jumped over when looking for next (non-controlfield subfield)
 */
 import {validateSingleField} from './ending-punctuation.js';
+import {tagToDataProvenanceSubfieldCode} from './merge-fields/dataProvenance.js';
 import {fieldGetUnambiguousTag} from './subfield6Utils.js';
 //import createDebugLogger from 'debug';
-import {fieldToString, nvdebug} from './utils.js';
+import {fieldToString, isControlSubfieldCode, nvdebug} from './utils.js';
 import clone from 'clone';
 //const debug = createDebugLogger('debug/punctuation2');
@@ -49,12 +50,19 @@ export default function () {
   }
 }
-function isControlSubfield(subfield) {
-  return ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'].includes(subfield.code);
+// Tells whether a subfield is skipped when looking for the next relevant subfield:
+// either it is the tag's data provenance subfield or its code is a control subfield code.
+function isIrrelevantSubfield(subfield, tag) {
+  const {code} = subfield;
+  // Currently the control subfield check contains other stuff as well ($3, $4, $7, $9...)
+  return code === tagToDataProvenanceSubfieldCode(tag) || isControlSubfieldCode(code);
 }
 function getNextRelevantSubfield(field, currSubfieldIndex) {
-  return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isControlSubfield(subfield));
+  return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isIrrelevantSubfield(subfield, field.tag));
 }
 export function fieldGetFixedString(field, add = true) {
@@ -155,7 +163,8 @@ const remove490And830Whatever = [{'code': 'axyzv', 'followedBy': 'axyzv', 'remov
 const linkingEntryRemoveWhatever = [
   {'code': 'i', 'followedBy': 'at', 'remove': / ?:$/u}, // ':'
   {'code': 'at', 'remove': /\.$/u},
-  {'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz', 'remove': /\. -$/u}
+  // Only ". -" separator is still used in music. We can strip it, but can only create the non-music punctuation!
+  {'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz#', 'remove': /\. -$/u}
 ];

package/src/removeInferiorDataFields.js CHANGED Viewed

@@ -332,7 +332,10 @@ function deriveIndividualDeletables(record) {
     // MET-575 (merge: applies in postprocessing)
     const inferiorTerms = getPrepublicationTerms(currString);
-    const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...inferiorTerms];
+    // MELKEHITYS-3277-ish: non-AI is better than AI (a rare case where longer version is inferior):
+    const aiBased = `${currString} ‡7 (dpenmw)AI`;
+    const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...inferiorTerms, aiBased];
     if (subsets.length) {
       return processTodoList([...stillToDo, ...moreToDo], newDeletables);

package/src/resolveOrphanedSubfield6s.js CHANGED Viewed

@@ -41,7 +41,7 @@ export default function () {
   }
 }
-export function recordFixSubfield6OccurrenceNumbers(record) {
+function recordFixSubfield6OccurrenceNumbers(record) {
   const fieldsContainingSubfield6 = record.fields.filter(field => fieldHasSubfield(field, '6'));
   const orphanedFields = getOrphanedFields(fieldsContainingSubfield6);

package/src/sortSubfields.js CHANGED Viewed

@@ -67,9 +67,9 @@ export default function (defaultTagPattern) {
 // X00, X10, X11 and X130 could also form their own sets...
 // (ouch! sometimes $c comes after $d...): LoC: 100 0# ‡a Black Foot, ‡c Chief, ‡d d. 1877 ‡c (Spirit)
-const sortOrderForX00 = ['i', 'a', 'b', 'q', 'c', 'd', 'e', 't', 'u', 'l', 'f', 'x', 'y', 'z', '0', '5', '9']; // skip $g. Can't remember why, though...
-const sortOrderForX10 = ['i', 'a', 'b', 't', 'n', 'c', 'e', 'v', 'w', 'x', 'y', 'z', '0', '5', '9']; // somewhat iffy
-const sortOrderForX11 = ['a', 'n', 'd', 'c', 'e', 'g', 'j', '0', '5', '9'];
+const sortOrderForX00 = ['i', 'a', 'b', 'q', 'c', 'd', 'e', 't', 'u', 'l', 'f', 'x', 'y', 'z', '0', '1', '5', '9']; // skip $g. Can't remember why, though...
+const sortOrderForX10 = ['i', 'a', 'b', 't', 'n', 'c', 'e', 'v', 'w', 'x', 'y', 'z', '0', '1', '5', '9']; // somewhat iffy
+const sortOrderForX11 = ['a', 'n', 'd', 'c', 'e', 'g', 'j', '0', '1', '5', '9'];
 const sortOrderFor7XX = ['8', '7', 'i', 'a', 's', 't', 'b', 'c', 'd', 'm', 'h', 'k', 'o', 'x', 'z', 'g', 'q', 'w'];
 const sortOrderFor246 = ['i', 'a', 'b', 'n', 'p', 'f', '5', '9']; // Used by field 946 as well
@@ -89,7 +89,7 @@ const subfieldSortOrder = [
   {'tag': '245', 'sortOrder': ['a', 'b', 'n', 'p', 'k', 'f', 'c']},
   {'tag': '246', 'sortOrder': sortOrderFor246},
   {'tag': '382', 'sortOrder': ['a']},
-  {'tag': '385', 'sortOrder': ['8', 'm', 'n', 'a', '2', '0']},
+  {'tag': '385', 'sortOrder': ['8', 'm', 'n', 'a', '2', '0', '1']},
   {'tag': '386', 'sortOrder': ['8', 'm', 'n', 'a']},
   {'tag': '490', 'sortOrder': ['a', 'x', 'y', 'v', 'l']},
   {'tag': '505', 'sortOrder': ['a']},
@@ -218,7 +218,9 @@ export function sortAdjacentSubfields(field, externalSortOrder = []) {
   const finnishWay = twoBeforeZero(field);
-  const controlSubfieldOrder = finnishWay ? ['8', '7', '3', 'a', '4', '2', '0', '1', '5', '9'] : ['8', '7', '3', 'a', '4', '0', '2', '1', '5', '9'];
+  // Note: 760-789: '7' comes way earlier (after '6' and '8')
+  const controlSubfieldOrder = finnishWay ? ['8', '3', 'a', '4', '2', '0', '1', '7', '5', '9'] : ['8', '7', '3', 'a', '4', '0', '1', '2', '7', '5', '9'];
   swapSubfields(field, controlSubfieldOrder);
   const sortOrderForField = externalSortOrder.length > 0 ? externalSortOrder : getSubfieldSortOrder(field);