@natlibfi/marc-record-validators-melinda 12.0.0-alpha.1 → 12.0.0-alpha.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/{melinda-node-tests.yml → melinda-node-tests-and-publish.yml} +36 -11
- package/dist/access-rights.test.js +1 -1
- package/dist/access-rights.test.js.map +1 -1
- package/dist/addMissingField337.test.js +1 -1
- package/dist/addMissingField337.test.js.map +1 -1
- package/dist/addMissingField338.test.js +1 -1
- package/dist/addMissingField338.test.js.map +1 -1
- package/dist/cyrillux-usemarcon-replacement.test.js +4 -7
- package/dist/cyrillux-usemarcon-replacement.test.js.map +2 -2
- package/dist/cyrillux.test.js +1 -1
- package/dist/cyrillux.test.js.map +1 -1
- package/dist/double-commas.test.js +1 -1
- package/dist/double-commas.test.js.map +1 -1
- package/dist/empty-fields.test.js +1 -1
- package/dist/empty-fields.test.js.map +1 -1
- package/dist/ending-punctuation-conf.js +6 -4
- package/dist/ending-punctuation-conf.js.map +2 -2
- package/dist/ending-punctuation.js +88 -18
- package/dist/ending-punctuation.js.map +3 -3
- package/dist/ending-punctuation.test.js +198 -103
- package/dist/ending-punctuation.test.js.map +2 -2
- package/dist/field-008-18-34-character-groups.test.js +1 -1
- package/dist/field-008-18-34-character-groups.test.js.map +1 -1
- package/dist/field-structure.test.js +1 -1
- package/dist/field-structure.test.js.map +1 -1
- package/dist/index.js +122 -59
- package/dist/index.js.map +2 -2
- package/dist/indicator-fixes.js +11 -1
- package/dist/indicator-fixes.js.map +2 -2
- package/dist/isbn-issn.js +8 -5
- package/dist/isbn-issn.js.map +2 -2
- package/dist/melindaCustomMergeFields.js +1 -1
- package/dist/melindaCustomMergeFields.js.map +2 -2
- package/dist/merge-fields/counterpartField.js +5 -0
- package/dist/merge-fields/counterpartField.js.map +2 -2
- package/dist/merge-fields/dataProvenance.js +29 -0
- package/dist/merge-fields/dataProvenance.js.map +7 -0
- package/dist/merge-fields/index.js +11 -2
- package/dist/merge-fields/index.js.map +2 -2
- package/dist/merge-fields/mergeField.js +1 -1
- package/dist/merge-fields/mergeField.js.map +2 -2
- package/dist/merge-fields.test.js +4 -2
- package/dist/merge-fields.test.js.map +2 -2
- package/dist/mergeField500Lisapainokset.js +1 -1
- package/dist/mergeField500Lisapainokset.js.map +2 -2
- package/dist/normalizeFieldForComparison.js +24 -0
- package/dist/normalizeFieldForComparison.js.map +2 -2
- package/dist/punctuation2.js +11 -5
- package/dist/punctuation2.js.map +2 -2
- package/dist/removeInferiorDataFields.js +2 -1
- package/dist/removeInferiorDataFields.js.map +2 -2
- package/dist/resolveOrphanedSubfield6s.js +1 -1
- package/dist/resolveOrphanedSubfield6s.js.map +2 -2
- package/dist/sortSubfields.js +5 -5
- package/dist/sortSubfields.js.map +2 -2
- package/dist/translate-terms.test.js +12 -2
- package/dist/translate-terms.test.js.map +2 -2
- package/dist/utils.js +9 -3
- package/dist/utils.js.map +2 -2
- package/package.json +22 -23
- package/src/access-rights.test.js +1 -1
- package/src/addMissingField337.test.js +1 -1
- package/src/addMissingField338.test.js +1 -1
- package/src/cyrillux-usemarcon-replacement.test.js +4 -9
- package/src/cyrillux.test.js +1 -1
- package/src/double-commas.test.js +1 -1
- package/src/empty-fields.test.js +1 -1
- package/src/ending-punctuation-conf.js +6 -5
- package/src/ending-punctuation.js +115 -24
- package/src/ending-punctuation.test.js +187 -104
- package/src/field-008-18-34-character-groups.test.js +1 -1
- package/src/field-structure.test.js +1 -1
- package/src/index.js +132 -59
- package/src/indicator-fixes.js +14 -1
- package/src/isbn-issn.js +11 -6
- package/src/melindaCustomMergeFields.js +1 -1
- package/src/merge-fields/counterpartField.js +6 -0
- package/src/merge-fields/dataProvenance.js +41 -0
- package/src/merge-fields/index.js +11 -2
- package/src/merge-fields/mergeField.js +2 -2
- package/src/merge-fields.test.js +6 -2
- package/src/mergeField500Lisapainokset.js +1 -1
- package/src/normalizeFieldForComparison.js +26 -0
- package/src/punctuation2.js +14 -5
- package/src/removeInferiorDataFields.js +4 -1
- package/src/resolveOrphanedSubfield6s.js +1 -1
- package/src/sortSubfields.js +7 -5
- package/src/translate-terms.test.js +25 -2
- package/src/utils.js +19 -3
- package/test-fixtures/indicator-fixes/10/expectedResult.json +11 -0
- package/test-fixtures/indicator-fixes/10/metadata.json +4 -0
- package/test-fixtures/indicator-fixes/10/record.json +11 -0
- package/test-fixtures/merge-fields/f05/expectedResult.json +24 -0
- package/test-fixtures/merge-fields/f05/metadata.json +6 -0
- package/test-fixtures/merge-fields/f05/record.json +30 -0
- package/test-fixtures/remove-inferior-datafields/f16/expectedResult.json +12 -0
- package/test-fixtures/remove-inferior-datafields/f16/metadata.json +5 -0
- package/test-fixtures/remove-inferior-datafields/f16/record.json +14 -0
- package/test-fixtures/translate-terms-data.js +42 -0
- package/src/melindaCustomMergeFields.json +0 -5120
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import assert from 'node:assert';
|
|
2
2
|
import {MarcRecord} from '@natlibfi/marc-record';
|
|
3
|
-
import validatorFactory from '
|
|
3
|
+
import validatorFactory from './field-008-18-34-character-groups.js';
|
|
4
4
|
import {READERS} from '@natlibfi/fixura';
|
|
5
5
|
import generateTests from '@natlibfi/fixugen';
|
|
6
6
|
import createDebugLogger from 'debug';
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import assert from 'node:assert';
|
|
2
2
|
import {describe, it} from 'node:test';
|
|
3
3
|
import {MarcRecord} from '@natlibfi/marc-record';
|
|
4
|
-
import validatorFactory from '
|
|
4
|
+
import validatorFactory from './field-structure.js';
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
// Factory validation
|
package/src/index.js
CHANGED
|
@@ -1,62 +1,85 @@
|
|
|
1
|
-
import AccessRights from './access-rights';
|
|
2
|
-
import AddMissingField041 from './addMissingField041';
|
|
3
|
-
import AddMissingField336 from './addMissingField336';
|
|
4
|
-
import AddMissingField337 from './addMissingField337';
|
|
5
|
-
import AddMissingField338 from './addMissingField338';
|
|
6
|
-
import Cyrillux from './cyrillux';
|
|
7
|
-
import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement';
|
|
8
|
-
import DisambiguateSeriesStatements from './disambiguateSeriesStatements';
|
|
9
|
-
import DoubleCommas from './double-commas';
|
|
10
|
-
import DuplicatesInd1 from './duplicates-ind1';
|
|
11
|
-
import EmptyFields from './empty-fields';
|
|
12
|
-
import EndingPunctuation from './ending-punctuation';
|
|
13
|
-
import EndingWhitespace from './ending-whitespace';
|
|
14
|
-
import Field008CharacterGroups from './field-008-18-34-character-groups';
|
|
15
|
-
import Field505Separators from './field-505-separators';
|
|
16
|
-
import Field521Fix from './field-521-fix';
|
|
17
|
-
import FieldExclusion from './field-exclusion';
|
|
18
|
-
import FieldStructure from './field-structure';
|
|
19
|
-
import FieldsPresent from './fields-present';
|
|
20
|
-
import Fix33X from './fix-33X';
|
|
21
|
-
import FixCountryCodes from './fix-country-codes';
|
|
22
|
-
import FixLanguageCodes from './fix-language-codes';
|
|
23
|
-
import FixRelatorTerms from './fixRelatorTerms';
|
|
24
|
-
import FixedFields from './fixed-fields';
|
|
25
|
-
import IdenticalFields from './identical-fields';
|
|
26
|
-
import IndicatorFixes from './indicator-fixes';
|
|
27
|
-
import IsbnIssn from './isbn-issn';
|
|
28
|
-
import ItemLanguage from './item-language';
|
|
29
|
-
import MergeField500Lisapainokset from './mergeField500Lisapainokset';
|
|
30
|
-
import MergeFields from './merge-fields/';
|
|
31
|
-
import MergeRelatorTermFields from './mergeRelatorTermFields';
|
|
32
|
-
import Modernize502 from './modernize-502';
|
|
33
|
-
import MultipleSubfield0s from './multiple-subfield-0';
|
|
34
|
-
import NonBreakingSpace from './non-breaking-space';
|
|
35
|
-
import NormalizeDashes from './normalize-dashes';
|
|
36
|
-
import NormalizeIdentifiers from './normalize-identifiers';
|
|
37
|
-
import NormalizeQualifyingInformation from './normalize-qualifying-information';
|
|
38
|
-
import NormalizeUTF8Diacritics from './normalize-utf8-diacritics';
|
|
39
|
-
import Punctuation from './punctuation/';
|
|
40
|
-
import Punctuation2 from './punctuation2';
|
|
41
|
-
import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers';
|
|
42
|
-
import RemoveDuplicateDataFields from './removeDuplicateDataFields';
|
|
43
|
-
import RemoveInferiorDataFields from './removeInferiorDataFields';
|
|
44
|
-
import ResolvableExtReferences from './resolvable-ext-references-melinda';
|
|
45
|
-
import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s';
|
|
46
|
-
import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes';
|
|
47
|
-
import SortFields from './sortFields';
|
|
48
|
-
import SortRelatorTerms from './sortRelatorTerms';
|
|
49
|
-
import SortSubfields from './sortSubfields';
|
|
50
|
-
import SortTags from './sort-tags';
|
|
1
|
+
import AccessRights from './access-rights.js';
|
|
2
|
+
import AddMissingField041 from './addMissingField041.js';
|
|
3
|
+
import AddMissingField336 from './addMissingField336.js';
|
|
4
|
+
import AddMissingField337 from './addMissingField337.js';
|
|
5
|
+
import AddMissingField338 from './addMissingField338.js';
|
|
6
|
+
import Cyrillux from './cyrillux.js';
|
|
7
|
+
import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement.js';
|
|
8
|
+
import DisambiguateSeriesStatements from './disambiguateSeriesStatements.js';
|
|
9
|
+
import DoubleCommas from './double-commas.js';
|
|
10
|
+
import DuplicatesInd1 from './duplicates-ind1.js';
|
|
11
|
+
import EmptyFields from './empty-fields.js';
|
|
12
|
+
import EndingPunctuation from './ending-punctuation.js';
|
|
13
|
+
import EndingWhitespace from './ending-whitespace.js';
|
|
14
|
+
import Field008CharacterGroups from './field-008-18-34-character-groups.js';
|
|
15
|
+
import Field505Separators from './field-505-separators.js';
|
|
16
|
+
import Field521Fix from './field-521-fix.js';
|
|
17
|
+
import FieldExclusion from './field-exclusion.js';
|
|
18
|
+
import FieldStructure from './field-structure.js';
|
|
19
|
+
import FieldsPresent from './fields-present.js';
|
|
20
|
+
import Fix33X from './fix-33X.js';
|
|
21
|
+
import FixCountryCodes from './fix-country-codes.js';
|
|
22
|
+
import FixLanguageCodes from './fix-language-codes.js';
|
|
23
|
+
import FixRelatorTerms from './fixRelatorTerms.js';
|
|
24
|
+
import FixedFields from './fixed-fields.js';
|
|
25
|
+
import IdenticalFields from './identical-fields.js';
|
|
26
|
+
import IndicatorFixes from './indicator-fixes.js';
|
|
27
|
+
import IsbnIssn from './isbn-issn.js';
|
|
28
|
+
import ItemLanguage from './item-language.js';
|
|
29
|
+
import MergeField500Lisapainokset from './mergeField500Lisapainokset.js';
|
|
30
|
+
import MergeFields from './merge-fields/index.js';
|
|
31
|
+
import MergeRelatorTermFields from './mergeRelatorTermFields.js';
|
|
32
|
+
import Modernize502 from './modernize-502.js';
|
|
33
|
+
import MultipleSubfield0s from './multiple-subfield-0.js';
|
|
34
|
+
import NonBreakingSpace from './non-breaking-space.js';
|
|
35
|
+
import NormalizeDashes from './normalize-dashes.js';
|
|
36
|
+
import NormalizeIdentifiers from './normalize-identifiers.js';
|
|
37
|
+
import NormalizeQualifyingInformation from './normalize-qualifying-information.js';
|
|
38
|
+
import NormalizeUTF8Diacritics from './normalize-utf8-diacritics.js';
|
|
39
|
+
import Punctuation from './punctuation/index.js';
|
|
40
|
+
import Punctuation2 from './punctuation2.js';
|
|
41
|
+
import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers.js';
|
|
42
|
+
import RemoveDuplicateDataFields from './removeDuplicateDataFields.js';
|
|
43
|
+
import RemoveInferiorDataFields from './removeInferiorDataFields.js';
|
|
44
|
+
import ResolvableExtReferences from './resolvable-ext-references-melinda.js';
|
|
45
|
+
import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s.js';
|
|
46
|
+
import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes.js';
|
|
47
|
+
import SortFields from './sortFields.js';
|
|
48
|
+
import SortRelatorTerms from './sortRelatorTerms.js';
|
|
49
|
+
import SortSubfields from './sortSubfields.js';
|
|
50
|
+
import SortTags from './sort-tags.js';
|
|
51
51
|
// import StripPunctuation from './stripPunctuation'; // Can we add this here? Should be used very cautiously!
|
|
52
|
-
import SubfieldValueNormalizations from './subfieldValueNormalizations';
|
|
53
|
-
import SubfieldExclusion from './subfield-exclusion';
|
|
54
|
-
import Sync007And300 from './sync-007-and-300';
|
|
55
|
-
import TranslateTerms from './translate-terms';
|
|
56
|
-
import TypeOfDateF008 from './typeOfDate-008';
|
|
57
|
-
import UnicodeDecomposition from './unicode-decomposition';
|
|
58
|
-
import UpdateField540 from './update-field-540';
|
|
59
|
-
import Urn from './urn';
|
|
52
|
+
import SubfieldValueNormalizations from './subfieldValueNormalizations.js';
|
|
53
|
+
import SubfieldExclusion from './subfield-exclusion.js';
|
|
54
|
+
import Sync007And300 from './sync-007-and-300.js';
|
|
55
|
+
import TranslateTerms from './translate-terms.js';
|
|
56
|
+
import TypeOfDateF008 from './typeOfDate-008.js';
|
|
57
|
+
import UnicodeDecomposition from './unicode-decomposition.js';
|
|
58
|
+
import UpdateField540 from './update-field-540.js';
|
|
59
|
+
import Urn from './urn.js';
|
|
60
|
+
|
|
61
|
+
import {getCounterpart} from './merge-fields/counterpartField.js';
|
|
62
|
+
|
|
63
|
+
import {postprocessRecords} from './merge-fields/mergeOrAddPostprocess.js';
|
|
64
|
+
import {mergeField} from './merge-fields/mergeField.js';
|
|
65
|
+
import {fieldGetOccurrenceNumberPairs, fieldGetUnambiguousOccurrenceNumber, fieldToNormalizedString, fieldsToNormalizedString, get6s,
|
|
66
|
+
isValidSubfield6, recordGetMaxSubfield6OccurrenceNumberAsInteger,
|
|
67
|
+
intToOccurrenceNumberString, resetSubfield6Tag, subfield6ResetOccurrenceNumber, subfield6GetOccurrenceNumber,
|
|
68
|
+
subfield6GetOccurrenceNumberAsInteger} from './subfield6Utils.js';
|
|
69
|
+
|
|
70
|
+
import {getSubfield8LinkingNumber, isValidSubfield8, recordGetAllSubfield8LinkingNumbers, recordGetFieldsWithSubfield8LinkingNumber} from './subfield8Utils.js';
|
|
71
|
+
|
|
72
|
+
import {recordFixRelatorTerms} from './fixRelatorTerms.js';
|
|
73
|
+
import {fieldTrimSubfieldValues} from './normalizeFieldForComparison.js';
|
|
74
|
+
import {baseHasEqualOrHigherEncodingLevel, deleteAllPrepublicationNotesFromField500InNonPubRecord, encodingLevelIsBetterThanPrepublication, getEncodingLevel, isEnnakkotietoField, isEnnakkotietoSubfield} from './prepublicationUtils.js';
|
|
75
|
+
// Import the sibling source module rather than this package's own published dist build,
// so that src/ does not depend on a previously built or installed copy of itself.
import {melindaFieldSpecs} from './melindaCustomMergeFields.js';
|
|
76
|
+
|
|
77
|
+
import {cloneAndRemovePunctuation} from './normalizeFieldForComparison.js';
|
|
78
|
+
import {removeWorsePrepubField500s, removeWorsePrepubField594s} from './prepublicationUtils.js';
|
|
79
|
+
import {fieldFixPunctuation} from './punctuation2.js';
|
|
80
|
+
import {recordResetSubfield6OccurrenceNumbers} from './reindexSubfield6OccurenceNumbers.js';
|
|
81
|
+
import {sortAdjacentSubfields} from './sortSubfields.js';
|
|
82
|
+
import {fieldsToString} from './utils.js';
|
|
60
83
|
|
|
61
84
|
export {
|
|
62
85
|
AccessRights,
|
|
@@ -116,5 +139,55 @@ export {
|
|
|
116
139
|
UpdateField540,
|
|
117
140
|
Urn,
|
|
118
141
|
SortFields, // Keep this penultimate
|
|
119
|
-
MergeFields // Run this last *iff* you want to use this at all
|
|
142
|
+
MergeFields, // Run this last *iff* you want to use this at all
|
|
143
|
+
|
|
144
|
+
// Functions for processing record... These should probably go to some other project.
|
|
145
|
+
// Too specific for marc-record-js though...
|
|
146
|
+
// 1. generic low level stuff
|
|
147
|
+
getEncodingLevel,
|
|
148
|
+
|
|
149
|
+
// 2. text normalizations (eg. for similarity comparisons, field merge)
|
|
150
|
+
cloneAndRemovePunctuation,
|
|
151
|
+
fieldFixPunctuation,
|
|
152
|
+
fieldToNormalizedString,
|
|
153
|
+
fieldTrimSubfieldValues,
|
|
154
|
+
fieldsToNormalizedString,
|
|
155
|
+
fieldsToString,
|
|
156
|
+
recordFixRelatorTerms,
|
|
157
|
+
sortAdjacentSubfields,
|
|
158
|
+
|
|
159
|
+
// 3. prepublication stuff
|
|
160
|
+
baseHasEqualOrHigherEncodingLevel,
|
|
161
|
+
deleteAllPrepublicationNotesFromField500InNonPubRecord,
|
|
162
|
+
encodingLevelIsBetterThanPrepublication,
|
|
163
|
+
isEnnakkotietoField,
|
|
164
|
+
isEnnakkotietoSubfield,
|
|
165
|
+
removeWorsePrepubField500s,
|
|
166
|
+
removeWorsePrepubField594s,
|
|
167
|
+
|
|
168
|
+
// 4. subfield $6 related functions
|
|
169
|
+
fieldGetOccurrenceNumberPairs,
|
|
170
|
+
get6s,
|
|
171
|
+
fieldGetUnambiguousOccurrenceNumber,
|
|
172
|
+
intToOccurrenceNumberString,
|
|
173
|
+
isValidSubfield6,
|
|
174
|
+
recordGetMaxSubfield6OccurrenceNumberAsInteger,
|
|
175
|
+
recordResetSubfield6OccurrenceNumbers,
|
|
176
|
+
resetSubfield6Tag,
|
|
177
|
+
subfield6ResetOccurrenceNumber,
|
|
178
|
+
subfield6GetOccurrenceNumber,
|
|
179
|
+
subfield6GetOccurrenceNumberAsInteger,
|
|
180
|
+
|
|
181
|
+
// 5. subfield $8 related functions
|
|
182
|
+
getSubfield8LinkingNumber,
|
|
183
|
+
isValidSubfield8,
|
|
184
|
+
recordGetAllSubfield8LinkingNumbers,
|
|
185
|
+
recordGetFieldsWithSubfield8LinkingNumber,
|
|
186
|
+
|
|
187
|
+
// 6. merge, other
|
|
188
|
+
getCounterpart, // field merge: finds a similar field with which a field can merge
|
|
189
|
+
melindaFieldSpecs, // contains information about the legal fields and subfields, and their repeatability
|
|
190
|
+
mergeField,
|
|
191
|
+
postprocessRecords // clean-up function that cleans up both base and source record (which may be the same)
|
|
192
|
+
|
|
120
193
|
};
|
package/src/indicator-fixes.js
CHANGED
|
@@ -171,6 +171,18 @@ function normalize245Indicator1(field, record) {
|
|
|
171
171
|
field.ind1 = field1XX.length === 0 ? '0' : '1';
|
|
172
172
|
}
|
|
173
173
|
|
|
174
|
+
/**
 * Set indicator 1 of a 520 field to '8' ("no display constant generated") when the
 * field's only $a consists of a bare label such as "Abstract." (part of MELKEHITYS-2579).
 * Fields with any other tag are left untouched. The field is modified in place.
 *
 * @param {{tag: string, ind1: string, subfields: Array<{code: string, value: string}>}} field
 * @returns {void}
 */
function noDisplayConstantGenerated520Indicator1(field) {
  if (field.tag !== '520') {
    return;
  }

  const bareLabels = ['Abstract.', 'Abstrakt.', 'Abstrakti.', 'English Summary.', 'Sammandrag.', 'Tiivistelmä.'];
  const as = field.subfields.filter(sf => sf.code === 'a');

  // Check the value of the sole $a itself; the first subfield of the field is not
  // necessarily $a (it may be eg. $6, $8 or $3).
  if (as.length === 1 && bareLabels.includes(as[0].value)) {
    field.ind1 = '8';
  }
}
|
|
185
|
+
|
|
174
186
|
function normalize776Indicator2(field) {
|
|
175
187
|
if (field.tag !== '776') {
|
|
176
188
|
return;
|
|
@@ -229,7 +241,7 @@ function getLanguages(record) {
|
|
|
229
241
|
|
|
230
242
|
}
|
|
231
243
|
|
|
232
|
-
|
|
244
|
+
function recordNormalizeIndicators(record) {
|
|
233
245
|
recordNormalize490(record);
|
|
234
246
|
|
|
235
247
|
// Language is used to handle non-filing indicators
|
|
@@ -242,6 +254,7 @@ export function recordNormalizeIndicators(record) {
|
|
|
242
254
|
function fieldNormalizeIndicators(field, record, languages) {
|
|
243
255
|
normalize084Indicator1(field);
|
|
244
256
|
normalize245Indicator1(field, record);
|
|
257
|
+
noDisplayConstantGenerated520Indicator1(field);
|
|
245
258
|
normalizeNonFilingIndicator1(field, languages);
|
|
246
259
|
normalizeNonFilingIndicator2(field, languages);
|
|
247
260
|
normalize776Indicator2(field);
|
package/src/isbn-issn.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import ISBN from 'isbn3';
|
|
2
|
-
import validateISSN from '@natlibfi/issn-verify';
|
|
2
|
+
import {issn as validateISSN} from '@natlibfi/issn-verify';
|
|
3
3
|
|
|
4
4
|
// handleInvalid: move invalid 020$a to 020$z, and invalid 022$a to 022$y
|
|
5
5
|
export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
|
|
@@ -29,8 +29,14 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
|
|
|
29
29
|
|
|
30
30
|
// Tell whether the first word of the given value fails the ISBN audit.
// Anything the audit cannot process (it throws) is reported as invalid as well.
function invalidISBN(isbn) {
  const isbnOnly = getFirstWord(isbn);
  try {
    const {validIsbn} = ISBN.audit(isbnOnly);
    return !validIsbn;
  } catch {
    return true;
  }
}
|
|
35
41
|
|
|
36
42
|
function invalidSubfield(subfield) {
|
|
@@ -238,9 +244,8 @@ export default ({hyphenateISBN = false, handleInvalid = false} = {}) => {
|
|
|
238
244
|
function normalizeIsbnValue(value) {
|
|
239
245
|
const trimmedValue = getFirstWord(value);
|
|
240
246
|
//const trimmedValue = trimISBN(value); // NB! This might lose information that should be stored in $q...
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
return undefined;
|
|
247
|
+
if (invalidISBN(trimmedValue)) {
|
|
248
|
+
return undefined; // should this return value (= nothing normalized), not undefined?
|
|
244
249
|
}
|
|
245
250
|
const numbersOnly = trimmedValue.replace(/[^0-9Xx]+/ug, '');
|
|
246
251
|
const parsedIsbn = ISBN.parse(trimmedValue);
|
|
@@ -11,6 +11,7 @@ import {controlSubfieldsPermitMerge} from './controlSubfields.js';
|
|
|
11
11
|
import {mergableIndicator1, mergableIndicator2} from './mergableIndicator.js';
|
|
12
12
|
import {partsAgree} from '../normalizeSubfieldValueForComparison.js';
|
|
13
13
|
import {normalizeForSamenessCheck, valueCarriesMeaning} from './worldKnowledge.js';
|
|
14
|
+
import { provenanceSubfieldsPermitMerge } from './dataProvenance.js';
|
|
14
15
|
|
|
15
16
|
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:mergeField:counterpart');
|
|
16
17
|
//const debugData = debug.extend('data');
|
|
@@ -375,6 +376,11 @@ function syntacticallyMergablePair(baseField, sourceField, config) {
|
|
|
375
376
|
return false;
|
|
376
377
|
}
|
|
377
378
|
|
|
379
|
+
if (!provenanceSubfieldsPermitMerge(baseField, sourceField)) {
|
|
380
|
+
nvdebug('non-mergable (reason: data provenance subfield)', debugDev);
|
|
381
|
+
return false;
|
|
382
|
+
}
|
|
383
|
+
|
|
378
384
|
// NB! field1.tag and field2.tag might differ (1XX vs 7XX). Therefore required subfields might theoretically differ as well.
|
|
379
385
|
// Note: Theoretically 260 $efg vs 264 with IND2=3 has already been handled by the preprocessor.
|
|
380
386
|
// Thus check both:
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// See https://www.loc.gov/marc/bibliographic/bdapndxj.html for details
|
|
2
|
+
|
|
3
|
+
import {subfieldArraysContainSameData} from "../utils.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Map a field tag to the subfield code that carries data provenance in that field.
 *
 * @param {string} tag - three-character MARC field tag
 * @returns {string|undefined} 'y' for 533/800/810/811/830, 'e' for 856/857,
 *   'l' for tags beginning with 76, 77 or 78, undefined for tags beginning with 00,
 *   and '7' for every other tag.
 */
export function tagToDataProvenanceSubfieldCode(tag) {
  if (['533', '800', '810', '811', '830'].includes(tag)) {
    return 'y';
  }

  if (tag === '856' || tag === '857') {
    return 'e';
  }

  if (/^7[678]/u.test(tag)) {
    return 'l';
  }

  // Tags beginning with 00 get no provenance subfield code at all
  if (tag.startsWith('00')) {
    return undefined;
  }

  return '7';
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
/**
 * Decide whether the data provenance subfields of two fields allow them to be merged.
 *
 * The provenance subfield code is derived from the base field's tag. Merging is
 * permitted when the provenance subfields picked from both fields contain the same
 * data, as judged by subfieldArraysContainSameData().
 *
 * @param {object} baseField
 * @param {object} sourceField
 * @returns {boolean} true when the base field has no subfields at all; false when the
 *   base tag has no provenance subfield code; otherwise the comparison result.
 */
export function provenanceSubfieldsPermitMerge(baseField, sourceField) {
  const provenanceSubfieldCode = tagToDataProvenanceSubfieldCode(baseField.tag);
  if (!baseField.subfields) {
    return true;
  }
  if (provenanceSubfieldCode === undefined) {
    return false;
  }

  const isProvenanceSubfield = sf => sf.code === provenanceSubfieldCode;
  const baseProvenanceSubfields = baseField.subfields.filter(isProvenanceSubfield);
  // A source field without a subfield array simply has no provenance subfields;
  // treat it as an empty list instead of failing on undefined.filter().
  const sourceProvenanceSubfields = (sourceField.subfields ?? []).filter(isProvenanceSubfield);

  // Currently we just compare two arrays. Later on we might do something more sophisticated
  // with specific $7 data provenance category/relationship codes, or actual values.
  return subfieldArraysContainSameData(baseProvenanceSubfields, sourceProvenanceSubfields);
}
|
|
@@ -18,16 +18,25 @@ import {mergeConfig as defaultConfig} from './mergeConfig.js';
|
|
|
18
18
|
|
|
19
19
|
//const defaultConfig = JSON.parse(fs.readFileSync(path.join(__dirname, '..', '..', 'src', 'merge-fields', 'config.json'), 'utf8'));
|
|
20
20
|
|
|
21
|
-
export default function () {
|
|
21
|
+
export default function (defaultTagPattern = undefined) {
|
|
22
22
|
|
|
23
23
|
return {
|
|
24
24
|
description, validate, fix
|
|
25
25
|
};
|
|
26
26
|
|
|
27
|
+
// Pick the tag pattern to use. Precedence: config.tagPattern, then the factory's
// defaultTagPattern argument (used by tests), then the built-in pattern.
function getTagPattern(config) {
  return config?.tagPattern || defaultTagPattern || '^[1678](?:00|10|11|30)$';
}
|
|
27
36
|
|
|
28
37
|
function mergeFieldsWithinRecord(record, config) {
|
|
29
38
|
//const candFields = record.fields.toReversed(); // Node 20+ only! Filter via config?
|
|
30
|
-
const fields = config && config.tagPattern ? record.get(config.tagPattern) : record.get(/^[1678](?:00|10|11|30)$/u);
|
|
39
|
+
const fields = record.get(getTagPattern(config)); // config && config.tagPattern ? record.get(config.tagPattern) : record.get(/^[1678](?:00|10|11|30)$/u);
|
|
31
40
|
|
|
32
41
|
fields.reverse();
|
|
33
42
|
const mergedField = fields.find(f => mergeField(record, record, f, config));
|
|
@@ -106,8 +106,8 @@ function skipMergeField(baseRecord, sourceField, config) {
|
|
|
106
106
|
return true;
|
|
107
107
|
}
|
|
108
108
|
|
|
109
|
-
// Skip duplicate field:
|
|
110
|
-
if (baseRecord.fields.some(baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField))) {
|
|
109
|
+
// Skip duplicate field when merging two records (NB! internal merge merges/removes the duplicate field):
|
|
110
|
+
if (!baseRecord.internalMerge && baseRecord.fields.some(baseField => !baseField.mergeCandidate && fieldsAreIdentical(sourceField, baseField))) {
|
|
111
111
|
nvdebug(`skipMergeField(): field '${fieldToString(sourceField)}' already exists! No merge required!`, debugDev);
|
|
112
112
|
sourceField.deleted = 1;
|
|
113
113
|
return true;
|
package/src/merge-fields.test.js
CHANGED
|
@@ -4,6 +4,7 @@ import validatorFactory from './merge-fields/index.js';
|
|
|
4
4
|
import {READERS} from '@natlibfi/fixura';
|
|
5
5
|
import generateTests from '@natlibfi/fixugen';
|
|
6
6
|
import createDebugLogger from 'debug';
|
|
7
|
+
import { nvdebug } from './utils.js';
|
|
7
8
|
|
|
8
9
|
generateTests({
|
|
9
10
|
callback,
|
|
@@ -29,17 +30,20 @@ async function testValidatorFactory() {
|
|
|
29
30
|
assert.equal(typeof validator.validate, 'function');
|
|
30
31
|
}
|
|
31
32
|
|
|
32
|
-
async function callback({getFixture, enabled = true, fix = false}) {
|
|
33
|
+
async function callback({getFixture, enabled = true, fix = false, tagPattern = false}) {
|
|
33
34
|
if (enabled === false) {
|
|
34
35
|
debug('TEST SKIPPED!');
|
|
35
36
|
return;
|
|
36
37
|
}
|
|
37
38
|
|
|
38
|
-
|
|
39
|
+
nvdebug(`TAG PATTERN: ${tagPattern}`);
|
|
40
|
+
|
|
41
|
+
const validator = await validatorFactory(tagPattern);
|
|
39
42
|
const record = new MarcRecord(getFixture('record.json'));
|
|
40
43
|
const expectedResult = getFixture('expectedResult.json');
|
|
41
44
|
// console.log(expectedResult); // eslint-disable-line
|
|
42
45
|
|
|
46
|
+
// NB! This validator will only use tags matching /^[1678](?:00|10|11|30)$/ unless tagPattern is specified!
|
|
43
47
|
if (!fix) {
|
|
44
48
|
const result = await validator.validate(record);
|
|
45
49
|
assert.deepEqual(result, expectedResult);
|
|
@@ -127,7 +127,7 @@ function extractAllPrintData(relevantFields) {
|
|
|
127
127
|
}
|
|
128
128
|
|
|
129
129
|
|
|
130
|
-
|
|
130
|
+
function mergeLisapainokset(record) {
|
|
131
131
|
const relevantFields = getRelevantFields(record);
|
|
132
132
|
if (relevantFields.length < 2) {
|
|
133
133
|
return;
|
|
@@ -19,6 +19,9 @@ const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:no
|
|
|
19
19
|
const debugDev = debug.extend('dev');
|
|
20
20
|
|
|
21
21
|
export function isEnnakkotietoSubfieldG(subfield) {
|
|
22
|
+
if (valuelessSubfield(subfield)) {
|
|
23
|
+
return false;
|
|
24
|
+
}
|
|
22
25
|
if (subfield.code !== 'g') {
|
|
23
26
|
return false;
|
|
24
27
|
}
|
|
@@ -116,6 +119,9 @@ function subfieldValueLowercase(value, subfieldCode, tag) {
|
|
|
116
119
|
}
|
|
117
120
|
|
|
118
121
|
function subfieldLowercase(sf, tag) {
|
|
122
|
+
if (valuelessSubfield(sf)) {
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
119
125
|
sf.value = subfieldValueLowercase(sf.value, sf.code, tag);
|
|
120
126
|
}
|
|
121
127
|
|
|
@@ -148,6 +154,10 @@ function hack490SubfieldA(field) {
|
|
|
148
154
|
|
|
149
155
|
// NB! This won't work, if the punctuation has not been stripped beforehand!
|
|
150
156
|
function removeSarja(subfield) {
|
|
157
|
+
if (valuelessSubfield(subfield)) {
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
|
|
151
161
|
if (subfield.code !== 'a') {
|
|
152
162
|
return;
|
|
153
163
|
}
|
|
@@ -188,6 +198,9 @@ function normalizeISBN(field) {
|
|
|
188
198
|
relevantSubfields.forEach(sf => normalizeIsbnSubfield(sf));
|
|
189
199
|
|
|
190
200
|
function normalizeIsbnSubfield(sf) {
|
|
201
|
+
if (valuelessSubfield(sf)) {
|
|
202
|
+
return;
|
|
203
|
+
}
|
|
191
204
|
//nvdebug(` ISBN-subfield? ${subfieldToString(sf)}`);
|
|
192
205
|
sf.value = sf.value.replace(/-/ug, '');
|
|
193
206
|
sf.value = sf.value.replace(/x/u, 'X');
|
|
@@ -202,6 +215,9 @@ function fieldSpecificHacks(field) {
|
|
|
202
215
|
|
|
203
216
|
export function fieldTrimSubfieldValues(field) {
|
|
204
217
|
field.subfields?.forEach((sf) => {
|
|
218
|
+
if (valuelessSubfield(sf)) {
|
|
219
|
+
return;
|
|
220
|
+
}
|
|
205
221
|
sf.value = sf.value.replace(/^[ \t\n]+/u, '');
|
|
206
222
|
sf.value = sf.value.replace(/[ \t\n]+$/u, '');
|
|
207
223
|
sf.value = sf.value.replace(/[ \t\n]+/gu, ' ');
|
|
@@ -212,6 +228,9 @@ function fieldRemoveDecomposedDiacritics(field) {
|
|
|
212
228
|
// Raison d'être/motivation: "Sirén" and diacriticless "Siren" might refer to a same surname, so this normalization
|
|
213
229
|
// allows us to compare authors and avoid duplicate fields.
|
|
214
230
|
field.subfields.forEach((sf) => {
|
|
231
|
+
if (valuelessSubfield(sf)) {
|
|
232
|
+
return;
|
|
233
|
+
}
|
|
215
234
|
sf.value = removeDecomposedDiacritics(sf.value);
|
|
216
235
|
});
|
|
217
236
|
}
|
|
@@ -297,6 +316,9 @@ export function cloneAndNormalizeFieldForComparison(field) {
|
|
|
297
316
|
return clonedField;
|
|
298
317
|
}
|
|
299
318
|
clonedField.subfields.forEach((sf) => { // Do this for all fields or some fields?
|
|
319
|
+
if (valuelessSubfield(sf)) {
|
|
320
|
+
return;
|
|
321
|
+
}
|
|
300
322
|
sf.value = normalizeSubfieldValue(sf.value, sf.code, field.tag);
|
|
301
323
|
sf.value = removeCharsThatDontCarryMeaning(sf.value, field.tag, sf.code);
|
|
302
324
|
});
|
|
@@ -318,3 +340,7 @@ function fieldSkipNormalization(field) {
|
|
|
318
340
|
}
|
|
319
341
|
return false;
|
|
320
342
|
}
|
|
343
|
+
|
|
344
|
+
/**
 * Tell whether a subfield lacks a value (undefined or null), i.e. whether string
 * operations on sf.value must be skipped. An empty string counts as a value.
 *
 * @param {{code?: string, value?: (string|null)}} sf
 * @returns {boolean}
 */
function valuelessSubfield(sf) {
  return sf.value === undefined || sf.value === null;
}
|
package/src/punctuation2.js
CHANGED
|
@@ -10,9 +10,10 @@
|
|
|
10
10
|
* (They are jumped over when looking for next (non-controlfield subfield)
|
|
11
11
|
*/
|
|
12
12
|
import {validateSingleField} from './ending-punctuation.js';
|
|
13
|
+
import {tagToDataProvenanceSubfieldCode} from './merge-fields/dataProvenance.js';
|
|
13
14
|
import {fieldGetUnambiguousTag} from './subfield6Utils.js';
|
|
14
15
|
//import createDebugLogger from 'debug';
|
|
15
|
-
import {fieldToString, nvdebug} from './utils.js';
|
|
16
|
+
import {fieldToString, isControlSubfieldCode, nvdebug} from './utils.js';
|
|
16
17
|
import clone from 'clone';
|
|
17
18
|
|
|
18
19
|
//const debug = createDebugLogger('debug/punctuation2');
|
|
@@ -49,12 +50,19 @@ export default function () {
|
|
|
49
50
|
}
|
|
50
51
|
}
|
|
51
52
|
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
function isIrrelevantSubfield(subfield, tag) {
|
|
56
|
+
const dataProvenanceSubfieldCode = tagToDataProvenanceSubfieldCode(tag);
|
|
57
|
+
if (subfield.code === dataProvenanceSubfieldCode) {
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
return isControlSubfieldCode(subfield.code); // Currently this contains other stuff as well ($3, $4, $7, $9...)
|
|
54
61
|
}
|
|
55
62
|
|
|
63
|
+
|
|
56
64
|
function getNextRelevantSubfield(field, currSubfieldIndex) {
|
|
57
|
-
return field.subfields.find((subfield, index) => index > currSubfieldIndex && !
|
|
65
|
+
return field.subfields.find((subfield, index) => index > currSubfieldIndex && !isIrrelevantSubfield(subfield, field.tag));
|
|
58
66
|
}
|
|
59
67
|
|
|
60
68
|
export function fieldGetFixedString(field, add = true) {
|
|
@@ -155,7 +163,8 @@ const remove490And830Whatever = [{'code': 'axyzv', 'followedBy': 'axyzv', 'remov
|
|
|
155
163
|
const linkingEntryRemoveWhatever = [
|
|
156
164
|
{'code': 'i', 'followedBy': 'at', 'remove': / ?:$/u}, // ':'
|
|
157
165
|
{'code': 'at', 'remove': /\.$/u},
|
|
158
|
-
|
|
166
|
+
// Only ". -" separator is still used in music. We can strip it, but can only create the non-music punctuation!
|
|
167
|
+
{'code': 'abdghiklmnopqrstuwxyz', 'followedBy': 'abdghiklmnopqrstuwxyz#', 'remove': /\. -$/u}
|
|
159
168
|
];
|
|
160
169
|
|
|
161
170
|
|
|
@@ -332,7 +332,10 @@ function deriveIndividualDeletables(record) {
|
|
|
332
332
|
// MET-575 (merge: applies in postprocessing)
|
|
333
333
|
const inferiorTerms = getPrepublicationTerms(currString);
|
|
334
334
|
|
|
335
|
-
|
|
335
|
+
// MELKEHITYS-3277-ish: non-AI is better than AI (a rare case where longer version is inferior):
|
|
336
|
+
const aiBased = `${currString} ‡7 (dpenmw)AI`;
|
|
337
|
+
|
|
338
|
+
const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...inferiorTerms, aiBased];
|
|
336
339
|
|
|
337
340
|
if (subsets.length) {
|
|
338
341
|
return processTodoList([...stillToDo, ...moreToDo], newDeletables);
|
|
@@ -41,7 +41,7 @@ export default function () {
|
|
|
41
41
|
}
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
-
|
|
44
|
+
function recordFixSubfield6OccurrenceNumbers(record) {
|
|
45
45
|
const fieldsContainingSubfield6 = record.fields.filter(field => fieldHasSubfield(field, '6'));
|
|
46
46
|
const orphanedFields = getOrphanedFields(fieldsContainingSubfield6);
|
|
47
47
|
|
package/src/sortSubfields.js
CHANGED
|
@@ -67,9 +67,9 @@ export default function (defaultTagPattern) {
|
|
|
67
67
|
|
|
68
68
|
// X00, X10, X11 and X130 could also form their own sets...
|
|
69
69
|
// (ouch! sometimes $c comes after $d...): LoC: 100 0# ‡a Black Foot, ‡c Chief, ‡d d. 1877 ‡c (Spirit)
|
|
70
|
-
const sortOrderForX00 = ['i', 'a', 'b', 'q', 'c', 'd', 'e', 't', 'u', 'l', 'f', 'x', 'y', 'z', '0', '5', '9']; // skip $g. Can't remember why, though...
|
|
71
|
-
const sortOrderForX10 = ['i', 'a', 'b', 't', 'n', 'c', 'e', 'v', 'w', 'x', 'y', 'z', '0', '5', '9']; // somewhat iffy
|
|
72
|
-
const sortOrderForX11 = ['a', 'n', 'd', 'c', 'e', 'g', 'j', '0', '5', '9'];
|
|
70
|
+
const sortOrderForX00 = ['i', 'a', 'b', 'q', 'c', 'd', 'e', 't', 'u', 'l', 'f', 'x', 'y', 'z', '0', '1', '5', '9']; // skip $g. Can't remember why, though...
|
|
71
|
+
const sortOrderForX10 = ['i', 'a', 'b', 't', 'n', 'c', 'e', 'v', 'w', 'x', 'y', 'z', '0', '1', '5', '9']; // somewhat iffy
|
|
72
|
+
const sortOrderForX11 = ['a', 'n', 'd', 'c', 'e', 'g', 'j', '0', '1', '5', '9'];
|
|
73
73
|
const sortOrderFor7XX = ['8', '7', 'i', 'a', 's', 't', 'b', 'c', 'd', 'm', 'h', 'k', 'o', 'x', 'z', 'g', 'q', 'w'];
|
|
74
74
|
const sortOrderFor246 = ['i', 'a', 'b', 'n', 'p', 'f', '5', '9']; // Used by field 946 as well
|
|
75
75
|
|
|
@@ -89,7 +89,7 @@ const subfieldSortOrder = [
|
|
|
89
89
|
{'tag': '245', 'sortOrder': ['a', 'b', 'n', 'p', 'k', 'f', 'c']},
|
|
90
90
|
{'tag': '246', 'sortOrder': sortOrderFor246},
|
|
91
91
|
{'tag': '382', 'sortOrder': ['a']},
|
|
92
|
-
{'tag': '385', 'sortOrder': ['8', 'm', 'n', 'a', '2', '0']},
|
|
92
|
+
{'tag': '385', 'sortOrder': ['8', 'm', 'n', 'a', '2', '0', '1']},
|
|
93
93
|
{'tag': '386', 'sortOrder': ['8', 'm', 'n', 'a']},
|
|
94
94
|
{'tag': '490', 'sortOrder': ['a', 'x', 'y', 'v', 'l']},
|
|
95
95
|
{'tag': '505', 'sortOrder': ['a']},
|
|
@@ -218,7 +218,9 @@ export function sortAdjacentSubfields(field, externalSortOrder = []) {
|
|
|
218
218
|
|
|
219
219
|
|
|
220
220
|
const finnishWay = twoBeforeZero(field);
|
|
221
|
-
|
|
221
|
+
|
|
222
|
+
// Note: 760-789: '7' comes way earlier (after '6' and '8')
|
|
223
|
+
const controlSubfieldOrder = finnishWay ? ['8', '3', 'a', '4', '2', '0', '1', '7', '5', '9'] : ['8', '7', '3', 'a', '4', '0', '1', '2', '7', '5', '9'];
|
|
222
224
|
swapSubfields(field, controlSubfieldOrder);
|
|
223
225
|
|
|
224
226
|
const sortOrderForField = externalSortOrder.length > 0 ? externalSortOrder : getSubfieldSortOrder(field);
|