@natlibfi/marc-record-validators-melinda 10.16.0-alpha.2 → 10.16.1-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/access-rights.js.map +1 -1
- package/dist/access-rights.spec.js.map +1 -1
- package/dist/double-commas.js.map +1 -1
- package/dist/double-commas.spec.js.map +1 -1
- package/dist/duplicates-ind1.js.map +1 -1
- package/dist/duplicates-ind1.spec.js.map +1 -1
- package/dist/empty-fields.js.map +1 -1
- package/dist/empty-fields.spec.js.map +1 -1
- package/dist/ending-punctuation-conf.js.map +1 -1
- package/dist/ending-punctuation.js.map +1 -1
- package/dist/ending-punctuation.spec.js.map +1 -1
- package/dist/ending-whitespace.js.map +1 -1
- package/dist/ending-whitespace.spec.js.map +1 -1
- package/dist/field-008-18-34-character-groups.js.map +1 -1
- package/dist/field-008-18-34-character-groups.spec.js.map +1 -1
- package/dist/field-505-separators.js.map +1 -1
- package/dist/field-505-separators.spec.js.map +1 -1
- package/dist/field-521-fix.js.map +1 -1
- package/dist/field-521-fix.spec.js.map +1 -1
- package/dist/field-exclusion.js.map +1 -1
- package/dist/field-exclusion.spec.js.map +1 -1
- package/dist/field-structure.js.map +1 -1
- package/dist/field-structure.spec.js.map +1 -1
- package/dist/fields-present.js.map +1 -1
- package/dist/fields-present.spec.js.map +1 -1
- package/dist/fix-country-codes.js.map +1 -1
- package/dist/fix-country-codes.spec.js.map +1 -1
- package/dist/fixRelatorTerms.js.map +1 -1
- package/dist/fixRelatorTerms.spec.js.map +1 -1
- package/dist/fixed-fields.js.map +1 -1
- package/dist/fixed-fields.spec.js.map +1 -1
- package/dist/identical-fields.js.map +1 -1
- package/dist/identical-fields.spec.js.map +1 -1
- package/dist/index.js.map +1 -1
- package/dist/indicator-fixes.js.map +1 -1
- package/dist/indicator-fixes.spec.js.map +1 -1
- package/dist/isbn-issn.js.map +1 -1
- package/dist/isbn-issn.spec.js.map +1 -1
- package/dist/item-language.js.map +1 -1
- package/dist/item-language.spec.js.map +1 -1
- package/dist/mergeField500Lisapainokset.js.map +1 -1
- package/dist/mergeField500Lisapainokset.spec.js.map +1 -1
- package/dist/mergeRelatorTermFields.js.map +1 -1
- package/dist/mergeRelatorTermFields.spec.js.map +1 -1
- package/dist/multiple-subfield-0.js.map +1 -1
- package/dist/multiple-subfield-0.spec.js.map +1 -1
- package/dist/non-breaking-space.js.map +1 -1
- package/dist/non-breaking-space.spec.js.map +1 -1
- package/dist/normalize-dashes.js.map +1 -1
- package/dist/normalize-dashes.spec.js.map +1 -1
- package/dist/normalize-identifiers.js.map +1 -1
- package/dist/normalize-identifiers.spec.js.map +1 -1
- package/dist/normalize-qualifying-information.js.map +1 -1
- package/dist/normalize-qualifying-information.spec.js.map +1 -1
- package/dist/normalize-utf8-diacritics.js.map +1 -1
- package/dist/normalize-utf8-diacritics.spec.js.map +1 -1
- package/dist/normalizeFieldForComparison.js.map +1 -1
- package/dist/normalizeSubfieldValueForComparison.js.map +1 -1
- package/dist/prepublicationUtils.js.map +1 -1
- package/dist/punctuation/index.js.map +1 -1
- package/dist/punctuation/rules/aut.js.map +1 -1
- package/dist/punctuation/rules/bib.js.map +1 -1
- package/dist/punctuation/rules/index.js.map +1 -1
- package/dist/punctuation.spec.js.map +1 -1
- package/dist/punctuation2.js +131 -89
- package/dist/punctuation2.js.map +1 -1
- package/dist/punctuation2.spec.js.map +1 -1
- package/dist/reindexSubfield6OccurenceNumbers.js.map +1 -1
- package/dist/reindexSubfield6OccurenceNumbers.spec.js.map +1 -1
- package/dist/removeDuplicateDataFields.js.map +1 -1
- package/dist/removeDuplicateDataFields.spec.js.map +1 -1
- package/dist/removeInferiorDataFields.js.map +1 -1
- package/dist/removeInferiorDataFields.spec.js.map +1 -1
- package/dist/resolvable-ext-references-melinda.js.map +1 -1
- package/dist/resolvable-ext-references-melinda.spec.js.map +1 -1
- package/dist/resolveOrphanedSubfield6s.js.map +1 -1
- package/dist/resolveOrphanedSubfield6s.spec.js.map +1 -1
- package/dist/sanitize-vocabulary-source-codes.js.map +1 -1
- package/dist/sanitize-vocabulary-source-codes.spec.js.map +1 -1
- package/dist/sort-tags.js.map +1 -1
- package/dist/sort-tags.spec.js.map +1 -1
- package/dist/sortFields.js.map +1 -1
- package/dist/sortFields.spec.js.map +1 -1
- package/dist/sortRelatorTerms.js.map +1 -1
- package/dist/sortRelatorTerms.spec.js.map +1 -1
- package/dist/sortSubfields.js.map +1 -1
- package/dist/sortSubfields.spec.js.map +1 -1
- package/dist/stripPunctuation.js.map +1 -1
- package/dist/stripPunctuation.spec.js.map +1 -1
- package/dist/subfield-exclusion.js.map +1 -1
- package/dist/subfield-exclusion.spec.js.map +1 -1
- package/dist/subfield6Utils.js.map +1 -1
- package/dist/subfield8Utils.js.map +1 -1
- package/dist/subfieldValueNormalizations.js +28 -8
- package/dist/subfieldValueNormalizations.js.map +1 -1
- package/dist/subfieldValueNormalizations.spec.js.map +1 -1
- package/dist/sync-007-and-300.js.map +1 -1
- package/dist/sync-007-and-300.spec.js.map +1 -1
- package/dist/typeOfDate-008.js.map +1 -1
- package/dist/typeOfDate-008.spec.js.map +1 -1
- package/dist/unicode-decomposition.js.map +1 -1
- package/dist/unicode-decomposition.spec.js.map +1 -1
- package/dist/update-field-540.js.map +1 -1
- package/dist/update-field-540.spec.js.map +1 -1
- package/dist/urn.js.map +1 -1
- package/dist/urn.spec.js.map +1 -1
- package/dist/utils.js.map +1 -1
- package/package.json +6 -6
- package/src/punctuation2.js +103 -56
- package/src/subfieldValueNormalizations.js +32 -9
- package/test-fixtures/normalize-subfield-value/03/expectedResult.json +26 -0
- package/test-fixtures/normalize-subfield-value/03/metadata.json +6 -0
- package/test-fixtures/normalize-subfield-value/03/record.json +25 -0
- package/test-fixtures/normalize-subfield-value/100_and_880/expectedResult.json +22 -0
- package/test-fixtures/normalize-subfield-value/100_and_880/metadata.json +5 -0
- package/test-fixtures/normalize-subfield-value/100_and_880/record.json +20 -0
- package/test-fixtures/punctuation2/100_and_880/expectedResult.json +22 -0
- package/test-fixtures/punctuation2/100_and_880/metadata.json +6 -0
- package/test-fixtures/punctuation2/100_and_880/record.json +20 -0
- package/test-fixtures/punctuation2/240/expectedResult.json +12 -0
- package/test-fixtures/punctuation2/240/metadata.json +6 -0
- package/test-fixtures/punctuation2/240/record.json +10 -0
- package/test-fixtures/punctuation2/800/expectedResult.json +15 -0
- package/test-fixtures/punctuation2/800/metadata.json +6 -0
- package/test-fixtures/punctuation2/800/record.json +14 -0
- package/test-fixtures/strip-punctuation/100_and_880/expectedResult.json +37 -0
- package/test-fixtures/strip-punctuation/100_and_880/metadata.json +5 -0
- package/test-fixtures/strip-punctuation/100_and_880/record.json +35 -0
- package/test-fixtures/strip-punctuation/240/expectedResult.json +16 -0
- package/test-fixtures/strip-punctuation/240/metadata.json +6 -0
- package/test-fixtures/strip-punctuation/240/record.json +14 -0
package/src/punctuation2.js
CHANGED
|
@@ -10,27 +10,29 @@
|
|
|
10
10
|
* (They are jumped over when looking for next (non-controlfield subfield)
|
|
11
11
|
*/
|
|
12
12
|
import {validateSingleField} from './ending-punctuation';
|
|
13
|
+
import {fieldGetUnambiguousTag} from './subfield6Utils';
|
|
13
14
|
//import createDebugLogger from 'debug';
|
|
14
|
-
import {fieldToString, nvdebug
|
|
15
|
+
import {fieldToString, nvdebug} from './utils';
|
|
15
16
|
import clone from 'clone';
|
|
16
17
|
|
|
17
18
|
//const debug = createDebugLogger('debug/punctuation2');
|
|
18
19
|
|
|
20
|
+
const descriptionString = 'Remove invalid and add valid punctuation to data fields';
|
|
19
21
|
export default function () {
|
|
20
22
|
return {
|
|
21
|
-
description:
|
|
23
|
+
description: descriptionString,
|
|
22
24
|
validate, fix
|
|
23
25
|
};
|
|
24
26
|
|
|
25
27
|
function fix(record) {
|
|
26
|
-
nvdebug(
|
|
28
|
+
nvdebug(`${descriptionString}: fixer`);
|
|
27
29
|
const res = {message: [], fix: [], valid: true};
|
|
28
30
|
record.fields.forEach(f => fieldFixPunctuation(f));
|
|
29
31
|
return res;
|
|
30
32
|
}
|
|
31
33
|
|
|
32
34
|
function validate(record) {
|
|
33
|
-
nvdebug(
|
|
35
|
+
nvdebug(`${descriptionString}: validate`);
|
|
34
36
|
|
|
35
37
|
const fieldsNeedingModification = record.fields.filter(f => fieldNeedsModification(f, true));
|
|
36
38
|
|
|
@@ -84,9 +86,9 @@ export function fieldNeedsModification(field, add = true) {
|
|
|
84
86
|
|
|
85
87
|
|
|
86
88
|
//const stripCrap = / *[-;:,+]+$/u;
|
|
87
|
-
const commaNeedsPuncAfter = /(?:[a-z0-9A-Z]|å|ä|ö|Å|Ä|Ö|\))$/u;
|
|
88
89
|
const defaultNeedsPuncAfter = /(?:[a-z0-9A-Z]|å|ä|ö|Å|Ä|Ö)$/u;
|
|
89
90
|
const defaultNeedsPuncAfter2 = /(?:[\]a-zA-Z0-9)]|ä|å|ö|Å|Ä|Ö)$/u;
|
|
91
|
+
const doesNotEndInPunc = /[^!?.:;,]$/u; // non-punc for pre-240/700/XXX $, note that '.' comes if preceded by ')'
|
|
90
92
|
const blocksPuncRHS = /^(?:\()/u;
|
|
91
93
|
const allowsPuncRHS = /^(?:[A-Za-z0-9]|å|ä|ö|Å|Ä|Ö)/u;
|
|
92
94
|
|
|
@@ -96,7 +98,7 @@ const puncIsProbablyPunc = /(?:[a-z0-9)]|å|ä|ö) ?[.,:;]$/u;
|
|
|
96
98
|
|
|
97
99
|
// Will unfortunately trigger "Sukunimi, Th." type:
|
|
98
100
|
const removeColons = {'code': 'abcdefghijklmnopqrstuvwxyz', 'remove': / *[;:]$/u};
|
|
99
|
-
const removeX00Comma = {'code': '
|
|
101
|
+
const removeX00Comma = {'code': 'abcdenqt', 'followedBy': 'abcdenqtv#', 'context': /.,$/u, 'remove': /,$/u};
|
|
100
102
|
const cleanRHS = {'code': 'abcd', 'followedBy': 'bcde', 'context': /(?:(?:[a-z0-9]|å|ä|ö)\.|,)$/u, 'contextRHS': blocksPuncRHS, 'remove': /[.,]$/u};
|
|
101
103
|
const cleanX00dCommaOrDot = {'code': 'd', 'followedBy': 'et#', 'context': /[0-9]-[,.]$/u, 'remove': /[,.]$/u};
|
|
102
104
|
const cleanX00aDot = {'code': 'abcde', 'followedBy': 'cdegj', 'context': dotIsProbablyPunc, 'remove': /\.$/u};
|
|
@@ -104,22 +106,28 @@ const cleanCorruption = {'code': 'abcdefghijklmnopqrstuvwxyz', 'remove': / \.$/u
|
|
|
104
106
|
// These $e dot removals are tricky: before removing the comma, we should know that it ain't an abbreviation such as "esitt."...
|
|
105
107
|
const cleanX00eDot = {'code': 'e', 'followedBy': 'egj#', 'context': /(?:[ai]ja|jä)[.,]$/u, 'remove': /\.$/u};
|
|
106
108
|
|
|
109
|
+
const removeCommaBeforeLanguageSubfieldL = {'followedBy': 'l', 'remove': /,$/u};
|
|
110
|
+
const removeCommaBeforeTitleSubfieldT = {'followedBy': 't', 'remove': /,$/u};
|
|
111
|
+
|
|
107
112
|
const X00RemoveDotAfterBracket = {'code': 'cq', 'context': /\)\.$/u, 'remove': /\.$/u};
|
|
108
113
|
// 390, 800, 810, 830...
|
|
109
114
|
const cleanPuncBeforeLanguage = {'code': 'atvxyz', 'followedBy': 'l', 'context': puncIsProbablyPunc, 'remove': / *[.,:;]$/u};
|
|
110
115
|
|
|
111
116
|
|
|
112
|
-
const addX00aComma = {'add': ',', 'code': '
|
|
117
|
+
const addX00aComma = {'add': ',', 'code': 'abcqej', 'followedBy': 'cdeg', 'context': doesNotEndInPunc, 'contextRHS': allowsPuncRHS};
|
|
118
|
+
const addX00dComma = {'name': 'X00$d ending in "-" does not get comma', 'add': ',', 'code': 'd', 'followedBy': 'cdeg', 'context': /[^-,.!]$/u, 'contextRHS': allowsPuncRHS};
|
|
113
119
|
const addX00aComma2 = {'add': ',', 'code': 'abcdej', 'followedBy': 'cdeg', 'context': /(?:[A-Z]|Å|Ä|Ö)\.$/u, 'contextRHS': allowsPuncRHS};
|
|
114
|
-
const
|
|
120
|
+
const addX00Dot = {'add': '.', 'code': 'abcdetv', 'followedBy': '#fklptu', 'context': defaultNeedsPuncAfter};
|
|
121
|
+
|
|
115
122
|
|
|
116
123
|
//const addX10iaComma = {'name': 'Punctuate relationship information', 'code': 'i', 'followedBy': 'a', 'context': defaultNeedsPuncAfter2};
|
|
117
124
|
const addX10bDot = {'name': 'Add X10 pre-$b dot', 'add': '.', 'code': 'ab', 'followedBy': 'b', 'context': defaultNeedsPuncAfter};
|
|
118
125
|
const addX10eComma = {'add': ',', 'code': 'abe', 'followedBy': 'e', 'context': defaultNeedsPuncAfter};
|
|
119
126
|
const addX10Dot = {'name': 'Add X10 final dot', 'add': '.', 'code': 'abet', 'followedBy': 'tu#', 'context': defaultNeedsPuncAfter};
|
|
120
|
-
const addLanguageComma = {'name': 'Add comma before 810$l', 'add': ',', 'code': 'tv', 'followedBy': 'l', 'context': defaultNeedsPuncAfter2};
|
|
121
127
|
const addColonToRelationshipInformation = {'name': 'Add \':\' to 7X0 $i relationship info', 'add': ':', 'code': 'i', 'context': defaultNeedsPuncAfter2};
|
|
122
128
|
|
|
129
|
+
const addDotBeforeLanguageSubfieldL = {'name': 'Add dot before $l', 'add': '.', 'code': 'abepst', 'followedBy': 'l', 'context': doesNotEndInPunc};
|
|
130
|
+
|
|
123
131
|
// 490:
|
|
124
132
|
const addSemicolonBeforeVolumeDesignation = {'name': 'Add " ;" before $v', 'add': ' ;', 'code': 'atxyz', 'followedBy': 'v', 'context': /[^;]$/u};
|
|
125
133
|
|
|
@@ -131,8 +139,12 @@ const REMOVE_AND_ADD = 3;
|
|
|
131
139
|
// Crappy punctuation consists of various crap that is somewhat common.
|
|
132
140
|
// We strip crap for merge decisions. We are not trying to actively remove crap here.
|
|
133
141
|
|
|
134
|
-
const
|
|
135
|
-
|
|
142
|
+
const removeCrapFromAllEntryFields = [removeCommaBeforeLanguageSubfieldL, removeCommaBeforeTitleSubfieldT];
|
|
143
|
+
|
|
144
|
+
const removeX00Whatever = [removeX00Comma, cleanX00aDot, cleanX00eDot, cleanCorruption, cleanX00dCommaOrDot, cleanRHS, X00RemoveDotAfterBracket, removeColons, cleanPuncBeforeLanguage, ...removeCrapFromAllEntryFields];
|
|
145
|
+
const removeX10Whatever = [removeX00Comma, cleanX00aDot, cleanX00eDot, cleanCorruption, removeColons, cleanPuncBeforeLanguage, ...removeCrapFromAllEntryFields];
|
|
146
|
+
const removeX11Whatever = removeCrapFromAllEntryFields;
|
|
147
|
+
const removeX30Whatever = removeCrapFromAllEntryFields;
|
|
136
148
|
|
|
137
149
|
const remove490And830Whatever = [{'code': 'axyzv', 'followedBy': 'axyzv', 'remove': /(?: *;| *=|,)$/u}];
|
|
138
150
|
|
|
@@ -152,12 +164,16 @@ const crappy24X = [
|
|
|
152
164
|
{'code': 'abc', 'followedBy': '#', 'remove': /\.$/u, 'context': dotIsProbablyPunc},
|
|
153
165
|
{'code': 'abfghinp', 'followedBy': '#', 'remove': /\.$/u, 'context': dotIsProbablyPunc},
|
|
154
166
|
{'code': 'n', 'followedBy': 'p', 'remove': /\.$/u, 'context': dotIsProbablyPunc}, // MELINDA-8817
|
|
155
|
-
{'code': 'p', 'followedBy': 'pc', 'remove': /\.$/u, 'context': dotIsProbablyPunc} // MELINDA-8817
|
|
167
|
+
{'code': 'p', 'followedBy': 'pc', 'remove': /\.$/u, 'context': dotIsProbablyPunc}, // MELINDA-8817
|
|
168
|
+
removeCommaBeforeLanguageSubfieldL
|
|
156
169
|
];
|
|
157
170
|
|
|
171
|
+
|
|
158
172
|
const cleanCrappyPunctuationRules = {
|
|
159
173
|
'100': removeX00Whatever,
|
|
160
174
|
'110': removeX10Whatever,
|
|
175
|
+
'111': removeX11Whatever,
|
|
176
|
+
'130': removeX30Whatever,
|
|
161
177
|
'240': crappy24X,
|
|
162
178
|
'245': crappy24X,
|
|
163
179
|
'246': crappy24X,
|
|
@@ -173,8 +189,12 @@ const cleanCrappyPunctuationRules = {
|
|
|
173
189
|
'490': remove490And830Whatever,
|
|
174
190
|
'600': removeX00Whatever,
|
|
175
191
|
'610': removeX10Whatever,
|
|
192
|
+
'611': removeX11Whatever,
|
|
193
|
+
'630': removeX30Whatever,
|
|
176
194
|
'700': removeX00Whatever,
|
|
177
195
|
'710': removeX10Whatever,
|
|
196
|
+
'711': removeX11Whatever,
|
|
197
|
+
'730': removeX30Whatever,
|
|
178
198
|
'773': linkingEntryWhatever,
|
|
179
199
|
'774': linkingEntryWhatever,
|
|
180
200
|
'776': linkingEntryWhatever,
|
|
@@ -189,15 +209,16 @@ const cleanLegalX00Comma = {'code': 'abcde', 'followedBy': 'cdegj', 'context': /
|
|
|
189
209
|
const cleanLegalX00bDot = {'code': 'b', 'followedBy': 't#', context: /^[IVXLCDM]+\.$/u, 'remove': /\.$/u};
|
|
190
210
|
const cleanLegalX00iColon = {'code': 'i', 'followedBy': 'a', 'remove': / *:$/u}; // NB! context is not needed
|
|
191
211
|
const cleanLegalX00Dot = {'code': 'abcdetvl', 'followedBy': 'tu#', 'context': /(?:[a-z0-9)]|å|ä|ö)\.$/u, 'remove': /\.$/u};
|
|
192
|
-
const
|
|
212
|
+
const cleanDotBeforeLanguageSubfieldL = {'name': 'pre-language-$l dot', 'followedBy': 'l', 'context': /.\.$/u, 'remove': /\.$/u};
|
|
193
213
|
|
|
214
|
+
const legalEntryField = [cleanDotBeforeLanguageSubfieldL];
|
|
194
215
|
|
|
195
|
-
const legalX00punc = [cleanLegalX00Comma, cleanLegalX00iColon, cleanLegalX00bDot, cleanLegalX00Dot,
|
|
216
|
+
const legalX00punc = [cleanLegalX00Comma, cleanLegalX00iColon, cleanLegalX00bDot, cleanLegalX00Dot, ...legalEntryField];
|
|
196
217
|
|
|
197
218
|
const cleanLegalX10Comma = {'name': 'X10comma', 'code': 'abe', 'followedBy': 'e', 'context': /.,$/u, 'remove': /,$/u};
|
|
198
219
|
const cleanLegalX10Dot = {'name': 'X10dot', 'code': 'abt', 'followedBy': 'bst#', 'context': /.\.$/u, 'remove': /\.$/u};
|
|
199
220
|
|
|
200
|
-
const legalX10punc = [cleanLegalX10Comma, cleanLegalX10Dot, cleanX00eDot,
|
|
221
|
+
const legalX10punc = [cleanLegalX10Comma, cleanLegalX10Dot, cleanX00eDot, ...legalEntryField];
|
|
201
222
|
|
|
202
223
|
const cleanLegalSeriesTitle = [ // 490 and 830
|
|
203
224
|
{'code': 'a', 'followedBy': 'a', 'remove': / =$/u},
|
|
@@ -213,18 +234,17 @@ const clean24X = [
|
|
|
213
234
|
{'name': 'ABFNP:C', 'code': 'abfnp', 'followedBy': 'c', 'remove': / \/$/u},
|
|
214
235
|
{'name': 'ABN:N', 'code': 'abn', 'followedBy': 'n', 'remove': /\.$/u},
|
|
215
236
|
{'name': 'ABNP:#', 'code': 'abnp', 'followedBy': '#', 'remove': /\.$/u},
|
|
216
|
-
{'name': 'N:P', 'code': 'n', 'followedBy': 'p', 'remove': /,$/u}
|
|
237
|
+
{'name': 'N:P', 'code': 'n', 'followedBy': 'p', 'remove': /,$/u},
|
|
238
|
+
cleanDotBeforeLanguageSubfieldL
|
|
217
239
|
];
|
|
218
240
|
|
|
219
241
|
const cleanValidPunctuationRules = {
|
|
220
242
|
'100': legalX00punc,
|
|
221
243
|
'110': legalX10punc,
|
|
222
|
-
'
|
|
223
|
-
'
|
|
224
|
-
'
|
|
225
|
-
'
|
|
226
|
-
'800': legalX00punc,
|
|
227
|
-
'810': legalX10punc,
|
|
244
|
+
'111': legalEntryField,
|
|
245
|
+
'130': legalEntryField,
|
|
246
|
+
'240': clean24X,
|
|
247
|
+
'243': clean24X,
|
|
228
248
|
'245': clean24X,
|
|
229
249
|
'246': clean24X,
|
|
230
250
|
'260': [
|
|
@@ -248,34 +268,48 @@ const cleanValidPunctuationRules = {
|
|
|
248
268
|
],
|
|
249
269
|
'490': cleanLegalSeriesTitle,
|
|
250
270
|
'534': [{'code': 'p', 'followedBy': 'c', 'remove': /:$/u}],
|
|
271
|
+
'600': legalX00punc,
|
|
272
|
+
'610': legalX10punc,
|
|
273
|
+
'611': legalEntryField,
|
|
274
|
+
'630': legalEntryField,
|
|
251
275
|
// Experimental, MET366-ish (end punc in internationally valid, but we don't use it here in Finland):
|
|
252
276
|
'648': [{'code': 'a', 'content': /^[0-9]+\.$/u, 'ind2': ['4'], 'remove': /\.$/u}],
|
|
253
|
-
'
|
|
277
|
+
'700': legalX00punc,
|
|
278
|
+
'710': legalX10punc,
|
|
279
|
+
'711': legalEntryField,
|
|
280
|
+
'730': legalEntryField,
|
|
281
|
+
'800': legalX00punc,
|
|
282
|
+
'810': legalX10punc,
|
|
283
|
+
'811': legalEntryField,
|
|
284
|
+
'830': [...legalEntryField, ...cleanLegalSeriesTitle],
|
|
254
285
|
'946': clean24X
|
|
255
|
-
|
|
256
286
|
};
|
|
257
287
|
|
|
258
|
-
// addColonToRelationshipInformation only applies to 700/710 but as others don't have $i, it's fine
|
|
259
|
-
const addX00 = [addX00aComma, addX00aComma2, addX00aDot, addLanguageComma, addSemicolonBeforeVolumeDesignation, addColonToRelationshipInformation];
|
|
260
|
-
const addX10 = [addX10bDot, addX10eComma, addX10Dot, addLanguageComma, addSemicolonBeforeVolumeDesignation, addColonToRelationshipInformation];
|
|
261
288
|
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
];
|
|
289
|
+
// Overgeneralizes a bit: eg. addColonToRelationshipInformation only applies to 700/710 but as others don't have $i, it's fine.
|
|
290
|
+
const addToAllEntryFields = [addDotBeforeLanguageSubfieldL, addSemicolonBeforeVolumeDesignation, addColonToRelationshipInformation];
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
const addX00 = [addX00aComma, addX00aComma2, addX00Dot, addX00dComma, ...addToAllEntryFields];
|
|
294
|
+
const addX10 = [addX10bDot, addX10eComma, addX10Dot, ...addToAllEntryFields];
|
|
295
|
+
const addX11 = [...addToAllEntryFields];
|
|
296
|
+
const addX30 = [...addToAllEntryFields];
|
|
271
297
|
|
|
272
|
-
const
|
|
298
|
+
const add24X = [
|
|
273
299
|
{'code': 'i', 'followedBy': 'a', 'add': ':', 'context': defaultNeedsPuncAfter},
|
|
274
300
|
{'code': 'a', 'followedBy': 'b', 'add': ' :', 'context': defaultNeedsPuncAfter},
|
|
275
301
|
{'code': 'abk', 'followedBy': 'f', 'add': ',', 'context': defaultNeedsPuncAfter},
|
|
276
|
-
{'code': 'abfnp', 'followedBy': 'c', 'add': ' /', 'context': defaultNeedsPuncAfter}
|
|
302
|
+
{'code': 'abfnp', 'followedBy': 'c', 'add': ' /', 'context': defaultNeedsPuncAfter},
|
|
303
|
+
addDotBeforeLanguageSubfieldL
|
|
277
304
|
];
|
|
278
305
|
|
|
306
|
+
const add245 = [
|
|
307
|
+
...add24X,
|
|
308
|
+
// Blah! Also "$a = $b" and "$a ; $b" can be valid... But ' :' is better than nothing, I guess...
|
|
309
|
+
{'code': 'ab', 'followedBy': 'n', 'add': '.', 'context': defaultNeedsPuncAfter},
|
|
310
|
+
{'code': 'n', 'followedBy': 'p', 'add': ',', 'context': defaultNeedsPuncAfter},
|
|
311
|
+
{'code': 'abc', 'followedBy': '#', 'add': '.', 'context': defaultNeedsPuncAfter} // Stepping on "punctuation validator's" toes
|
|
312
|
+
];
|
|
279
313
|
|
|
280
314
|
const addSeriesTitle = [ // 490 and 830
|
|
281
315
|
{'code': 'a', 'followedBy': 'a', 'add': ' =', 'context': defaultNeedsPuncAfter2},
|
|
@@ -286,9 +320,12 @@ const addSeriesTitle = [ // 490 and 830
|
|
|
286
320
|
const addPairedPunctuationRules = {
|
|
287
321
|
'100': addX00,
|
|
288
322
|
'110': addX10,
|
|
289
|
-
'
|
|
323
|
+
'111': addX11,
|
|
324
|
+
'130': addX30,
|
|
325
|
+
'240': add24X,
|
|
326
|
+
'243': add24X,
|
|
290
327
|
'245': add245,
|
|
291
|
-
'246':
|
|
328
|
+
'246': add24X,
|
|
292
329
|
'260': [
|
|
293
330
|
{'code': 'a', 'followedBy': 'b', 'add': ' :', 'context': defaultNeedsPuncAfter2},
|
|
294
331
|
{'code': 'b', 'followedBy': 'c', 'add': ',', 'context': defaultNeedsPuncAfter2},
|
|
@@ -313,11 +350,16 @@ const addPairedPunctuationRules = {
|
|
|
313
350
|
'534': [{'code': 'p', 'followedBy': 'c', 'add': ':', 'context': defaultNeedsPuncAfter2}],
|
|
314
351
|
'600': addX00,
|
|
315
352
|
'610': addX10,
|
|
353
|
+
'611': addX11,
|
|
354
|
+
'630': addX30,
|
|
316
355
|
'700': addX00,
|
|
317
356
|
'710': addX10,
|
|
357
|
+
'711': addX11,
|
|
358
|
+
'730': addX30,
|
|
318
359
|
'800': addX00,
|
|
319
360
|
'810': addX10,
|
|
320
|
-
'
|
|
361
|
+
'811': addX11,
|
|
362
|
+
'830': [...addX30, ...addSeriesTitle],
|
|
321
363
|
'946': [{'code': 'i', 'followedBy': 'a', 'add': ':', 'context': defaultNeedsPuncAfter}]
|
|
322
364
|
};
|
|
323
365
|
|
|
@@ -340,6 +382,9 @@ function debugRule(rule) {
|
|
|
340
382
|
*/
|
|
341
383
|
|
|
342
384
|
function ruleAppliesToSubfieldCode(targetSubfieldCodes, currSubfieldCode) {
|
|
385
|
+
if (!targetSubfieldCodes) { // We are not interested in what subfield precedes 240$l, ',' is removed anyway
|
|
386
|
+
return true;
|
|
387
|
+
}
|
|
343
388
|
const negation = targetSubfieldCodes.includes('!');
|
|
344
389
|
if (negation) {
|
|
345
390
|
return !targetSubfieldCodes.includes(currSubfieldCode);
|
|
@@ -424,38 +469,40 @@ function checkRule(rule, field, subfield1, subfield2) {
|
|
|
424
469
|
return true;
|
|
425
470
|
}
|
|
426
471
|
|
|
427
|
-
function applyPunctuationRules(field, subfield1, subfield2, ruleArray = null, operation = NONE) {
|
|
428
|
-
|
|
429
|
-
if (!(`${field.tag}` in ruleArray) || ruleArray === null || operation === NONE) {
|
|
430
|
-
|
|
431
|
-
/*
|
|
432
|
-
if (!['020', '650'].includes(tag) || !isControlSubfieldCode(subfield1.code)) { // eslint-disable-line functional/no-conditional-statements
|
|
433
|
-
nvdebug(`No punctuation rules found for ${tag} (looking for: ‡${subfield1.code})`, debug);
|
|
434
472
|
|
|
435
|
-
|
|
436
|
-
|
|
473
|
+
function applyPunctuationRules(field, subfield1, subfield2, ruleArray = null, operation = NONE) {
|
|
474
|
+
if (operation === NONE || ruleArray === null) { // !fieldIsApplicable(field, ruleArray)) {
|
|
437
475
|
return;
|
|
438
476
|
}
|
|
439
|
-
|
|
477
|
+
const tag2 = field.tag === '880' ? fieldGetUnambiguousTag(field) : field.tag;
|
|
478
|
+
if (!tag2) {
|
|
479
|
+
return;
|
|
480
|
+
}
|
|
481
|
+
if (!(`${tag2}` in ruleArray)) {
|
|
482
|
+
return;
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
//nvdebug(`PUNCTUATE ${field.tag}/${tag2} '${subfieldToString(subfield1)}' XXX '${subfield2 ? subfieldToString(subfield2) : '#'} }`);
|
|
440
486
|
|
|
441
|
-
//nvdebug(`OP=${operation} ${
|
|
442
|
-
const candRules = ruleArray[
|
|
487
|
+
//nvdebug(`OP=${operation} ${tag2}: '${subfield1.code}: ${subfield1.value}' ??? '${subfield2 ? subfield2.code : '#'}'`);
|
|
488
|
+
const candRules = ruleArray[tag2];
|
|
443
489
|
candRules.forEach(rule => {
|
|
444
490
|
//debugRule(rule);
|
|
445
|
-
|
|
491
|
+
//nvdebug(' WP1');
|
|
446
492
|
if (!checkRule(rule, field, subfield1, subfield2)) {
|
|
447
493
|
return;
|
|
448
494
|
}
|
|
495
|
+
//nvdebug(' WP2');
|
|
449
496
|
|
|
450
497
|
//const originalValue = subfield1.value;
|
|
451
498
|
if (rule.remove && [REMOVE, REMOVE_AND_ADD].includes(operation) && subfield1.value.match(rule.remove)) { // eslint-disable-line functional/no-conditional-statements
|
|
452
499
|
//nvdebug(` PUNC REMOVAL TO BE PERFORMED FOR $${subfield1.code} '${subfield1.value}'`, debug);
|
|
453
500
|
subfield1.value = subfield1.value.replace(rule.remove, ''); // eslint-disable-line functional/immutable-data
|
|
454
|
-
//nvdebug(` PUNC REMOVAL PERFORMED FOR '${subfield1.value}'
|
|
501
|
+
//nvdebug(` PUNC REMOVAL PERFORMED FOR '${subfield1.value}'`);
|
|
455
502
|
}
|
|
456
503
|
if (rule.add && [ADD, REMOVE_AND_ADD].includes(operation)) { // eslint-disable-line functional/no-conditional-statements
|
|
457
504
|
subfield1.value += rule.add; // eslint-disable-line functional/immutable-data
|
|
458
|
-
//nvdebug(` ADDED '${rule.add}' TO FORM '${subfield1.value}'
|
|
505
|
+
//nvdebug(` ADDED '${rule.add}' TO FORM '${subfield1.value}'`);
|
|
459
506
|
}
|
|
460
507
|
|
|
461
508
|
/*
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
//import createDebugLogger from 'debug';
|
|
2
2
|
import clone from 'clone';
|
|
3
3
|
import {fieldHasSubfield, fieldToString} from './utils';
|
|
4
|
+
import {fieldFixPunctuation} from './punctuation2';
|
|
5
|
+
import {fieldGetUnambiguousTag} from './subfield6Utils';
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
// Author(s): Nicholas Volk
|
|
@@ -69,20 +71,41 @@ function handleInitials(value, subfieldCode, field) {
|
|
|
69
71
|
// initial space confirms us that it's an initial
|
|
70
72
|
return str.match(/ (?:[A-Z]|Å|Ä|Ö)\.(?:[A-Z]|Å|Ä|Ö)/u);
|
|
71
73
|
}
|
|
72
|
-
|
|
73
74
|
}
|
|
75
|
+
|
|
74
76
|
function getNormalizedValue(subfield, field) {
|
|
75
|
-
|
|
76
|
-
let value = subfield.value;
|
|
77
|
-
value = handleInitials(value, subfield.code, field);
|
|
77
|
+
return uppercaseLanguage(handleMovies(handleInitials(subfield.value, subfield.code, field)));
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
function handleMovies(value) {
|
|
81
|
+
if (subfield.code === 'a' && ['130', '630', '730'].includes(field.tag)) {
|
|
82
|
+
// MRA-614: "(elokuva, 2000)" => "(elokuva : 2000)""
|
|
83
|
+
return value.replace(/\((elokuva), (19[0-9][0-9]|20[0-2][0-9])\)/u, '($1 : $2)'); // eslint-disable-line prefer-named-capture-group
|
|
84
|
+
}
|
|
85
|
+
return value;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
function uppercaseLanguage(value) { // Part of MET-549
|
|
89
|
+
const relevantTags = ['130', '240', '243', '600', '610', '611', '630', '700', '710', '711', '730', '800', '810', '811', '830'];
|
|
90
|
+
|
|
91
|
+
if (subfield.code !== 'l') {
|
|
92
|
+
return value;
|
|
93
|
+
}
|
|
94
|
+
const targetTag = tagForUppercasing();
|
|
95
|
+
if (relevantTags.includes(targetTag)) {
|
|
96
|
+
const newValue = `${value[0].toUpperCase()}${value.slice(1)}`;
|
|
97
|
+
if (newValue !== value) {
|
|
98
|
+
fieldFixPunctuation(field); // Rather hackily try to fix prev punc on the fly
|
|
99
|
+
return newValue;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
function tagForUppercasing() {
|
|
104
|
+
return field.tag === '880' ? fieldGetUnambiguousTag(field) : field.tag;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
return value;
|
|
83
108
|
}
|
|
84
|
-
/* eslint-enable */
|
|
85
|
-
return value;
|
|
86
109
|
}
|
|
87
110
|
|
|
88
111
|
function normalizeSubfieldValues(field) {
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_validationOptions": {},
|
|
3
|
+
"fields": [
|
|
4
|
+
{ "tag": "005", "value": "20220202020202.0" },
|
|
5
|
+
{ "tag": "130", "ind1": "1", "ind2": " ", "subfields": [
|
|
6
|
+
{ "code": "a", "value": "Sukunimi, A. B." },
|
|
7
|
+
{ "code": "t", "value": "Opus." },
|
|
8
|
+
{ "code": "l", "value": "Suomi" }
|
|
9
|
+
]},
|
|
10
|
+
{ "tag": "240", "ind1": "1", "ind2": " ", "subfields": [
|
|
11
|
+
{ "code": "a", "value": "Supo." },
|
|
12
|
+
{ "code": "l", "value": "Saame" }
|
|
13
|
+
]},
|
|
14
|
+
{ "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
|
|
15
|
+
{ "code": "a", "value": "Sukunimi, A. B." },
|
|
16
|
+
{ "code": "t", "value": "Opus." },
|
|
17
|
+
{ "code": "l", "value": "Suomi" }
|
|
18
|
+
]},
|
|
19
|
+
{ "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
|
|
20
|
+
{ "code": "a", "value": "Sukunimi, A. B." },
|
|
21
|
+
{ "code": "t", "value": "Opus." },
|
|
22
|
+
{ "code": "l", "value": "Suomi" }
|
|
23
|
+
]}
|
|
24
|
+
],
|
|
25
|
+
"leader": ""
|
|
26
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fields": [
|
|
3
|
+
{ "tag": "005", "value": "20220202020202.0" },
|
|
4
|
+
{ "tag": "130", "ind1": "1", "ind2": " ", "subfields": [
|
|
5
|
+
{ "code": "a", "value": "Sukunimi, A. B." },
|
|
6
|
+
{ "code": "t", "value": "Opus," },
|
|
7
|
+
{ "code": "l", "value": "suomi" }
|
|
8
|
+
]},
|
|
9
|
+
{ "tag": "240", "ind1": "1", "ind2": " ", "subfields": [
|
|
10
|
+
{ "code": "a", "value": "Supo," },
|
|
11
|
+
{ "code": "l", "value": "saame" }
|
|
12
|
+
]},
|
|
13
|
+
{ "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
|
|
14
|
+
{ "code": "a", "value": "Sukunimi, A. B." },
|
|
15
|
+
{ "code": "t", "value": "Opus." },
|
|
16
|
+
{ "code": "l", "value": "Suomi" }
|
|
17
|
+
]},
|
|
18
|
+
{ "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
|
|
19
|
+
{ "code": "a", "value": "Sukunimi, A. B." },
|
|
20
|
+
{ "code": "t", "value": "Opus," },
|
|
21
|
+
{ "code": "l", "value": "suomi" }
|
|
22
|
+
]}
|
|
23
|
+
|
|
24
|
+
]
|
|
25
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_validationOptions": {},
|
|
3
|
+
"fields": [
|
|
4
|
+
{ "tag": "005", "value": "20220202020202.0" },
|
|
5
|
+
{ "tag": "700", "ind1": "1", "ind2": "2", "subfields": [
|
|
6
|
+
{ "code": "6", "value": "880-06" },
|
|
7
|
+
{ "code": "a", "value": "Jang, Gangmyeong," },
|
|
8
|
+
{ "code": "d", "value": "1975-" },
|
|
9
|
+
{ "code": "t", "value": "Albasaeng jareugi." },
|
|
10
|
+
{ "code": "l", "value": "Englanti."}
|
|
11
|
+
]},
|
|
12
|
+
{ "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
|
|
13
|
+
{ "code": "6", "value": "700-06/$1" },
|
|
14
|
+
{ "code": "a", "value": "장강명," },
|
|
15
|
+
{ "code": "d", "value": "1975-" },
|
|
16
|
+
{ "code": "t", "value": "알바생 자르기." },
|
|
17
|
+
{ "code": "l", "value": "Englanti."
|
|
18
|
+
}
|
|
19
|
+
]}
|
|
20
|
+
],
|
|
21
|
+
"leader": ""
|
|
22
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fields": [
|
|
3
|
+
{ "tag": "005", "value": "20220202020202.0" },
|
|
4
|
+
{ "tag": "700", "ind1": "1", "ind2": "2", "subfields": [
|
|
5
|
+
{ "code": "6", "value": "880-06" },
|
|
6
|
+
{ "code": "a", "value": "Jang, Gangmyeong," },
|
|
7
|
+
{ "code": "d", "value": "1975-" },
|
|
8
|
+
{ "code": "t", "value": "Albasaeng jareugi," },
|
|
9
|
+
{ "code": "l", "value": "englanti."}
|
|
10
|
+
]},
|
|
11
|
+
{ "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
|
|
12
|
+
{ "code": "6", "value": "700-06/$1" },
|
|
13
|
+
{ "code": "a", "value": "장강명," },
|
|
14
|
+
{ "code": "d", "value": "1975-" },
|
|
15
|
+
{ "code": "t", "value": "알바생 자르기," },
|
|
16
|
+
{ "code": "l", "value": "englanti."
|
|
17
|
+
}
|
|
18
|
+
]}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fields": [
|
|
3
|
+
{ "tag": "005", "value": "20220202020202.0" },
|
|
4
|
+
{ "tag": "100", "ind1": "1", "ind2": "2", "subfields": [
|
|
5
|
+
{ "code": "6", "value": "880-06" },
|
|
6
|
+
{ "code": "a", "value": "Jang, Gangmyeong," },
|
|
7
|
+
{ "code": "d", "value": "1975-" },
|
|
8
|
+
{ "code": "t", "value": "Albasaeng jareugi." },
|
|
9
|
+
{ "code": "l", "value": "englanti."}
|
|
10
|
+
]},
|
|
11
|
+
{ "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
|
|
12
|
+
{ "code": "6", "value": "100-06/$1" },
|
|
13
|
+
{ "code": "a", "value": "장강명," },
|
|
14
|
+
{ "code": "d", "value": "1975-" },
|
|
15
|
+
{ "code": "t", "value": "알바생 자르기." },
|
|
16
|
+
{ "code": "l", "value": "englanti."
|
|
17
|
+
}
|
|
18
|
+
]}
|
|
19
|
+
],
|
|
20
|
+
"leader": "",
|
|
21
|
+
"_validationOptions": {}
|
|
22
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"fields": [
|
|
3
|
+
{ "tag": "005", "value": "20220202020202.0" },
|
|
4
|
+
{ "tag": "100", "ind1": "1", "ind2": "2", "subfields": [
|
|
5
|
+
{ "code": "6", "value": "880-06" },
|
|
6
|
+
{ "code": "a", "value": "Jang, Gangmyeong" },
|
|
7
|
+
{ "code": "d", "value": "1975-," },
|
|
8
|
+
{ "code": "t", "value": "Albasaeng jareugi" },
|
|
9
|
+
{ "code": "l", "value": "englanti."}
|
|
10
|
+
]},
|
|
11
|
+
{ "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
|
|
12
|
+
{ "code": "6", "value": "100-06/$1" },
|
|
13
|
+
{ "code": "a", "value": "장강명" },
|
|
14
|
+
{ "code": "d", "value": "1975-." },
|
|
15
|
+
{ "code": "t", "value": "알바생 자르기" },
|
|
16
|
+
{ "code": "l", "value": "englanti."
|
|
17
|
+
}
|
|
18
|
+
]}
|
|
19
|
+
]
|
|
20
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"leader": "01331cam a22003494i 4500",
|
|
3
|
+
"_validationOptions": {},
|
|
4
|
+
"fields": [
|
|
5
|
+
{ "tag": "001", "value": "000000001" },
|
|
6
|
+
|
|
7
|
+
{ "tag": "240", "ind1": "1", "ind2": "0", "subfields": [
|
|
8
|
+
{ "code": "a", "value": "Seitsemän veljestä." },
|
|
9
|
+
{ "code": "l", "value": "ruotsi" }
|
|
10
|
+
]}
|
|
11
|
+
]
|
|
12
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"leader": "01331cam a22003494i 4500",
|
|
3
|
+
"_validationOptions": {},
|
|
4
|
+
"fields": [
|
|
5
|
+
{ "tag": "001", "value": "000000001" },
|
|
6
|
+
{ "tag": "800", "ind1": "1", "ind2": " ",
|
|
7
|
+
"subfields": [
|
|
8
|
+
{ "code": "a", "value": "Jaakko, Petteri," },
|
|
9
|
+
{ "code": "e", "value": "kirjoittaja." },
|
|
10
|
+
{ "code": "t", "value": "Etsiväsarja ;" },
|
|
11
|
+
{ "code": "v", "value": "14."},
|
|
12
|
+
{ "code": "l", "value": "suomi." }
|
|
13
|
+
]}
|
|
14
|
+
]
|
|
15
|
+
}
|