@natlibfi/marc-record-validators-melinda 12.0.0-alpha.5 → 12.0.0-alpha.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/{melinda-node-tests.yml → melinda-node-tests-and-publish.yml} +36 -11
- package/dist/ending-punctuation-conf.js +3 -2
- package/dist/ending-punctuation-conf.js.map +2 -2
- package/dist/ending-punctuation.js +42 -11
- package/dist/ending-punctuation.js.map +2 -2
- package/dist/ending-punctuation.test.js +169 -96
- package/dist/ending-punctuation.test.js.map +2 -2
- package/package.json +13 -14
- package/src/ending-punctuation-conf.js +3 -2
- package/src/ending-punctuation.js +56 -16
- package/src/ending-punctuation.test.js +156 -97
package/package.json
CHANGED
|
@@ -6,16 +6,15 @@
|
|
|
6
6
|
"name": "The National Library of Finland"
|
|
7
7
|
},
|
|
8
8
|
"keywords": [],
|
|
9
|
-
"homepage": "https://
|
|
9
|
+
"homepage": "https://www.npmjs.com/package/@natlibfi/marc-record-validators-melinda",
|
|
10
10
|
"bugs": {
|
|
11
11
|
"url": "https://github.com/natlibfi/marc-record-validators-melinda/issues"
|
|
12
12
|
},
|
|
13
13
|
"repository": {
|
|
14
|
-
"
|
|
15
|
-
"url": "git@github.com:natlibfi/marc-record-validators-melinda.git"
|
|
14
|
+
"url": "https://github.com/NatLibFi/marc-record-validators-melinda"
|
|
16
15
|
},
|
|
17
16
|
"license": "MIT",
|
|
18
|
-
"version": "12.0.0-alpha.
|
|
17
|
+
"version": "12.0.0-alpha.7",
|
|
19
18
|
"main": "./dist/index.js",
|
|
20
19
|
"publishConfig": {
|
|
21
20
|
"access": "public"
|
|
@@ -35,13 +34,13 @@
|
|
|
35
34
|
},
|
|
36
35
|
"dependencies": {
|
|
37
36
|
"@natlibfi/issn-verify": "^1.0.6",
|
|
38
|
-
"@natlibfi/marc-record": "
|
|
39
|
-
"@natlibfi/marc-record-serializers": "
|
|
40
|
-
"@natlibfi/marc-record-validate": "
|
|
41
|
-
"@natlibfi/melinda-commons": "
|
|
42
|
-
"@natlibfi/sfs-4900": "
|
|
43
|
-
"@natlibfi/iso9-1995": "
|
|
44
|
-
"@natlibfi/sru-client": "
|
|
37
|
+
"@natlibfi/marc-record": "next",
|
|
38
|
+
"@natlibfi/marc-record-serializers": "next",
|
|
39
|
+
"@natlibfi/marc-record-validate": "next",
|
|
40
|
+
"@natlibfi/melinda-commons": "next",
|
|
41
|
+
"@natlibfi/sfs-4900": "next",
|
|
42
|
+
"@natlibfi/iso9-1995": "next",
|
|
43
|
+
"@natlibfi/sru-client": "next",
|
|
45
44
|
"cld3-asm": "^4.0.0",
|
|
46
45
|
"clone": "^2.1.2",
|
|
47
46
|
"debug": "^4.4.3",
|
|
@@ -51,11 +50,11 @@
|
|
|
51
50
|
"xregexp": "^5.1.2"
|
|
52
51
|
},
|
|
53
52
|
"peerDependencies": {
|
|
54
|
-
"@natlibfi/marc-record-validate": "
|
|
53
|
+
"@natlibfi/marc-record-validate": "next"
|
|
55
54
|
},
|
|
56
55
|
"devDependencies": {
|
|
57
|
-
"@natlibfi/fixugen": "
|
|
58
|
-
"@natlibfi/fixura": "
|
|
56
|
+
"@natlibfi/fixugen": "next",
|
|
57
|
+
"@natlibfi/fixura": "next",
|
|
59
58
|
"cross-env": "^10.0.0",
|
|
60
59
|
"esbuild": "^0.25.10",
|
|
61
60
|
"eslint": "^9.36.0",
|
|
@@ -4,7 +4,8 @@ const finnishTerms = [
|
|
|
4
4
|
/^(?:kauno|slm|yso)\//u
|
|
5
5
|
];
|
|
6
6
|
|
|
7
|
-
const validPuncMarks = '
|
|
7
|
+
const validPuncMarks = '?-!.'; // NB! ')' and ']' are only valid for some fields!
|
|
8
|
+
const validQuoteChars = "\"'";
|
|
8
9
|
// Configuration specification
|
|
9
10
|
const confSpec = [
|
|
10
11
|
{ // 010-035 EI
|
|
@@ -666,4 +667,4 @@ const confSpec = [
|
|
|
666
667
|
}
|
|
667
668
|
];
|
|
668
669
|
|
|
669
|
-
export {finnishTerms, validPuncMarks, confSpec};
|
|
670
|
+
export {finnishTerms, validPuncMarks, validQuoteChars, confSpec};
|
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
*/
|
|
28
28
|
|
|
29
29
|
// Import {validPuncMarks, finnishTerms, confSpec} from './ending-punctuation-conf.js';
|
|
30
|
-
import {validPuncMarks, finnishTerms, confSpec} from './ending-punctuation-conf.js';
|
|
30
|
+
import {validPuncMarks, validQuoteChars, finnishTerms, confSpec} from './ending-punctuation-conf.js';
|
|
31
31
|
import createDebugLogger from 'debug';
|
|
32
32
|
|
|
33
33
|
const debug = createDebugLogger('@natlibfi/marc-record-validator-melinda/ending-punctuation');
|
|
@@ -71,34 +71,65 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
71
71
|
return subfields.slice(-1).shift();
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
+
/**
 * Select the default set of characters accepted as ending punctuation for a field tag.
 *
 * @param {string} tag - Field tag, e.g. '100' or '260'.
 * @returns {string} validPuncMarks as such, or extended with ')' or ')]' for the tags handled below.
 */
function getDefaultPuncMarks(tag) {
  // As defined in Loppupisteohje
  const matchesLoppupisteohje = Boolean(tag.match(/^[1678](?:00|10|11|30)/u)) || tag === '740';

  // We don't want ').' here either. However, Loppupisteohje is a bit iffy here.
  // BUG: Note that our generic rules will remove dot from Finnish terms such as https://finto.fi/yso-aika/fi/page/p1069910600
  const closingParenTags = ['647', '648', '650', '651', '654', '655', '656', '657', '658', '662'];

  // Both groups above accept a closing parenthesis in addition to the generic marks
  if (matchesLoppupisteohje || closingParenTags.includes(tag)) {
    return `${validPuncMarks})`;
  }

  // Field 260 additionally accepts a closing square bracket; everything else gets the generic marks only
  return tag === '260' ? `${validPuncMarks})]` : validPuncMarks;
}
|
|
88
|
+
|
|
74
89
|
// Punctuation rule (Boolean), Check no ending dot strict (Boolean)
|
|
75
90
|
function normalPuncRules(subfield, punc, tag, checkEnd, overrideValidPuncMarks) {
|
|
76
|
-
const puncMarks = overrideValidPuncMarks ||
|
|
77
|
-
const
|
|
78
|
-
const
|
|
91
|
+
const puncMarks = overrideValidPuncMarks || getDefaultPuncMarks(tag);
|
|
92
|
+
const lastChar = subfield.value.slice(-1);
|
|
93
|
+
const lastPuncMark = puncMarks.includes(lastChar); // If string ends to punctuation char
|
|
94
|
+
const lastPuncDot = '.'.includes(lastChar); // If string ends to dot
|
|
95
|
+
const penultimateCharacter = subfield.value.length >= 2 ? subfield.value.charAt(subfield.value.length - 2) : undefined;
|
|
96
|
+
const antePenultimateCharacter = subfield.value.length >= 3 ? subfield.value.charAt(subfield.value.length - 3) : undefined;
|
|
79
97
|
|
|
80
|
-
|
|
98
|
+
|
|
99
|
+
// Last char should be punc, but it's not one of listed punctuation marks nor dot
|
|
81
100
|
if (punc && !(lastPuncMark || lastPuncDot)) {
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
101
|
+
console.log(puncMarks)
|
|
102
|
+
if (penultimateCharacter && validQuoteChars.includes(lastChar) && puncMarks.includes(penultimateCharacter)) {
|
|
103
|
+
// Exception: do nothing! Ending in punc+quote combo is all right, and does not imply a missing punc
|
|
104
|
+
}
|
|
105
|
+
else {
|
|
106
|
+
// Console.log("1. Invalid punctuation - missing")
|
|
107
|
+
message.message.push(`Field ${tag} requires ending punctuation, ends in '${lastChar}'`);
|
|
108
|
+
if (fix) {
|
|
109
|
+
subfield.value = subfield.value.concat('.');
|
|
110
|
+
message.fix.push(`Field ${tag} - Added punctuation to $${subfield.code}`);
|
|
111
|
+
}
|
|
87
112
|
}
|
|
88
113
|
|
|
89
114
|
// Last char is dot, but previous char is one of punc marks, like 'Question?.'
|
|
90
|
-
} else if (lastPuncDot &&
|
|
115
|
+
} else if (lastPuncDot && penultimateCharacter && puncMarks.includes(penultimateCharacter)) {
|
|
91
116
|
// Console.log("2. Invalid punctuation - duplicate, like '?.'")
|
|
92
|
-
message.message.push(`Field ${tag} has
|
|
117
|
+
message.message.push(`Field ${tag} has an extra dot after '${penultimateCharacter}'`);
|
|
93
118
|
if (fix) {
|
|
94
119
|
subfield.value = subfield.value.slice(0, -1);
|
|
95
|
-
message.fix.push(`Field ${tag} - Removed
|
|
120
|
+
message.fix.push(`Field ${tag} - Removed dot after punctuation from $${subfield.code}`);
|
|
121
|
+
}
|
|
122
|
+
// Last char is dot, but previous two chars are punc+quote, like 'Lorum "Ipsum.".'
|
|
123
|
+
} else if (lastPuncDot && antePenultimateCharacter && validQuoteChars.includes(penultimateCharacter) && puncMarks.includes(antePenultimateCharacter)) {
|
|
124
|
+
message.message.push(`Field ${tag} has an extra dot in '${antePenultimateCharacter}${penultimateCharacter}${lastChar}'`);
|
|
125
|
+
if (fix) {
|
|
126
|
+
subfield.value = subfield.value.slice(0, -1);
|
|
127
|
+
message.fix.push(`Field ${tag} - Removed '${lastChar}' after '${antePenultimateCharacter}${penultimateCharacter}'`);
|
|
96
128
|
}
|
|
97
|
-
|
|
98
129
|
// Last char shouldn't be dot !! This is behind checkEnd boolean, because of dots at end of abbreviations, so this is checked only in special cases !!//
|
|
99
130
|
} else if (checkEnd && (!punc && lastPuncDot)) {
|
|
100
131
|
// Console.log("3. Invalid punctuation - Shouldn't be dot, is")
|
|
101
|
-
message.message.push(`Field ${tag} has
|
|
132
|
+
message.message.push(`Field ${tag} has unwanted ending punctuation '${lastChar}'`);
|
|
102
133
|
if (fix) {
|
|
103
134
|
subfield.value = subfield.value.slice(0, -1);
|
|
104
135
|
message.fix.push(`Field ${tag} - Removed punctuation from $${subfield.code}`);
|
|
@@ -136,7 +167,7 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
136
167
|
if (res.special.ifInd2 && res.special.ifInd2.includes(field.ind2)) {
|
|
137
168
|
normalPuncRules(lastSubField, res.special.ifBoth, tag, true, res.special.ifLastCharNot);
|
|
138
169
|
|
|
139
|
-
// Matches
|
|
170
|
+
// Matches exception to special rule, noPuncIfInd2 (likely with value 4, that indicates copyright mark)
|
|
140
171
|
} else if (res.special.noPuncIfInd2 && field.ind2 && res.special.noPuncIfInd2.includes(field.ind2)) {
|
|
141
172
|
normalPuncRules(lastSubField, !res.special.ifBoth, tag, true, res.special.ifLastCharNot);
|
|
142
173
|
|
|
@@ -199,6 +230,15 @@ function validateField(field, linkedTag, fix, message) {
|
|
|
199
230
|
|
|
200
231
|
validateField(field, linkedTag, fix, message);
|
|
201
232
|
}
|
|
233
|
+
// fallback
|
|
234
|
+
else {
|
|
235
|
+
debug(`special is definedm but no rule applies`);
|
|
236
|
+
const lastSubField = findLastSubfield(field);
|
|
237
|
+
|
|
238
|
+
if (lastSubField) {
|
|
239
|
+
normalPuncRules(lastSubField, res.punc, field.tag, false, false, fix, message);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
202
242
|
}
|
|
203
243
|
|
|
204
244
|
let res = null;
|