@natlibfi/marc-record-validators-melinda 12.0.0-alpha.6 → 12.0.0-alpha.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -14,7 +14,7 @@
14
14
  "url": "https://github.com/NatLibFi/marc-record-validators-melinda"
15
15
  },
16
16
  "license": "MIT",
17
- "version": "12.0.0-alpha.6",
17
+ "version": "12.0.0-alpha.7",
18
18
  "main": "./dist/index.js",
19
19
  "publishConfig": {
20
20
  "access": "public"
@@ -4,7 +4,8 @@ const finnishTerms = [
4
4
  /^(?:kauno|slm|yso)\//u
5
5
  ];
6
6
 
7
- const validPuncMarks = '?"-!,)]';
7
+ const validPuncMarks = '?-!.'; // NB! ')' and ']' are only valid for some fields!
8
+ const validQuoteChars = "\"'";
8
9
  // Configuration specification
9
10
  const confSpec = [
10
11
  { // 010-035 EI
@@ -666,4 +667,4 @@ const confSpec = [
666
667
  }
667
668
  ];
668
669
 
669
- export {finnishTerms, validPuncMarks, confSpec};
670
+ export {finnishTerms, validPuncMarks, validQuoteChars, confSpec};
@@ -27,7 +27,7 @@
27
27
  */
28
28
 
29
29
  // Import {validPuncMarks, finnishTerms, confSpec} from './ending-punctuation-conf.js';
30
- import {validPuncMarks, finnishTerms, confSpec} from './ending-punctuation-conf.js';
30
+ import {validPuncMarks, validQuoteChars, finnishTerms, confSpec} from './ending-punctuation-conf.js';
31
31
  import createDebugLogger from 'debug';
32
32
 
33
33
  const debug = createDebugLogger('@natlibfi/marc-record-validator-melinda/ending-punctuation');
@@ -71,34 +71,65 @@ function validateField(field, linkedTag, fix, message) {
71
71
  return subfields.slice(-1).shift();
72
72
  }
73
73
 
74
+ function getDefaultPuncMarks(tag) {
75
+ if (tag.match(/^[1678](?:00|10|11|30)/u) || tag === '740') { // As defined in Loppupisteohje
76
+ return `${validPuncMarks})`;
77
+ }
78
+ // We don't want ').' here either. However, Loppupisteohje is a bit iffy here.
79
+ // BUG: Note that our generic rules will remove dot from Finnish terms such as https://finto.fi/yso-aika/fi/page/p1069910600
80
+ if (['647', '648', '650', '651', '654', '655', '656', '657', '658', '662'].includes(tag)) {
81
+ return `${validPuncMarks})`;
82
+ }
83
+ if(['260'].includes(tag)) {
84
+ return `${validPuncMarks})]`;
85
+ }
86
+ return validPuncMarks;
87
+ }
88
+
74
89
  // Punctuation rule (Boolean), Check no ending dot strict (Boolean)
75
90
  function normalPuncRules(subfield, punc, tag, checkEnd, overrideValidPuncMarks) {
76
- const puncMarks = overrideValidPuncMarks || validPuncMarks;
77
- const lastPuncMark = puncMarks.includes(subfield.value.slice(-1)); // If string ends to punctuation char
78
- const lastPuncDot = '.'.includes(subfield.value.slice(-1)); // If string ends to dot
91
+ const puncMarks = overrideValidPuncMarks || getDefaultPuncMarks(tag);
92
+ const lastChar = subfield.value.slice(-1);
93
+ const lastPuncMark = puncMarks.includes(lastChar); // If string ends to punctuation char
94
+ const lastPuncDot = '.'.includes(lastChar); // If string ends to dot
95
+ const penultimateCharacter = subfield.value.length >= 2 ? subfield.value.charAt(subfield.value.length - 2) : undefined;
96
+ const antePenultimateCharacter = subfield.value.length >= 3 ? subfield.value.charAt(subfield.value.length - 3) : undefined;
79
97
 
80
- // Last char should be punc, but its not one of marks nor dot
98
+
99
+ // Last char should be punc, but it's not one of listed punctuation marks nor dot
81
100
  if (punc && !(lastPuncMark || lastPuncDot)) {
82
- // Console.log("1. Invalid punctuation - missing")
83
- message.message.push(`Field ${tag} has invalid ending punctuation`);
84
- if (fix) {
85
- subfield.value = subfield.value.concat('.');
86
- message.fix.push(`Field ${tag} - Added punctuation to $${subfield.code}`);
101
+ console.log(puncMarks)
102
+ if (penultimateCharacter && validQuoteChars.includes(lastChar) && puncMarks.includes(penultimateCharacter)) {
103
+ // Exception: do nothing! Ending in punc+quote combo is all right, and does not imply a missing punc
104
+ }
105
+ else {
106
+ // Console.log("1. Invalid punctuation - missing")
107
+ message.message.push(`Field ${tag} requires ending punctuation, ends in '${lastChar}'`);
108
+ if (fix) {
109
+ subfield.value = subfield.value.concat('.');
110
+ message.fix.push(`Field ${tag} - Added punctuation to $${subfield.code}`);
111
+ }
87
112
  }
88
113
 
89
114
  // Last char is dot, but previous char is one of punc marks, like 'Question?.'
90
- } else if (lastPuncDot && subfield.value.length > 1 && puncMarks.includes(subfield.value.charAt(subfield.value.length - 2))) {
115
+ } else if (lastPuncDot && penultimateCharacter && puncMarks.includes(penultimateCharacter)) {
91
116
  // Console.log("2. Invalid punctuation - duplicate, like '?.'")
92
- message.message.push(`Field ${tag} has invalid ending punctuation`);
117
+ message.message.push(`Field ${tag} has an extra dot after '${penultimateCharacter}'`);
93
118
  if (fix) {
94
119
  subfield.value = subfield.value.slice(0, -1);
95
- message.fix.push(`Field ${tag} - Removed double punctuation from $${subfield.code}`);
120
+ message.fix.push(`Field ${tag} - Removed dot after punctuation from $${subfield.code}`);
121
+ }
122
+ // Last char is dot, but previous two chars are punc+quote, like 'Lorem "Ipsum.".'
123
+ } else if (lastPuncDot && antePenultimateCharacter && validQuoteChars.includes(penultimateCharacter) && puncMarks.includes(antePenultimateCharacter)) {
124
+ message.message.push(`Field ${tag} has an extra dot in '${antePenultimateCharacter}${penultimateCharacter}${lastChar}'`);
125
+ if (fix) {
126
+ subfield.value = subfield.value.slice(0, -1);
127
+ message.fix.push(`Field ${tag} - Removed '${lastChar}' after '${antePenultimateCharacter}${penultimateCharacter}'`);
96
128
  }
97
-
98
129
  // Last char shouldn't be dot !! This is behind checkEnd boolean, because of dots at end of abbreviations, so this is checked only in special cases !!//
99
130
  } else if (checkEnd && (!punc && lastPuncDot)) {
100
131
  // Console.log("3. Invalid punctuation - Shouldn't be dot, is")
101
- message.message.push(`Field ${tag} has invalid ending punctuation`);
132
+ message.message.push(`Field ${tag} has unwanted ending punctuation '${lastChar}'`);
102
133
  if (fix) {
103
134
  subfield.value = subfield.value.slice(0, -1);
104
135
  message.fix.push(`Field ${tag} - Removed punctuation from $${subfield.code}`);
@@ -136,7 +167,7 @@ function validateField(field, linkedTag, fix, message) {
136
167
  if (res.special.ifInd2 && res.special.ifInd2.includes(field.ind2)) {
137
168
  normalPuncRules(lastSubField, res.special.ifBoth, tag, true, res.special.ifLastCharNot);
138
169
 
139
- // Matches execption to special rule, noPuncIfInd2 (likely with value 4, that indicates copyright mark)
170
+ // Matches exception to special rule, noPuncIfInd2 (likely with value 4, that indicates copyright mark)
140
171
  } else if (res.special.noPuncIfInd2 && field.ind2 && res.special.noPuncIfInd2.includes(field.ind2)) {
141
172
  normalPuncRules(lastSubField, !res.special.ifBoth, tag, true, res.special.ifLastCharNot);
142
173
 
@@ -199,6 +230,15 @@ function validateField(field, linkedTag, fix, message) {
199
230
 
200
231
  validateField(field, linkedTag, fix, message);
201
232
  }
233
+ // fallback
234
+ else {
235
+ debug(`special is definedm but no rule applies`);
236
+ const lastSubField = findLastSubfield(field);
237
+
238
+ if (lastSubField) {
239
+ normalPuncRules(lastSubField, res.punc, field.tag, false, false, fix, message);
240
+ }
241
+ }
202
242
  }
203
243
 
204
244
  let res = null;