@natlibfi/marc-record-validators-melinda 10.16.0-alpha.2 → 10.16.1-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/dist/access-rights.js.map +1 -1
  2. package/dist/access-rights.spec.js.map +1 -1
  3. package/dist/double-commas.js.map +1 -1
  4. package/dist/double-commas.spec.js.map +1 -1
  5. package/dist/duplicates-ind1.js.map +1 -1
  6. package/dist/duplicates-ind1.spec.js.map +1 -1
  7. package/dist/empty-fields.js.map +1 -1
  8. package/dist/empty-fields.spec.js.map +1 -1
  9. package/dist/ending-punctuation-conf.js.map +1 -1
  10. package/dist/ending-punctuation.js.map +1 -1
  11. package/dist/ending-punctuation.spec.js.map +1 -1
  12. package/dist/ending-whitespace.js.map +1 -1
  13. package/dist/ending-whitespace.spec.js.map +1 -1
  14. package/dist/field-008-18-34-character-groups.js.map +1 -1
  15. package/dist/field-008-18-34-character-groups.spec.js.map +1 -1
  16. package/dist/field-505-separators.js.map +1 -1
  17. package/dist/field-505-separators.spec.js.map +1 -1
  18. package/dist/field-521-fix.js.map +1 -1
  19. package/dist/field-521-fix.spec.js.map +1 -1
  20. package/dist/field-exclusion.js.map +1 -1
  21. package/dist/field-exclusion.spec.js.map +1 -1
  22. package/dist/field-structure.js.map +1 -1
  23. package/dist/field-structure.spec.js.map +1 -1
  24. package/dist/fields-present.js.map +1 -1
  25. package/dist/fields-present.spec.js.map +1 -1
  26. package/dist/fix-country-codes.js.map +1 -1
  27. package/dist/fix-country-codes.spec.js.map +1 -1
  28. package/dist/fixRelatorTerms.js.map +1 -1
  29. package/dist/fixRelatorTerms.spec.js.map +1 -1
  30. package/dist/fixed-fields.js.map +1 -1
  31. package/dist/fixed-fields.spec.js.map +1 -1
  32. package/dist/identical-fields.js.map +1 -1
  33. package/dist/identical-fields.spec.js.map +1 -1
  34. package/dist/index.js.map +1 -1
  35. package/dist/indicator-fixes.js.map +1 -1
  36. package/dist/indicator-fixes.spec.js.map +1 -1
  37. package/dist/isbn-issn.js.map +1 -1
  38. package/dist/isbn-issn.spec.js.map +1 -1
  39. package/dist/item-language.js.map +1 -1
  40. package/dist/item-language.spec.js.map +1 -1
  41. package/dist/mergeField500Lisapainokset.js.map +1 -1
  42. package/dist/mergeField500Lisapainokset.spec.js.map +1 -1
  43. package/dist/mergeRelatorTermFields.js.map +1 -1
  44. package/dist/mergeRelatorTermFields.spec.js.map +1 -1
  45. package/dist/multiple-subfield-0.js.map +1 -1
  46. package/dist/multiple-subfield-0.spec.js.map +1 -1
  47. package/dist/non-breaking-space.js.map +1 -1
  48. package/dist/non-breaking-space.spec.js.map +1 -1
  49. package/dist/normalize-dashes.js.map +1 -1
  50. package/dist/normalize-dashes.spec.js.map +1 -1
  51. package/dist/normalize-identifiers.js.map +1 -1
  52. package/dist/normalize-identifiers.spec.js.map +1 -1
  53. package/dist/normalize-qualifying-information.js.map +1 -1
  54. package/dist/normalize-qualifying-information.spec.js.map +1 -1
  55. package/dist/normalize-utf8-diacritics.js.map +1 -1
  56. package/dist/normalize-utf8-diacritics.spec.js.map +1 -1
  57. package/dist/normalizeFieldForComparison.js.map +1 -1
  58. package/dist/normalizeSubfieldValueForComparison.js.map +1 -1
  59. package/dist/prepublicationUtils.js.map +1 -1
  60. package/dist/punctuation/index.js.map +1 -1
  61. package/dist/punctuation/rules/aut.js.map +1 -1
  62. package/dist/punctuation/rules/bib.js.map +1 -1
  63. package/dist/punctuation/rules/index.js.map +1 -1
  64. package/dist/punctuation.spec.js.map +1 -1
  65. package/dist/punctuation2.js +131 -89
  66. package/dist/punctuation2.js.map +1 -1
  67. package/dist/punctuation2.spec.js.map +1 -1
  68. package/dist/reindexSubfield6OccurenceNumbers.js.map +1 -1
  69. package/dist/reindexSubfield6OccurenceNumbers.spec.js.map +1 -1
  70. package/dist/removeDuplicateDataFields.js.map +1 -1
  71. package/dist/removeDuplicateDataFields.spec.js.map +1 -1
  72. package/dist/removeInferiorDataFields.js.map +1 -1
  73. package/dist/removeInferiorDataFields.spec.js.map +1 -1
  74. package/dist/resolvable-ext-references-melinda.js.map +1 -1
  75. package/dist/resolvable-ext-references-melinda.spec.js.map +1 -1
  76. package/dist/resolveOrphanedSubfield6s.js.map +1 -1
  77. package/dist/resolveOrphanedSubfield6s.spec.js.map +1 -1
  78. package/dist/sanitize-vocabulary-source-codes.js.map +1 -1
  79. package/dist/sanitize-vocabulary-source-codes.spec.js.map +1 -1
  80. package/dist/sort-tags.js.map +1 -1
  81. package/dist/sort-tags.spec.js.map +1 -1
  82. package/dist/sortFields.js.map +1 -1
  83. package/dist/sortFields.spec.js.map +1 -1
  84. package/dist/sortRelatorTerms.js.map +1 -1
  85. package/dist/sortRelatorTerms.spec.js.map +1 -1
  86. package/dist/sortSubfields.js.map +1 -1
  87. package/dist/sortSubfields.spec.js.map +1 -1
  88. package/dist/stripPunctuation.js.map +1 -1
  89. package/dist/stripPunctuation.spec.js.map +1 -1
  90. package/dist/subfield-exclusion.js.map +1 -1
  91. package/dist/subfield-exclusion.spec.js.map +1 -1
  92. package/dist/subfield6Utils.js.map +1 -1
  93. package/dist/subfield8Utils.js.map +1 -1
  94. package/dist/subfieldValueNormalizations.js +28 -8
  95. package/dist/subfieldValueNormalizations.js.map +1 -1
  96. package/dist/subfieldValueNormalizations.spec.js.map +1 -1
  97. package/dist/sync-007-and-300.js.map +1 -1
  98. package/dist/sync-007-and-300.spec.js.map +1 -1
  99. package/dist/typeOfDate-008.js.map +1 -1
  100. package/dist/typeOfDate-008.spec.js.map +1 -1
  101. package/dist/unicode-decomposition.js.map +1 -1
  102. package/dist/unicode-decomposition.spec.js.map +1 -1
  103. package/dist/update-field-540.js.map +1 -1
  104. package/dist/update-field-540.spec.js.map +1 -1
  105. package/dist/urn.js.map +1 -1
  106. package/dist/urn.spec.js.map +1 -1
  107. package/dist/utils.js.map +1 -1
  108. package/package.json +6 -6
  109. package/src/punctuation2.js +103 -56
  110. package/src/subfieldValueNormalizations.js +32 -9
  111. package/test-fixtures/normalize-subfield-value/03/expectedResult.json +26 -0
  112. package/test-fixtures/normalize-subfield-value/03/metadata.json +6 -0
  113. package/test-fixtures/normalize-subfield-value/03/record.json +25 -0
  114. package/test-fixtures/normalize-subfield-value/100_and_880/expectedResult.json +22 -0
  115. package/test-fixtures/normalize-subfield-value/100_and_880/metadata.json +5 -0
  116. package/test-fixtures/normalize-subfield-value/100_and_880/record.json +20 -0
  117. package/test-fixtures/punctuation2/100_and_880/expectedResult.json +22 -0
  118. package/test-fixtures/punctuation2/100_and_880/metadata.json +6 -0
  119. package/test-fixtures/punctuation2/100_and_880/record.json +20 -0
  120. package/test-fixtures/punctuation2/240/expectedResult.json +12 -0
  121. package/test-fixtures/punctuation2/240/metadata.json +6 -0
  122. package/test-fixtures/punctuation2/240/record.json +10 -0
  123. package/test-fixtures/punctuation2/800/expectedResult.json +15 -0
  124. package/test-fixtures/punctuation2/800/metadata.json +6 -0
  125. package/test-fixtures/punctuation2/800/record.json +14 -0
  126. package/test-fixtures/strip-punctuation/100_and_880/expectedResult.json +37 -0
  127. package/test-fixtures/strip-punctuation/100_and_880/metadata.json +5 -0
  128. package/test-fixtures/strip-punctuation/100_and_880/record.json +35 -0
  129. package/test-fixtures/strip-punctuation/240/expectedResult.json +16 -0
  130. package/test-fixtures/strip-punctuation/240/metadata.json +6 -0
  131. package/test-fixtures/strip-punctuation/240/record.json +14 -0
@@ -10,27 +10,29 @@
10
10
  * (They are jumped over when looking for the next non-controlfield subfield.)
11
11
  */
12
12
  import {validateSingleField} from './ending-punctuation';
13
+ import {fieldGetUnambiguousTag} from './subfield6Utils';
13
14
  //import createDebugLogger from 'debug';
14
- import {fieldToString, nvdebug, subfieldToString} from './utils';
15
+ import {fieldToString, nvdebug} from './utils';
15
16
  import clone from 'clone';
16
17
 
17
18
  //const debug = createDebugLogger('debug/punctuation2');
18
19
 
20
+ const descriptionString = 'Remove invalid and add valid punctuation to data fields';
19
21
  export default function () {
20
22
  return {
21
- description: 'Add punctuation to data fields',
23
+ description: descriptionString,
22
24
  validate, fix
23
25
  };
24
26
 
25
27
  function fix(record) {
26
- nvdebug('Add punctuation to data fields: fixer');
28
+ nvdebug(`${descriptionString}: fixer`);
27
29
  const res = {message: [], fix: [], valid: true};
28
30
  record.fields.forEach(f => fieldFixPunctuation(f));
29
31
  return res;
30
32
  }
31
33
 
32
34
  function validate(record) {
33
- nvdebug('Add punctuation to data fields: validate');
35
+ nvdebug(`${descriptionString}: validate`);
34
36
 
35
37
  const fieldsNeedingModification = record.fields.filter(f => fieldNeedsModification(f, true));
36
38
 
@@ -84,9 +86,9 @@ export function fieldNeedsModification(field, add = true) {
84
86
 
85
87
 
86
88
  //const stripCrap = / *[-;:,+]+$/u;
87
- const commaNeedsPuncAfter = /(?:[a-z0-9A-Z]|å|ä|ö|Å|Ä|Ö|\))$/u;
88
89
  const defaultNeedsPuncAfter = /(?:[a-z0-9A-Z]|å|ä|ö|Å|Ä|Ö)$/u;
89
90
  const defaultNeedsPuncAfter2 = /(?:[\]a-zA-Z0-9)]|ä|å|ö|Å|Ä|Ö)$/u;
91
+ const doesNotEndInPunc = /[^!?.:;,]$/u; // non-punc for pre-240/700/XXX $, note that '.' comes if preceded by ')'
90
92
  const blocksPuncRHS = /^(?:\()/u;
91
93
  const allowsPuncRHS = /^(?:[A-Za-z0-9]|å|ä|ö|Å|Ä|Ö)/u;
92
94
 
@@ -96,7 +98,7 @@ const puncIsProbablyPunc = /(?:[a-z0-9)]|å|ä|ö) ?[.,:;]$/u;
96
98
 
97
99
  // Will unfortunately trigger "Sukunimi, Th." type:
98
100
  const removeColons = {'code': 'abcdefghijklmnopqrstuvwxyz', 'remove': / *[;:]$/u};
99
- const removeX00Comma = {'code': 'abcqde', 'followedBy': 'abcqde#', 'context': /.,$/u, 'remove': /,$/u};
101
+ const removeX00Comma = {'code': 'abcdenqt', 'followedBy': 'abcdenqtv#', 'context': /.,$/u, 'remove': /,$/u};
100
102
  const cleanRHS = {'code': 'abcd', 'followedBy': 'bcde', 'context': /(?:(?:[a-z0-9]|å|ä|ö)\.|,)$/u, 'contextRHS': blocksPuncRHS, 'remove': /[.,]$/u};
101
103
  const cleanX00dCommaOrDot = {'code': 'd', 'followedBy': 'et#', 'context': /[0-9]-[,.]$/u, 'remove': /[,.]$/u};
102
104
  const cleanX00aDot = {'code': 'abcde', 'followedBy': 'cdegj', 'context': dotIsProbablyPunc, 'remove': /\.$/u};
@@ -104,22 +106,28 @@ const cleanCorruption = {'code': 'abcdefghijklmnopqrstuvwxyz', 'remove': / \.$/u
104
106
  // These $e dot removals are tricky: before removing the dot, we should know that it isn't an abbreviation such as "esitt."...
105
107
  const cleanX00eDot = {'code': 'e', 'followedBy': 'egj#', 'context': /(?:[ai]ja|jä)[.,]$/u, 'remove': /\.$/u};
106
108
 
109
+ const removeCommaBeforeLanguageSubfieldL = {'followedBy': 'l', 'remove': /,$/u};
110
+ const removeCommaBeforeTitleSubfieldT = {'followedBy': 't', 'remove': /,$/u};
111
+
107
112
  const X00RemoveDotAfterBracket = {'code': 'cq', 'context': /\)\.$/u, 'remove': /\.$/u};
108
113
  // 390, 800, 810, 830...
109
114
  const cleanPuncBeforeLanguage = {'code': 'atvxyz', 'followedBy': 'l', 'context': puncIsProbablyPunc, 'remove': / *[.,:;]$/u};
110
115
 
111
116
 
112
- const addX00aComma = {'add': ',', 'code': 'abcqdej', 'followedBy': 'cdeg', 'context': commaNeedsPuncAfter, 'contextRHS': allowsPuncRHS};
117
+ const addX00aComma = {'add': ',', 'code': 'abcqej', 'followedBy': 'cdeg', 'context': doesNotEndInPunc, 'contextRHS': allowsPuncRHS};
118
+ const addX00dComma = {'name': 'X00$d ending in "-" does not get comma', 'add': ',', 'code': 'd', 'followedBy': 'cdeg', 'context': /[^-,.!]$/u, 'contextRHS': allowsPuncRHS};
113
119
  const addX00aComma2 = {'add': ',', 'code': 'abcdej', 'followedBy': 'cdeg', 'context': /(?:[A-Z]|Å|Ä|Ö)\.$/u, 'contextRHS': allowsPuncRHS};
114
- const addX00aDot = {'add': '.', 'code': 'abcdet', 'followedBy': '#tu', 'context': defaultNeedsPuncAfter};
120
+ const addX00Dot = {'add': '.', 'code': 'abcdetv', 'followedBy': '#fklptu', 'context': defaultNeedsPuncAfter};
121
+
115
122
 
116
123
  //const addX10iaComma = {'name': 'Punctuate relationship information', 'code': 'i', 'followedBy': 'a', 'context': defaultNeedsPuncAfter2};
117
124
  const addX10bDot = {'name': 'Add X10 pre-$b dot', 'add': '.', 'code': 'ab', 'followedBy': 'b', 'context': defaultNeedsPuncAfter};
118
125
  const addX10eComma = {'add': ',', 'code': 'abe', 'followedBy': 'e', 'context': defaultNeedsPuncAfter};
119
126
  const addX10Dot = {'name': 'Add X10 final dot', 'add': '.', 'code': 'abet', 'followedBy': 'tu#', 'context': defaultNeedsPuncAfter};
120
- const addLanguageComma = {'name': 'Add comma before 810$l', 'add': ',', 'code': 'tv', 'followedBy': 'l', 'context': defaultNeedsPuncAfter2};
121
127
  const addColonToRelationshipInformation = {'name': 'Add \':\' to 7X0 $i relationship info', 'add': ':', 'code': 'i', 'context': defaultNeedsPuncAfter2};
122
128
 
129
+ const addDotBeforeLanguageSubfieldL = {'name': 'Add dot before $l', 'add': '.', 'code': 'abepst', 'followedBy': 'l', 'context': doesNotEndInPunc};
130
+
123
131
  // 490:
124
132
  const addSemicolonBeforeVolumeDesignation = {'name': 'Add " ;" before $v', 'add': ' ;', 'code': 'atxyz', 'followedBy': 'v', 'context': /[^;]$/u};
125
133
 
@@ -131,8 +139,12 @@ const REMOVE_AND_ADD = 3;
131
139
  // Crappy punctuation consists of various crap that is somewhat common.
132
140
  // We strip crap for merge decisions. We are not trying to actively remove crap here.
133
141
 
134
- const removeX00Whatever = [removeX00Comma, cleanX00aDot, cleanX00eDot, cleanCorruption, cleanX00dCommaOrDot, cleanRHS, X00RemoveDotAfterBracket, removeColons, cleanPuncBeforeLanguage];
135
- const removeX10Whatever = [removeX00Comma, cleanX00aDot, cleanX00eDot, cleanCorruption, removeColons, cleanPuncBeforeLanguage];
142
+ const removeCrapFromAllEntryFields = [removeCommaBeforeLanguageSubfieldL, removeCommaBeforeTitleSubfieldT];
143
+
144
+ const removeX00Whatever = [removeX00Comma, cleanX00aDot, cleanX00eDot, cleanCorruption, cleanX00dCommaOrDot, cleanRHS, X00RemoveDotAfterBracket, removeColons, cleanPuncBeforeLanguage, ...removeCrapFromAllEntryFields];
145
+ const removeX10Whatever = [removeX00Comma, cleanX00aDot, cleanX00eDot, cleanCorruption, removeColons, cleanPuncBeforeLanguage, ...removeCrapFromAllEntryFields];
146
+ const removeX11Whatever = removeCrapFromAllEntryFields;
147
+ const removeX30Whatever = removeCrapFromAllEntryFields;
136
148
 
137
149
  const remove490And830Whatever = [{'code': 'axyzv', 'followedBy': 'axyzv', 'remove': /(?: *;| *=|,)$/u}];
138
150
 
@@ -152,12 +164,16 @@ const crappy24X = [
152
164
  {'code': 'abc', 'followedBy': '#', 'remove': /\.$/u, 'context': dotIsProbablyPunc},
153
165
  {'code': 'abfghinp', 'followedBy': '#', 'remove': /\.$/u, 'context': dotIsProbablyPunc},
154
166
  {'code': 'n', 'followedBy': 'p', 'remove': /\.$/u, 'context': dotIsProbablyPunc}, // MELINDA-8817
155
- {'code': 'p', 'followedBy': 'pc', 'remove': /\.$/u, 'context': dotIsProbablyPunc} // MELINDA-8817
167
+ {'code': 'p', 'followedBy': 'pc', 'remove': /\.$/u, 'context': dotIsProbablyPunc}, // MELINDA-8817
168
+ removeCommaBeforeLanguageSubfieldL
156
169
  ];
157
170
 
171
+
158
172
  const cleanCrappyPunctuationRules = {
159
173
  '100': removeX00Whatever,
160
174
  '110': removeX10Whatever,
175
+ '111': removeX11Whatever,
176
+ '130': removeX30Whatever,
161
177
  '240': crappy24X,
162
178
  '245': crappy24X,
163
179
  '246': crappy24X,
@@ -173,8 +189,12 @@ const cleanCrappyPunctuationRules = {
173
189
  '490': remove490And830Whatever,
174
190
  '600': removeX00Whatever,
175
191
  '610': removeX10Whatever,
192
+ '611': removeX11Whatever,
193
+ '630': removeX30Whatever,
176
194
  '700': removeX00Whatever,
177
195
  '710': removeX10Whatever,
196
+ '711': removeX11Whatever,
197
+ '730': removeX30Whatever,
178
198
  '773': linkingEntryWhatever,
179
199
  '774': linkingEntryWhatever,
180
200
  '776': linkingEntryWhatever,
@@ -189,15 +209,16 @@ const cleanLegalX00Comma = {'code': 'abcde', 'followedBy': 'cdegj', 'context': /
189
209
  const cleanLegalX00bDot = {'code': 'b', 'followedBy': 't#', context: /^[IVXLCDM]+\.$/u, 'remove': /\.$/u};
190
210
  const cleanLegalX00iColon = {'code': 'i', 'followedBy': 'a', 'remove': / *:$/u}; // NB! context is not needed
191
211
  const cleanLegalX00Dot = {'code': 'abcdetvl', 'followedBy': 'tu#', 'context': /(?:[a-z0-9)]|å|ä|ö)\.$/u, 'remove': /\.$/u};
192
- const cleanLanguageComma = {'name': 'language comma', 'code': 'tv', 'followedBy': 'l', 'context': /.,$/u, 'remove': /,$/u};
212
+ const cleanDotBeforeLanguageSubfieldL = {'name': 'pre-language-$l dot', 'followedBy': 'l', 'context': /.\.$/u, 'remove': /\.$/u};
193
213
 
214
+ const legalEntryField = [cleanDotBeforeLanguageSubfieldL];
194
215
 
195
- const legalX00punc = [cleanLegalX00Comma, cleanLegalX00iColon, cleanLegalX00bDot, cleanLegalX00Dot, cleanLanguageComma];
216
+ const legalX00punc = [cleanLegalX00Comma, cleanLegalX00iColon, cleanLegalX00bDot, cleanLegalX00Dot, ...legalEntryField];
196
217
 
197
218
  const cleanLegalX10Comma = {'name': 'X10comma', 'code': 'abe', 'followedBy': 'e', 'context': /.,$/u, 'remove': /,$/u};
198
219
  const cleanLegalX10Dot = {'name': 'X10dot', 'code': 'abt', 'followedBy': 'bst#', 'context': /.\.$/u, 'remove': /\.$/u};
199
220
 
200
- const legalX10punc = [cleanLegalX10Comma, cleanLegalX10Dot, cleanX00eDot, cleanLanguageComma];
221
+ const legalX10punc = [cleanLegalX10Comma, cleanLegalX10Dot, cleanX00eDot, ...legalEntryField];
201
222
 
202
223
  const cleanLegalSeriesTitle = [ // 490 and 830
203
224
  {'code': 'a', 'followedBy': 'a', 'remove': / =$/u},
@@ -213,18 +234,17 @@ const clean24X = [
213
234
  {'name': 'ABFNP:C', 'code': 'abfnp', 'followedBy': 'c', 'remove': / \/$/u},
214
235
  {'name': 'ABN:N', 'code': 'abn', 'followedBy': 'n', 'remove': /\.$/u},
215
236
  {'name': 'ABNP:#', 'code': 'abnp', 'followedBy': '#', 'remove': /\.$/u},
216
- {'name': 'N:P', 'code': 'n', 'followedBy': 'p', 'remove': /,$/u}
237
+ {'name': 'N:P', 'code': 'n', 'followedBy': 'p', 'remove': /,$/u},
238
+ cleanDotBeforeLanguageSubfieldL
217
239
  ];
218
240
 
219
241
  const cleanValidPunctuationRules = {
220
242
  '100': legalX00punc,
221
243
  '110': legalX10punc,
222
- '600': legalX00punc,
223
- '610': legalX10punc,
224
- '700': legalX00punc,
225
- '710': legalX10punc,
226
- '800': legalX00punc,
227
- '810': legalX10punc,
244
+ '111': legalEntryField,
245
+ '130': legalEntryField,
246
+ '240': clean24X,
247
+ '243': clean24X,
228
248
  '245': clean24X,
229
249
  '246': clean24X,
230
250
  '260': [
@@ -248,34 +268,48 @@ const cleanValidPunctuationRules = {
248
268
  ],
249
269
  '490': cleanLegalSeriesTitle,
250
270
  '534': [{'code': 'p', 'followedBy': 'c', 'remove': /:$/u}],
271
+ '600': legalX00punc,
272
+ '610': legalX10punc,
273
+ '611': legalEntryField,
274
+ '630': legalEntryField,
251
275
  // Experimental, MET366-ish (end punc is internationally valid, but we don't use it here in Finland):
252
276
  '648': [{'code': 'a', 'content': /^[0-9]+\.$/u, 'ind2': ['4'], 'remove': /\.$/u}],
253
- '830': cleanLegalSeriesTitle,
277
+ '700': legalX00punc,
278
+ '710': legalX10punc,
279
+ '711': legalEntryField,
280
+ '730': legalEntryField,
281
+ '800': legalX00punc,
282
+ '810': legalX10punc,
283
+ '811': legalEntryField,
284
+ '830': [...legalEntryField, ...cleanLegalSeriesTitle],
254
285
  '946': clean24X
255
-
256
286
  };
257
287
 
258
- // addColonToRelationshipInformation only applies to 700/710 but as others don't have $i, it's fine
259
- const addX00 = [addX00aComma, addX00aComma2, addX00aDot, addLanguageComma, addSemicolonBeforeVolumeDesignation, addColonToRelationshipInformation];
260
- const addX10 = [addX10bDot, addX10eComma, addX10Dot, addLanguageComma, addSemicolonBeforeVolumeDesignation, addColonToRelationshipInformation];
261
288
 
262
- const add245 = [
263
- // Blah! Also "$a = $b" and "$a ; $b" can be valid... But ' :' is better than nothing, I guess...
264
- {'code': 'a', 'followedBy': 'b', 'add': ' :', 'context': defaultNeedsPuncAfter},
265
- {'code': 'ab', 'followedBy': 'n', 'add': '.', 'context': defaultNeedsPuncAfter},
266
- {'code': 'abk', 'followedBy': 'f', 'add': ',', 'context': defaultNeedsPuncAfter},
267
- {'code': 'n', 'followedBy': 'p', 'add': ',', 'context': defaultNeedsPuncAfter},
268
- {'code': 'abfnp', 'followedBy': 'c', 'add': ' /', 'context': defaultNeedsPuncAfter},
269
- {'code': 'abc', 'followedBy': '#', 'add': '.', 'context': defaultNeedsPuncAfter} // Stepping on "punctuation validaror's" toes
270
- ];
289
+ // Overgeneralizes a bit: eg. addColonToRelationshipInformation only applies to 700/710 but as others don't have $i, it's fine.
290
+ const addToAllEntryFields = [addDotBeforeLanguageSubfieldL, addSemicolonBeforeVolumeDesignation, addColonToRelationshipInformation];
291
+
292
+
293
+ const addX00 = [addX00aComma, addX00aComma2, addX00Dot, addX00dComma, ...addToAllEntryFields];
294
+ const addX10 = [addX10bDot, addX10eComma, addX10Dot, ...addToAllEntryFields];
295
+ const addX11 = [...addToAllEntryFields];
296
+ const addX30 = [...addToAllEntryFields];
271
297
 
272
- const add246 = [
298
+ const add24X = [
273
299
  {'code': 'i', 'followedBy': 'a', 'add': ':', 'context': defaultNeedsPuncAfter},
274
300
  {'code': 'a', 'followedBy': 'b', 'add': ' :', 'context': defaultNeedsPuncAfter},
275
301
  {'code': 'abk', 'followedBy': 'f', 'add': ',', 'context': defaultNeedsPuncAfter},
276
- {'code': 'abfnp', 'followedBy': 'c', 'add': ' /', 'context': defaultNeedsPuncAfter}
302
+ {'code': 'abfnp', 'followedBy': 'c', 'add': ' /', 'context': defaultNeedsPuncAfter},
303
+ addDotBeforeLanguageSubfieldL
277
304
  ];
278
305
 
306
+ const add245 = [
307
+ ...add24X,
308
+ // Blah! Also "$a = $b" and "$a ; $b" can be valid... But ' :' is better than nothing, I guess...
309
+ {'code': 'ab', 'followedBy': 'n', 'add': '.', 'context': defaultNeedsPuncAfter},
310
+ {'code': 'n', 'followedBy': 'p', 'add': ',', 'context': defaultNeedsPuncAfter},
311
+ {'code': 'abc', 'followedBy': '#', 'add': '.', 'context': defaultNeedsPuncAfter} // Stepping on "punctuation validator's" toes
312
+ ];
279
313
 
280
314
  const addSeriesTitle = [ // 490 and 830
281
315
  {'code': 'a', 'followedBy': 'a', 'add': ' =', 'context': defaultNeedsPuncAfter2},
@@ -286,9 +320,12 @@ const addSeriesTitle = [ // 490 and 830
286
320
  const addPairedPunctuationRules = {
287
321
  '100': addX00,
288
322
  '110': addX10,
289
- '240': add246,
323
+ '111': addX11,
324
+ '130': addX30,
325
+ '240': add24X,
326
+ '243': add24X,
290
327
  '245': add245,
291
- '246': add246,
328
+ '246': add24X,
292
329
  '260': [
293
330
  {'code': 'a', 'followedBy': 'b', 'add': ' :', 'context': defaultNeedsPuncAfter2},
294
331
  {'code': 'b', 'followedBy': 'c', 'add': ',', 'context': defaultNeedsPuncAfter2},
@@ -313,11 +350,16 @@ const addPairedPunctuationRules = {
313
350
  '534': [{'code': 'p', 'followedBy': 'c', 'add': ':', 'context': defaultNeedsPuncAfter2}],
314
351
  '600': addX00,
315
352
  '610': addX10,
353
+ '611': addX11,
354
+ '630': addX30,
316
355
  '700': addX00,
317
356
  '710': addX10,
357
+ '711': addX11,
358
+ '730': addX30,
318
359
  '800': addX00,
319
360
  '810': addX10,
320
- '830': addSeriesTitle,
361
+ '811': addX11,
362
+ '830': [...addX30, ...addSeriesTitle],
321
363
  '946': [{'code': 'i', 'followedBy': 'a', 'add': ':', 'context': defaultNeedsPuncAfter}]
322
364
  };
323
365
 
@@ -340,6 +382,9 @@ function debugRule(rule) {
340
382
  */
341
383
 
342
384
  function ruleAppliesToSubfieldCode(targetSubfieldCodes, currSubfieldCode) {
385
+ if (!targetSubfieldCodes) { // We are not interested in what subfield precedes 240$l, ',' is removed anyway
386
+ return true;
387
+ }
343
388
  const negation = targetSubfieldCodes.includes('!');
344
389
  if (negation) {
345
390
  return !targetSubfieldCodes.includes(currSubfieldCode);
@@ -424,38 +469,40 @@ function checkRule(rule, field, subfield1, subfield2) {
424
469
  return true;
425
470
  }
426
471
 
427
- function applyPunctuationRules(field, subfield1, subfield2, ruleArray = null, operation = NONE) {
428
-
429
- if (!(`${field.tag}` in ruleArray) || ruleArray === null || operation === NONE) {
430
-
431
- /*
432
- if (!['020', '650'].includes(tag) || !isControlSubfieldCode(subfield1.code)) { // eslint-disable-line functional/no-conditional-statements
433
- nvdebug(`No punctuation rules found for ${tag} (looking for: ‡${subfield1.code})`, debug);
434
472
 
435
- }
436
- */
473
+ function applyPunctuationRules(field, subfield1, subfield2, ruleArray = null, operation = NONE) {
474
+ if (operation === NONE || ruleArray === null) { // !fieldIsApplicable(field, ruleArray)) {
437
475
  return;
438
476
  }
439
- nvdebug(`PUNCTUATE ${field.tag} '${subfieldToString(subfield1)}' XXX '${subfield2 ? subfieldToString(subfield2) : '#'} }`);
477
+ const tag2 = field.tag === '880' ? fieldGetUnambiguousTag(field) : field.tag;
478
+ if (!tag2) {
479
+ return;
480
+ }
481
+ if (!(`${tag2}` in ruleArray)) {
482
+ return;
483
+ }
484
+
485
+ //nvdebug(`PUNCTUATE ${field.tag}/${tag2} '${subfieldToString(subfield1)}' XXX '${subfield2 ? subfieldToString(subfield2) : '#'} }`);
440
486
 
441
- //nvdebug(`OP=${operation} ${tag}: '${subfield1.code}: ${subfield1.value}' ??? '${subfield2 ? subfield2.code : '#'}'`, debug);
442
- const candRules = ruleArray[field.tag];
487
+ //nvdebug(`OP=${operation} ${tag2}: '${subfield1.code}: ${subfield1.value}' ??? '${subfield2 ? subfield2.code : '#'}'`);
488
+ const candRules = ruleArray[tag2];
443
489
  candRules.forEach(rule => {
444
490
  //debugRule(rule);
445
-
491
+ //nvdebug(' WP1');
446
492
  if (!checkRule(rule, field, subfield1, subfield2)) {
447
493
  return;
448
494
  }
495
+ //nvdebug(' WP2');
449
496
 
450
497
  //const originalValue = subfield1.value;
451
498
  if (rule.remove && [REMOVE, REMOVE_AND_ADD].includes(operation) && subfield1.value.match(rule.remove)) { // eslint-disable-line functional/no-conditional-statements
452
499
  //nvdebug(` PUNC REMOVAL TO BE PERFORMED FOR $${subfield1.code} '${subfield1.value}'`, debug);
453
500
  subfield1.value = subfield1.value.replace(rule.remove, ''); // eslint-disable-line functional/immutable-data
454
- //nvdebug(` PUNC REMOVAL PERFORMED FOR '${subfield1.value}'`, debug);
501
+ //nvdebug(` PUNC REMOVAL PERFORMED FOR '${subfield1.value}'`);
455
502
  }
456
503
  if (rule.add && [ADD, REMOVE_AND_ADD].includes(operation)) { // eslint-disable-line functional/no-conditional-statements
457
504
  subfield1.value += rule.add; // eslint-disable-line functional/immutable-data
458
- //nvdebug(` ADDED '${rule.add}' TO FORM '${subfield1.value}'`, debug);
505
+ //nvdebug(` ADDED '${rule.add}' TO FORM '${subfield1.value}'`);
459
506
  }
460
507
 
461
508
  /*
@@ -1,6 +1,8 @@
1
1
  //import createDebugLogger from 'debug';
2
2
  import clone from 'clone';
3
3
  import {fieldHasSubfield, fieldToString} from './utils';
4
+ import {fieldFixPunctuation} from './punctuation2';
5
+ import {fieldGetUnambiguousTag} from './subfield6Utils';
4
6
 
5
7
 
6
8
  // Author(s): Nicholas Volk
@@ -69,20 +71,41 @@ function handleInitials(value, subfieldCode, field) {
69
71
  // initial space confirms us that it's an initial
70
72
  return str.match(/ (?:[A-Z]|Å|Ä|Ö)\.(?:[A-Z]|Å|Ä|Ö)/u);
71
73
  }
72
-
73
74
  }
75
+
74
76
  function getNormalizedValue(subfield, field) {
75
- /* eslint-disable */
76
- let value = subfield.value;
77
- value = handleInitials(value, subfield.code, field);
77
+ return uppercaseLanguage(handleMovies(handleInitials(subfield.value, subfield.code, field)));
78
78
 
79
79
 
80
- if (subfield.code === 'a' && ['130', '630', '730'].includes(field.tag)) {
81
- // MRA-614: "(elokuva, 2000)" => "(elokuva : 2000)""
82
- return value.replace(/\((elokuva), (19[0-9][0-9]|20[0-2][0-9])\)/u, '($1 : $2)'); // eslint-disable-line prefer-named-capture-group
80
+ function handleMovies(value) {
81
+ if (subfield.code === 'a' && ['130', '630', '730'].includes(field.tag)) {
82
+ // MRA-614: "(elokuva, 2000)" => "(elokuva : 2000)""
83
+ return value.replace(/\((elokuva), (19[0-9][0-9]|20[0-2][0-9])\)/u, '($1 : $2)'); // eslint-disable-line prefer-named-capture-group
84
+ }
85
+ return value;
86
+ }
87
+
88
+ function uppercaseLanguage(value) { // Part of MET-549
89
+ const relevantTags = ['130', '240', '243', '600', '610', '611', '630', '700', '710', '711', '730', '800', '810', '811', '830'];
90
+
91
+ if (subfield.code !== 'l') {
92
+ return value;
93
+ }
94
+ const targetTag = tagForUppercasing();
95
+ if (relevantTags.includes(targetTag)) {
96
+ const newValue = `${value[0].toUpperCase()}${value.slice(1)}`;
97
+ if (newValue !== value) {
98
+ fieldFixPunctuation(field); // Rather hackily try to fix prev punc on the fly
99
+ return newValue;
100
+ }
101
+ }
102
+
103
+ function tagForUppercasing() {
104
+ return field.tag === '880' ? fieldGetUnambiguousTag(field) : field.tag;
105
+ }
106
+
107
+ return value;
83
108
  }
84
- /* eslint-enable */
85
- return value;
86
109
  }
87
110
 
88
111
  function normalizeSubfieldValues(field) {
@@ -0,0 +1,26 @@
1
+ {
2
+ "_validationOptions": {},
3
+ "fields": [
4
+ { "tag": "005", "value": "20220202020202.0" },
5
+ { "tag": "130", "ind1": "1", "ind2": " ", "subfields": [
6
+ { "code": "a", "value": "Sukunimi, A. B." },
7
+ { "code": "t", "value": "Opus." },
8
+ { "code": "l", "value": "Suomi" }
9
+ ]},
10
+ { "tag": "240", "ind1": "1", "ind2": " ", "subfields": [
11
+ { "code": "a", "value": "Supo." },
12
+ { "code": "l", "value": "Saame" }
13
+ ]},
14
+ { "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
15
+ { "code": "a", "value": "Sukunimi, A. B." },
16
+ { "code": "t", "value": "Opus." },
17
+ { "code": "l", "value": "Suomi" }
18
+ ]},
19
+ { "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
20
+ { "code": "a", "value": "Sukunimi, A. B." },
21
+ { "code": "t", "value": "Opus." },
22
+ { "code": "l", "value": "Suomi" }
23
+ ]}
24
+ ],
25
+ "leader": ""
26
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "description": "Apply subfield internal normalization",
3
+ "comment": "Related issues: MRA-273, MRA-614...",
4
+ "fix": true,
5
+ "only": false
6
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "fields": [
3
+ { "tag": "005", "value": "20220202020202.0" },
4
+ { "tag": "130", "ind1": "1", "ind2": " ", "subfields": [
5
+ { "code": "a", "value": "Sukunimi, A. B." },
6
+ { "code": "t", "value": "Opus," },
7
+ { "code": "l", "value": "suomi" }
8
+ ]},
9
+ { "tag": "240", "ind1": "1", "ind2": " ", "subfields": [
10
+ { "code": "a", "value": "Supo," },
11
+ { "code": "l", "value": "saame" }
12
+ ]},
13
+ { "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
14
+ { "code": "a", "value": "Sukunimi, A. B." },
15
+ { "code": "t", "value": "Opus." },
16
+ { "code": "l", "value": "Suomi" }
17
+ ]},
18
+ { "tag": "600", "ind1": "1", "ind2": " ", "subfields": [
19
+ { "code": "a", "value": "Sukunimi, A. B." },
20
+ { "code": "t", "value": "Opus," },
21
+ { "code": "l", "value": "suomi" }
22
+ ]}
23
+
24
+ ]
25
+ }
@@ -0,0 +1,22 @@
1
+ {
2
+ "_validationOptions": {},
3
+ "fields": [
4
+ { "tag": "005", "value": "20220202020202.0" },
5
+ { "tag": "700", "ind1": "1", "ind2": "2", "subfields": [
6
+ { "code": "6", "value": "880-06" },
7
+ { "code": "a", "value": "Jang, Gangmyeong," },
8
+ { "code": "d", "value": "1975-" },
9
+ { "code": "t", "value": "Albasaeng jareugi." },
10
+ { "code": "l", "value": "Englanti."}
11
+ ]},
12
+ { "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
13
+ { "code": "6", "value": "700-06/$1" },
14
+ { "code": "a", "value": "장강명," },
15
+ { "code": "d", "value": "1975-" },
16
+ { "code": "t", "value": "알바생 자르기." },
17
+ { "code": "l", "value": "Englanti."
18
+ }
19
+ ]}
20
+ ],
21
+ "leader": ""
22
+ }
@@ -0,0 +1,5 @@
1
+ {
2
+ "description": "Normalize $l",
3
+ "comment": "Related issues: MRA-273, MRA-614...",
4
+ "fix": true
5
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ "fields": [
3
+ { "tag": "005", "value": "20220202020202.0" },
4
+ { "tag": "700", "ind1": "1", "ind2": "2", "subfields": [
5
+ { "code": "6", "value": "880-06" },
6
+ { "code": "a", "value": "Jang, Gangmyeong," },
7
+ { "code": "d", "value": "1975-" },
8
+ { "code": "t", "value": "Albasaeng jareugi," },
9
+ { "code": "l", "value": "englanti."}
10
+ ]},
11
+ { "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
12
+ { "code": "6", "value": "700-06/$1" },
13
+ { "code": "a", "value": "장강명," },
14
+ { "code": "d", "value": "1975-" },
15
+ { "code": "t", "value": "알바생 자르기," },
16
+ { "code": "l", "value": "englanti."
17
+ }
18
+ ]}
19
+ ]
20
+ }
@@ -0,0 +1,22 @@
1
+ {
2
+ "fields": [
3
+ { "tag": "005", "value": "20220202020202.0" },
4
+ { "tag": "100", "ind1": "1", "ind2": "2", "subfields": [
5
+ { "code": "6", "value": "880-06" },
6
+ { "code": "a", "value": "Jang, Gangmyeong," },
7
+ { "code": "d", "value": "1975-" },
8
+ { "code": "t", "value": "Albasaeng jareugi." },
9
+ { "code": "l", "value": "englanti."}
10
+ ]},
11
+ { "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
12
+ { "code": "6", "value": "100-06/$1" },
13
+ { "code": "a", "value": "장강명," },
14
+ { "code": "d", "value": "1975-" },
15
+ { "code": "t", "value": "알바생 자르기." },
16
+ { "code": "l", "value": "englanti."
17
+ }
18
+ ]}
19
+ ],
20
+ "leader": "",
21
+ "_validationOptions": {}
22
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "description": "100_and_880: remove crappy punctuation and add proper one",
3
+ "comment": "Field 880 uses rules for 100 as it has $6 100-XX",
4
+ "fix": true,
5
+ "only": false
6
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ "fields": [
3
+ { "tag": "005", "value": "20220202020202.0" },
4
+ { "tag": "100", "ind1": "1", "ind2": "2", "subfields": [
5
+ { "code": "6", "value": "880-06" },
6
+ { "code": "a", "value": "Jang, Gangmyeong" },
7
+ { "code": "d", "value": "1975-," },
8
+ { "code": "t", "value": "Albasaeng jareugi" },
9
+ { "code": "l", "value": "englanti."}
10
+ ]},
11
+ { "tag": "880", "ind1": "1", "ind2": "2", "subfields": [
12
+ { "code": "6", "value": "100-06/$1" },
13
+ { "code": "a", "value": "장강명" },
14
+ { "code": "d", "value": "1975-." },
15
+ { "code": "t", "value": "알바생 자르기" },
16
+ { "code": "l", "value": "englanti."
17
+ }
18
+ ]}
19
+ ]
20
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ "leader": "01331cam a22003494i 4500",
3
+ "_validationOptions": {},
4
+ "fields": [
5
+ { "tag": "001", "value": "000000001" },
6
+
7
+ { "tag": "240", "ind1": "1", "ind2": "0", "subfields": [
8
+ { "code": "a", "value": "Seitsemän veljestä." },
9
+ { "code": "l", "value": "ruotsi" }
10
+ ]}
11
+ ]
12
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "description":"Add dot before 240$l. Note that lowercase->uppercase capitalization is done elsewhere",
3
+ "enabled": true,
4
+ "fix": true,
5
+ "only": false
6
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "leader": "01331cam a22003494i 4500",
3
+ "fields": [
4
+ { "tag": "001", "value": "000000001" },
5
+ { "tag": "240", "ind1": "1", "ind2": "0", "subfields": [
6
+ { "code": "a", "value": "Seitsemän veljestä" },
7
+ { "code": "l", "value": "ruotsi" }
8
+ ]}
9
+ ]
10
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "leader": "01331cam a22003494i 4500",
3
+ "_validationOptions": {},
4
+ "fields": [
5
+ { "tag": "001", "value": "000000001" },
6
+ { "tag": "800", "ind1": "1", "ind2": " ",
7
+ "subfields": [
8
+ { "code": "a", "value": "Jaakko, Petteri," },
9
+ { "code": "e", "value": "kirjoittaja." },
10
+ { "code": "t", "value": "Etsiväsarja ;" },
11
+ { "code": "v", "value": "14."},
12
+ { "code": "l", "value": "suomi." }
13
+ ]}
14
+ ]
15
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "description":"Add dot before 240$l. Note that lowercase->uppercase capitalization is done elsewhere",
3
+ "enabled": true,
4
+ "fix": true,
5
+ "only": false
6
+ }