@natlibfi/marc-record-validators-melinda 10.16.0-alpha.1 → 10.16.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/field-008-18-34-character-groups.js +189 -0
- package/dist/field-008-18-34-character-groups.js.map +1 -0
- package/dist/field-008-18-34-character-groups.spec.js +51 -0
- package/dist/field-008-18-34-character-groups.spec.js.map +1 -0
- package/package.json +1 -1
- package/src/field-008-18-34-character-groups.js +142 -0
- package/src/field-008-18-34-character-groups.spec.js +52 -0
- package/test-fixtures/field-008-18-34-character-groups/01/expectedResult.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/01/metadata.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/01/record.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/02/expectedResult.json +7 -0
- package/test-fixtures/field-008-18-34-character-groups/02/metadata.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/02/record.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/03/expectedResult.json +7 -0
- package/test-fixtures/field-008-18-34-character-groups/03/metadata.json +7 -0
- package/test-fixtures/field-008-18-34-character-groups/03/record.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/04/expectedResult.json +7 -0
- package/test-fixtures/field-008-18-34-character-groups/04/metadata.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/04/record.json +6 -0
- package/test-fixtures/field-008-18-34-character-groups/05/expectedResult.json +7 -0
- package/test-fixtures/field-008-18-34-character-groups/05/metadata.json +7 -0
- package/test-fixtures/field-008-18-34-character-groups/05/record.json +6 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, "__esModule", {
|
|
4
|
+
value: true
|
|
5
|
+
});
|
|
6
|
+
exports.default = _default;
|
|
7
|
+
exports.justifyAndSortField008CharacterGroups = justifyAndSortField008CharacterGroups;
|
|
8
|
+
var _clone = _interopRequireDefault(require("clone"));
|
|
9
|
+
var _utils = require("./utils");
|
|
10
|
+
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
11
|
+
//import createDebugLogger from 'debug';
|
|
12
|
+
|
|
13
|
+
// Author(s): Nicholas Volk
|
|
14
|
+
// NB! CR 008/24 vs 008/25-27 is not supported yet!
|
|
15
|
+
|
|
16
|
+
/**
 * Validator factory for the character groups of field 008/18-34.
 *
 * @returns {{description: string, validate: Function, fix: Function}} an object
 *   carrying a human-readable description, a `validate(record)` that reports
 *   008 fields whose character groups are not in normalized form, and a
 *   `fix(record)` that normalizes them in place.
 */
function _default() {
  return {
    // The handled groups reach position 34 (e.g. MP 33-34), not just 24.
    description: 'Justify left and sort character groups within 008/18-34',
    validate,
    fix
  };

  /**
   * Normalizes the character groups of every field of the record in place.
   *
   * @param {Object} record - must provide getTypeOfMaterial(); `fields` may be missing.
   * @returns {{message: Array, fix: Array, valid: boolean}} always a "success" result.
   */
  function fix(record) {
    const typeOfMaterial = record.getTypeOfMaterial();
    // Tolerate a record without a fields array, the same way validate() does.
    record.fields?.forEach(field => {
      justifyAndSortField008CharacterGroups(field, typeOfMaterial);
    });
    // Fix always reports success (even when it really did not change anything):
    return {
      message: [],
      fix: [],
      valid: true
    };
  }

  /**
   * Checks the record without modifying it.
   *
   * @param {Object} record - must provide getTypeOfMaterial(); `fields` may be missing.
   * @returns {{message: string[], valid: boolean}} one message per 008 field that
   *   would be changed by fix(); `valid` is true when there are no messages.
   */
  function validate(record) {
    const res = {
      message: []
    };
    const typeOfMaterial = record.getTypeOfMaterial();
    record.fields?.forEach(field => {
      validateField(field, res, typeOfMaterial);
    });
    res.valid = res.message.length === 0; // eslint-disable-line functional/immutable-data
    return res;
  }

  /**
   * Appends a message to `res.message` when normalizing the field would change it.
   * Fields other than 008 are skipped.
   */
  function validateField(field, res, typeOfMaterial) {
    if (field.tag !== '008') {
      return;
    }
    const orig = (0, _utils.fieldToString)(field);
    // Normalize a clone so that validation never touches the record itself.
    const normalizedField = justifyAndSortField008CharacterGroups((0, _clone.default)(field), typeOfMaterial);
    const mod = (0, _utils.fieldToString)(normalizedField);
    if (orig !== mod) {
      // Message text is kept as is; NOTE(review): expected test results presumably pin it.
      res.message.push(`TODO: '${orig}' => '${mod}'`); // eslint-disable-line functional/immutable-data
    }
  }
}
|
|
62
|
+
|
|
63
|
+
// Character groups of field 008 per type of material. `start` and `end` are
// inclusive offsets into the field value. `sort: true` groups are ordered by
// character score; `sort: false` groups keep their original relative order.
// Should we add legal values?
const characterGroups = [
  {type: 'BK', start: 18, end: 21, sort: true, name: 'illustrations'},
  // English doc does not explicitly mention alphabetical sorting... Finnish does.
  {type: 'BK', start: 24, end: 27, sort: true, name: 'nature of contents'},
  // NB! 24 vs 25-27 logic needs to be implemented separately
  {type: 'CR', start: 25, end: 27, sort: true, name: 'nature of contents'},
  // Order of importance!
  {type: 'MP', start: 18, end: 21, sort: false, name: 'relief'},
  // Order of importance!
  {type: 'MP', start: 33, end: 34, sort: false, name: 'special format of characteristics'},
  {type: 'MU', start: 24, end: 29, sort: true, name: 'accompanying material'},
  {type: 'MU', start: 30, end: 31, sort: true, name: 'literary text for sound recordings'}
];

// Sort score shared by blanks and fill characters, so that they end up last.
const BIG_BAD_VALUE = 999999999;
|
|
116
|
+
/**
 * Normalizes one character group of a control field in place.
 *
 * Steps: fill characters ('|') that share the group with real values become
 * blanks, repeated values are dropped, and the characters are reordered so
 * that blanks and fills come last. `field.value` is only rewritten when the
 * group actually changes.
 *
 * @param {Object} field - control field; its `value` string is read and possibly replaced.
 * @param {Object} group - entry of characterGroups (`start`, `end` inclusive, `sort`).
 * @returns {undefined}
 */
function processCharacterGroup(field, group) {
  const {start, end} = group;
  const untouched = field.value.substring(start, end + 1);

  // Equal scores keep their input order (Array.prototype.sort is stable),
  // which is what preserves the original order in `sort: false` groups.
  const normalized = removeDuplicateValues(blankOutFills(untouched))
    .split('')
    .sort((left, right) => rank(left) - rank(right)) // eslint-disable-line functional/immutable-data
    .join('');

  if (normalized !== untouched) {
    const head = field.value.substring(0, start);
    const tail = field.value.substring(end + 1);
    field.value = `${head}${normalized}${tail}`; // eslint-disable-line functional/immutable-data
  }

  // '|' is turned into ' ' only when the group also carries a real value.
  function blankOutFills(str) {
    const hasFill = str.includes('|');
    const hasValue = (/[^ |]/u).test(str);
    return hasFill && hasValue ? str.replaceAll('|', ' ') : str;
  }

  // Sort key of a single character; a smaller key sorts earlier.
  function rank(c) {
    const isBlankOrFill = c === '|' || c === ' ';
    if (isBlankOrFill) {
      return BIG_BAD_VALUE; // Max value, these should come last
    }
    if (!group.sort) {
      // More meaningful comes first: every value ties, so the order is kept
      return 1;
    }
    const code = c.charCodeAt(0);
    const isLowerCaseLetter = code >= 97 && code <= 122;
    if (isLowerCaseLetter) {
      return code - 96; // a-z => 1-26
    }
    const isDigit = code >= 48 && code <= 57;
    // 0-9 => 100-109; anything else lands between '9' and BIG_BAD_VALUE
    return isDigit ? code + 52 : code + 200;
  }
}
|
|
162
|
+
/**
 * Normalizes, in place, every character group defined for the given type of
 * material in an 008 control field. Any other field (different tag, or one
 * that has subfields) is returned untouched.
 *
 * @param {Object} field - field to normalize; mutated when it is an 008 control field.
 * @param {string} typeOfMaterial - compared against the `type` of each characterGroups entry.
 * @returns {Object} the same field object that was passed in.
 */
function justifyAndSortField008CharacterGroups(field, typeOfMaterial) {
  const is008ControlField = field.tag === '008' && !field.subfields;

  if (is008ControlField) {
    characterGroups
      .filter(({type}) => type === typeOfMaterial)
      .forEach(group => processCharacterGroup(field, group));
  }

  // NB! value # and | normalizations are still to be added

  return field;
}
|
|
179
|
+
/**
 * Drops repeated value characters from a character group.
 *
 * Only the first occurrence of each value-carrying character is kept; blanks
 * (' ') and fill characters ('|') are never dropped. The result is padded with
 * blanks at the end so that it is as long as the input.
 *
 * @param {string} str - content of one character group.
 * @returns {string} deduplicated content, same length as `str`.
 */
function removeDuplicateValues(str) {
  const seenValues = new Set();
  let kept = '';

  for (const c of str.split('')) {
    const isBlankOrFill = c === ' ' || c === '|';
    if (isBlankOrFill || !seenValues.has(c)) {
      kept += c;
    }
    if (!isBlankOrFill) {
      seenValues.add(c);
    }
  }

  // Dropped duplicates are compensated with trailing blanks
  return kept + ' '.repeat(str.length - kept.length);
}
|
|
189
|
+
//# sourceMappingURL=field-008-18-34-character-groups.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"field-008-18-34-character-groups.js","names":["_clone","_interopRequireDefault","require","_utils","obj","__esModule","default","_default","description","validate","fix","record","typeOfMaterial","getTypeOfMaterial","fields","forEach","field","justifyAndSortField008CharacterGroups","res","message","valid","validateField","length","tag","orig","fieldToString","normalizedField","clone","mod","push","characterGroups","type","start","end","sort","name","BIG_BAD_VALUE","processCharacterGroup","group","originalContent","value","substring","content","removeDuplicateValues","fixBlanks","charArray","split","a","b","scoreChar","newContent","join","str","includes","match","replaceAll","c","asciiCode","charCodeAt","subfields","relevantCharacterGroups","filter","gr","arr","reducedStr","i","indexOf","output","repeat"],"sources":["../src/field-008-18-34-character-groups.js"],"sourcesContent":["//import createDebugLogger from 'debug';\nimport clone from 'clone';\nimport {fieldToString} from './utils';\n// Author(s): Nicholas Volk\n// NB! 
CR 008/24 vs 008/25-27 is not supported yet!\n\nexport default function () {\n\n return {\n description: 'Justify left and sort character groups within 008/18-24',\n validate, fix\n };\n\n function fix(record) {\n const typeOfMaterial = record.getTypeOfMaterial();\n record.fields.forEach(field => {\n justifyAndSortField008CharacterGroups(field, typeOfMaterial);\n });\n // Fix always succeeds (even when it really does not):\n const res = {message: [], fix: [], valid: true};\n return res;\n }\n\n function validate(record) {\n const res = {message: []};\n\n const typeOfMaterial = record.getTypeOfMaterial();\n\n record.fields?.forEach(field => {\n validateField(field, res, typeOfMaterial);\n });\n\n res.valid = !(res.message.length >= 1); // eslint-disable-line functional/immutable-data\n return res;\n }\n\n function validateField(field, res, typeOfMaterial) {\n if (field.tag !== '008') { // Optimize code a bit...\n return;\n }\n const orig = fieldToString(field);\n\n const normalizedField = justifyAndSortField008CharacterGroups(clone(field), typeOfMaterial);\n const mod = fieldToString(normalizedField);\n if (orig !== mod) { // Fail as the input is \"broken\"/\"crap\"/sumthing\n res.message.push(`TODO: '${orig}' => '${mod}'`); // eslint-disable-line functional/immutable-data\n return;\n }\n return;\n }\n}\n\n// Should we add legal values?\nconst characterGroups = [\n {type: 'BK', start: 18, end: 21, sort: true, name: 'illustrations'},\n {type: 'BK', start: 24, end: 27, sort: true, name: 'nature of contents'}, // English doc does not explicitly mention alphabetical sorting... Finnish does.\n {type: 'CR', start: 25, end: 27, sort: true, name: 'nature of contents'}, // NB! 
24 vs 25-27 logic needs to be implemented separately\n {type: 'MP', start: 18, end: 21, sort: false, name: 'relief'}, // Order of importance!\n {type: 'MP', start: 33, end: 34, sort: false, name: 'special format of characteristics'}, // Order of importance!\n {type: 'MU', start: 24, end: 29, sort: true, name: 'accompanying material'},\n {type: 'MU', start: 30, end: 31, sort: true, name: 'literary text for sound recordings'}\n];\n\nconst BIG_BAD_VALUE = 999999999;\n\nfunction processCharacterGroup(field, group) {\n const originalContent = field.value.substring(group.start, group.end + 1);\n const content = removeDuplicateValues(fixBlanks(originalContent));\n //console.info(`008/${group.start}-${group.end}: '${originalContent}'`); // eslint-disable-line no-console\n const charArray = content.split('');\n\n charArray.sort(function(a, b) { // eslint-disable-line functional/immutable-data, prefer-arrow-callback\n return scoreChar(a) - scoreChar(b);\n });\n\n const newContent = charArray.join('');\n if (originalContent === newContent) {\n return;\n }\n\n //console.info(`'${fieldToString(field)}' =>`); // eslint-disable-line no-console\n\n field.value = `${field.value.substring(0, group.start)}${newContent}${field.value.substring(group.end + 1)}`; // eslint-disable-line functional/immutable-data\n //console.info(`'${fieldToString(field)}'`); // eslint-disable-line no-console\n\n function fixBlanks(str) {\n if (str.includes('|') && str.match(/[^ |]/u)) {\n return str.replaceAll('|', ' ');\n }\n return str;\n }\n\n function scoreChar(c) {\n if (c === '|' || c === ' ') {\n return BIG_BAD_VALUE; // Max value, these should code last\n }\n if (!group.sort) { // more meaningful comes first: keep the original order\n return 1;\n }\n const asciiCode = c.charCodeAt(0);\n // a-z get values 1-26:\n if (asciiCode >= 97 && asciiCode <= 122) {\n return asciiCode - 96;\n }\n // 0-9 get values 100-109\n if (asciiCode >= 48 && asciiCode <= 57) {\n return asciiCode + 52;\n }\n // Others 
(=crap) return something between '9' and BIG BAD VALUE\n return asciiCode + 200;\n }\n}\n\nexport function justifyAndSortField008CharacterGroups(field, typeOfMaterial) {\n if (field.tag !== '008' || field.subfields) {\n return field;\n }\n\n //console.info(typeOfMaterial); // eslint-disable-line no-console\n\n const relevantCharacterGroups = characterGroups.filter(gr => gr.type === typeOfMaterial);\n\n relevantCharacterGroups.forEach(group => processCharacterGroup(field, group));\n\n //justifyField008CharacterGroups(field, typeOfMaterial); // Oops: also sorts...\n\n // NB! add value # and | normalizations\n //fixBlanks(field, typeOfMaterial);\n\n return field;\n}\n\nfunction removeDuplicateValues(str) {\n const arr = str.split('');\n // Take only the first instance of a proper value-carrying character\n const reducedStr = arr.filter((c, i) => c === ' ' || c === '|' || arr.indexOf(c) === i).join('');\n //console.info(`I: '${str}'`); // eslint-disable-line no-console\n //console.info(`M: '${reducedStr}'`); // eslint-disable-line no-console\n const output = `${reducedStr}${' '.repeat(str.length - reducedStr.length)}`; // Had some weird trouble with str.padEnd(n)\n //console.info(`M: '${output}'`); // eslint-disable-line no-console\n return 
output;\n}\n"],"mappings":";;;;;;;AACA,IAAAA,MAAA,GAAAC,sBAAA,CAAAC,OAAA;AACA,IAAAC,MAAA,GAAAD,OAAA;AAAsC,SAAAD,uBAAAG,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,KAAAE,OAAA,EAAAF,GAAA;AAFtC;;AAGA;AACA;;AAEe,SAAAG,SAAA,EAAY;EAEzB,OAAO;IACLC,WAAW,EAAE,yDAAyD;IACtEC,QAAQ;IAAEC;EACZ,CAAC;EAED,SAASA,GAAGA,CAACC,MAAM,EAAE;IACnB,MAAMC,cAAc,GAAGD,MAAM,CAACE,iBAAiB,CAAC,CAAC;IACjDF,MAAM,CAACG,MAAM,CAACC,OAAO,CAACC,KAAK,IAAI;MAC7BC,qCAAqC,CAACD,KAAK,EAAEJ,cAAc,CAAC;IAC9D,CAAC,CAAC;IACF;IACA,MAAMM,GAAG,GAAG;MAACC,OAAO,EAAE,EAAE;MAAET,GAAG,EAAE,EAAE;MAAEU,KAAK,EAAE;IAAI,CAAC;IAC/C,OAAOF,GAAG;EACZ;EAEA,SAAST,QAAQA,CAACE,MAAM,EAAE;IACxB,MAAMO,GAAG,GAAG;MAACC,OAAO,EAAE;IAAE,CAAC;IAEzB,MAAMP,cAAc,GAAGD,MAAM,CAACE,iBAAiB,CAAC,CAAC;IAEjDF,MAAM,CAACG,MAAM,EAAEC,OAAO,CAACC,KAAK,IAAI;MAC9BK,aAAa,CAACL,KAAK,EAAEE,GAAG,EAAEN,cAAc,CAAC;IAC3C,CAAC,CAAC;IAEFM,GAAG,CAACE,KAAK,GAAG,EAAEF,GAAG,CAACC,OAAO,CAACG,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC;IACxC,OAAOJ,GAAG;EACZ;EAEA,SAASG,aAAaA,CAACL,KAAK,EAAEE,GAAG,EAAEN,cAAc,EAAE;IACjD,IAAII,KAAK,CAACO,GAAG,KAAK,KAAK,EAAE;MAAE;MACzB;IACF;IACA,MAAMC,IAAI,GAAG,IAAAC,oBAAa,EAACT,KAAK,CAAC;IAEjC,MAAMU,eAAe,GAAGT,qCAAqC,CAAC,IAAAU,cAAK,EAACX,KAAK,CAAC,EAAEJ,cAAc,CAAC;IAC3F,MAAMgB,GAAG,GAAG,IAAAH,oBAAa,EAACC,eAAe,CAAC;IAC1C,IAAIF,IAAI,KAAKI,GAAG,EAAE;MAAE;MAClBV,GAAG,CAACC,OAAO,CAACU,IAAI,CAAE,UAASL,IAAK,SAAQI,GAAI,GAAE,CAAC,CAAC,CAAC;MACjD;IACF;IACA;EACF;AACF;;AAEA;AACA,MAAME,eAAe,GAAG,CACtB;EAACC,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,IAAI;EAAEC,IAAI,EAAE;AAAe,CAAC,EACnE;EAACJ,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,IAAI;EAAEC,IAAI,EAAE;AAAoB,CAAC;AAAE;AAC1E;EAACJ,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,IAAI;EAAEC,IAAI,EAAE;AAAoB,CAAC;AAAE;AAC1E;EAACJ,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,KAAK;EAAEC,IAAI,EAAE;AAAQ,CAAC;AAAE;AAC/D;EAACJ,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,KAAK;EAAEC,IAAI,E
AAE;AAAmC,CAAC;AAAE;AAC1F;EAACJ,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,IAAI;EAAEC,IAAI,EAAE;AAAuB,CAAC,EAC3E;EAACJ,IAAI,EAAE,IAAI;EAAEC,KAAK,EAAE,EAAE;EAAEC,GAAG,EAAE,EAAE;EAAEC,IAAI,EAAE,IAAI;EAAEC,IAAI,EAAE;AAAoC,CAAC,CACzF;AAED,MAAMC,aAAa,GAAG,SAAS;AAE/B,SAASC,qBAAqBA,CAACrB,KAAK,EAAEsB,KAAK,EAAE;EAC3C,MAAMC,eAAe,GAAGvB,KAAK,CAACwB,KAAK,CAACC,SAAS,CAACH,KAAK,CAACN,KAAK,EAAEM,KAAK,CAACL,GAAG,GAAG,CAAC,CAAC;EACzE,MAAMS,OAAO,GAAGC,qBAAqB,CAACC,SAAS,CAACL,eAAe,CAAC,CAAC;EACjE;EACA,MAAMM,SAAS,GAAGH,OAAO,CAACI,KAAK,CAAC,EAAE,CAAC;EAEnCD,SAAS,CAACX,IAAI,CAAC,UAASa,CAAC,EAAEC,CAAC,EAAE;IAAE;IAC9B,OAAOC,SAAS,CAACF,CAAC,CAAC,GAAGE,SAAS,CAACD,CAAC,CAAC;EACpC,CAAC,CAAC;EAEF,MAAME,UAAU,GAAGL,SAAS,CAACM,IAAI,CAAC,EAAE,CAAC;EACrC,IAAIZ,eAAe,KAAKW,UAAU,EAAE;IAClC;EACF;;EAEA;;EAEAlC,KAAK,CAACwB,KAAK,GAAI,GAAExB,KAAK,CAACwB,KAAK,CAACC,SAAS,CAAC,CAAC,EAAEH,KAAK,CAACN,KAAK,CAAE,GAAEkB,UAAW,GAAElC,KAAK,CAACwB,KAAK,CAACC,SAAS,CAACH,KAAK,CAACL,GAAG,GAAG,CAAC,CAAE,EAAC,CAAC,CAAC;EAC9G;;EAEA,SAASW,SAASA,CAACQ,GAAG,EAAE;IACtB,IAAIA,GAAG,CAACC,QAAQ,CAAC,GAAG,CAAC,IAAID,GAAG,CAACE,KAAK,CAAC,QAAQ,CAAC,EAAE;MAC5C,OAAOF,GAAG,CAACG,UAAU,CAAC,GAAG,EAAE,GAAG,CAAC;IACjC;IACA,OAAOH,GAAG;EACZ;EAEA,SAASH,SAASA,CAACO,CAAC,EAAE;IACpB,IAAIA,CAAC,KAAK,GAAG,IAAIA,CAAC,KAAK,GAAG,EAAE;MAC1B,OAAOpB,aAAa,CAAC,CAAC;IACxB;IACA,IAAI,CAACE,KAAK,CAACJ,IAAI,EAAE;MAAE;MACjB,OAAO,CAAC;IACV;IACA,MAAMuB,SAAS,GAAGD,CAAC,CAACE,UAAU,CAAC,CAAC,CAAC;IACjC;IACA,IAAID,SAAS,IAAI,EAAE,IAAIA,SAAS,IAAI,GAAG,EAAE;MACvC,OAAOA,SAAS,GAAG,EAAE;IACvB;IACA;IACA,IAAIA,SAAS,IAAI,EAAE,IAAIA,SAAS,IAAI,EAAE,EAAE;MACtC,OAAOA,SAAS,GAAG,EAAE;IACvB;IACA;IACA,OAAOA,SAAS,GAAG,GAAG;EACxB;AACF;AAEO,SAASxC,qCAAqCA,CAACD,KAAK,EAAEJ,cAAc,EAAE;EAC3E,IAAII,KAAK,CAACO,GAAG,KAAK,KAAK,IAAIP,KAAK,CAAC2C,SAAS,EAAE;IAC1C,OAAO3C,KAAK;EACd;;EAEA;;EAEA,MAAM4C,uBAAuB,GAAG9B,eAAe,CAAC+B,MAAM,CAACC,EAAE,IAAIA,EAAE,CAAC/B,IAAI,KAAKnB,cAAc,CAAC;EAExFgD,uBAAuB,CAAC7C,OAAO,CAACuB,KAAK,IAAID,qBAAqB,CAACrB,KAAK,EAAEsB,KAAK,CAAC,CAAC;;EAE7
E;;EAEA;EACA;;EAEA,OAAOtB,KAAK;AACd;AAEA,SAAS2B,qBAAqBA,CAACS,GAAG,EAAE;EAClC,MAAMW,GAAG,GAAGX,GAAG,CAACN,KAAK,CAAC,EAAE,CAAC;EACzB;EACA,MAAMkB,UAAU,GAAGD,GAAG,CAACF,MAAM,CAAC,CAACL,CAAC,EAAES,CAAC,KAAKT,CAAC,KAAK,GAAG,IAAIA,CAAC,KAAK,GAAG,IAAIO,GAAG,CAACG,OAAO,CAACV,CAAC,CAAC,KAAKS,CAAC,CAAC,CAACd,IAAI,CAAC,EAAE,CAAC;EAChG;EACA;EACA,MAAMgB,MAAM,GAAI,GAAEH,UAAW,GAAE,GAAG,CAACI,MAAM,CAAChB,GAAG,CAAC9B,MAAM,GAAG0C,UAAU,CAAC1C,MAAM,CAAE,EAAC,CAAC,CAAC;EAC7E;EACA,OAAO6C,MAAM;AACf"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
var _chai = require("chai");
|
|
4
|
+
var _marcRecord = require("@natlibfi/marc-record");
|
|
5
|
+
var _field0081834CharacterGroups = _interopRequireDefault(require("./field-008-18-34-character-groups"));
|
|
6
|
+
var _fixura = require("@natlibfi/fixura");
|
|
7
|
+
var _fixugen = _interopRequireDefault(require("@natlibfi/fixugen"));
|
|
8
|
+
var _debug = _interopRequireDefault(require("debug"));
|
|
9
|
+
function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; }
|
|
10
|
+
// Fixture-driven suite: the options point the generator at the fixture
// directory of this validator and run the factory check before the cases.
const fixtureSuiteOptions = {
  callback,
  path: [__dirname, '..', 'test-fixtures', 'field-008-18-34-character-groups'],
  useMetadataFile: true,
  recurse: false,
  fixura: {
    reader: _fixura.READERS.JSON
  },
  mocha: {
    before: () => testValidatorFactory()
  }
};
(0, _fixugen.default)(fixtureSuiteOptions);

// Logger used by callback() to report skipped fixtures
const debug = (0, _debug.default)('@natlibfi/marc-record-validators-melinda/field-008-18-34-character-groups:test');
|
|
23
|
+
/**
 * Sanity check of the factory itself: it must produce an object that has a
 * string description and a validate function.
 */
async function testValidatorFactory() {
  const validator = await (0, _field0081834CharacterGroups.default)();
  (0, _chai.expect)(validator).to.be.an('object').that.has.any.keys('description', 'validate');

  const {description, validate} = validator;
  (0, _chai.expect)(description).to.be.a('string');
  (0, _chai.expect)(validate).to.be.a('function');
}
|
|
29
|
+
/**
 * Runs one fixture. With `fix` set in the metadata the record is fixed and the
 * fixed record is compared with expectedResult.json; otherwise the result of
 * validate() is compared with it. A fixture with `enabled: false` is skipped.
 */
async function callback({getFixture, enabled = true, fix = false}) {
  if (enabled === false) {
    debug('TEST SKIPPED!');
    return;
  }

  const validator = await (0, _field0081834CharacterGroups.default)();
  const record = new _marcRecord.MarcRecord(getFixture('record.json'));
  const expectedResult = getFixture('expectedResult.json');

  if (fix) {
    await validator.fix(record);
    (0, _chai.expect)(record).to.eql(expectedResult);
    return;
  }

  const result = await validator.validate(record);
  (0, _chai.expect)(result).to.eql(expectedResult);
}
|
|
51
|
+
//# sourceMappingURL=field-008-18-34-character-groups.spec.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"field-008-18-34-character-groups.spec.js","names":["_chai","require","_marcRecord","_field0081834CharacterGroups","_interopRequireDefault","_fixura","_fixugen","_debug","obj","__esModule","default","generateTests","callback","path","__dirname","useMetadataFile","recurse","fixura","reader","READERS","JSON","mocha","before","testValidatorFactory","debug","createDebugLogger","validator","validatorFactory","expect","to","be","an","that","has","any","keys","description","a","validate","getFixture","enabled","fix","record","MarcRecord","expectedResult","result","eql"],"sources":["../src/field-008-18-34-character-groups.spec.js"],"sourcesContent":["import {expect} from 'chai';\nimport {MarcRecord} from '@natlibfi/marc-record';\nimport validatorFactory from './field-008-18-34-character-groups';\nimport {READERS} from '@natlibfi/fixura';\nimport generateTests from '@natlibfi/fixugen';\nimport createDebugLogger from 'debug';\n\ngenerateTests({\n callback,\n path: [__dirname, '..', 'test-fixtures', 'field-008-18-34-character-groups'],\n useMetadataFile: true,\n recurse: false,\n fixura: {\n reader: READERS.JSON\n },\n mocha: {\n before: () => testValidatorFactory()\n }\n});\nconst debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/field-008-18-34-character-groups:test');\n\nasync function testValidatorFactory() {\n const validator = await validatorFactory();\n\n expect(validator)\n .to.be.an('object')\n .that.has.any.keys('description', 'validate');\n\n expect(validator.description).to.be.a('string');\n expect(validator.validate).to.be.a('function');\n}\n\nasync function callback({getFixture, enabled = true, fix = false}) {\n if (enabled === false) {\n debug('TEST SKIPPED!');\n return;\n }\n\n const validator = await validatorFactory();\n const record = new MarcRecord(getFixture('record.json'));\n const expectedResult = getFixture('expectedResult.json');\n // console.log(expectedResult); // eslint-disable-line\n\n if (!fix) {\n const 
result = await validator.validate(record);\n expect(result).to.eql(expectedResult);\n return;\n }\n\n await validator.fix(record);\n expect(record).to.eql(expectedResult);\n}\n"],"mappings":";;AAAA,IAAAA,KAAA,GAAAC,OAAA;AACA,IAAAC,WAAA,GAAAD,OAAA;AACA,IAAAE,4BAAA,GAAAC,sBAAA,CAAAH,OAAA;AACA,IAAAI,OAAA,GAAAJ,OAAA;AACA,IAAAK,QAAA,GAAAF,sBAAA,CAAAH,OAAA;AACA,IAAAM,MAAA,GAAAH,sBAAA,CAAAH,OAAA;AAAsC,SAAAG,uBAAAI,GAAA,WAAAA,GAAA,IAAAA,GAAA,CAAAC,UAAA,GAAAD,GAAA,KAAAE,OAAA,EAAAF,GAAA;AAEtC,IAAAG,gBAAa,EAAC;EACZC,QAAQ;EACRC,IAAI,EAAE,CAACC,SAAS,EAAE,IAAI,EAAE,eAAe,EAAE,kCAAkC,CAAC;EAC5EC,eAAe,EAAE,IAAI;EACrBC,OAAO,EAAE,KAAK;EACdC,MAAM,EAAE;IACNC,MAAM,EAAEC,eAAO,CAACC;EAClB,CAAC;EACDC,KAAK,EAAE;IACLC,MAAM,EAAEA,CAAA,KAAMC,oBAAoB,CAAC;EACrC;AACF,CAAC,CAAC;AACF,MAAMC,KAAK,GAAG,IAAAC,cAAiB,EAAC,gFAAgF,CAAC;AAEjH,eAAeF,oBAAoBA,CAAA,EAAG;EACpC,MAAMG,SAAS,GAAG,MAAM,IAAAC,oCAAgB,EAAC,CAAC;EAE1C,IAAAC,YAAM,EAACF,SAAS,CAAC,CACdG,EAAE,CAACC,EAAE,CAACC,EAAE,CAAC,QAAQ,CAAC,CAClBC,IAAI,CAACC,GAAG,CAACC,GAAG,CAACC,IAAI,CAAC,aAAa,EAAE,UAAU,CAAC;EAE/C,IAAAP,YAAM,EAACF,SAAS,CAACU,WAAW,CAAC,CAACP,EAAE,CAACC,EAAE,CAACO,CAAC,CAAC,QAAQ,CAAC;EAC/C,IAAAT,YAAM,EAACF,SAAS,CAACY,QAAQ,CAAC,CAACT,EAAE,CAACC,EAAE,CAACO,CAAC,CAAC,UAAU,CAAC;AAChD;AAEA,eAAezB,QAAQA,CAAC;EAAC2B,UAAU;EAAEC,OAAO,GAAG,IAAI;EAAEC,GAAG,GAAG;AAAK,CAAC,EAAE;EACjE,IAAID,OAAO,KAAK,KAAK,EAAE;IACrBhB,KAAK,CAAC,eAAe,CAAC;IACtB;EACF;EAEA,MAAME,SAAS,GAAG,MAAM,IAAAC,oCAAgB,EAAC,CAAC;EAC1C,MAAMe,MAAM,GAAG,IAAIC,sBAAU,CAACJ,UAAU,CAAC,aAAa,CAAC,CAAC;EACxD,MAAMK,cAAc,GAAGL,UAAU,CAAC,qBAAqB,CAAC;EACxD;;EAEA,IAAI,CAACE,GAAG,EAAE;IACR,MAAMI,MAAM,GAAG,MAAMnB,SAAS,CAACY,QAAQ,CAACI,MAAM,CAAC;IAC/C,IAAAd,YAAM,EAACiB,MAAM,CAAC,CAAChB,EAAE,CAACiB,GAAG,CAACF,cAAc,CAAC;IACrC;EACF;EAEA,MAAMlB,SAAS,CAACe,GAAG,CAACC,MAAM,CAAC;EAC3B,IAAAd,YAAM,EAACc,MAAM,CAAC,CAACb,EAAE,CAACiB,GAAG,CAACF,cAAc,CAAC;AACvC"}
|
package/package.json
CHANGED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
//import createDebugLogger from 'debug';
|
|
2
|
+
import clone from 'clone';
|
|
3
|
+
import {fieldToString} from './utils';
|
|
4
|
+
// Author(s): Nicholas Volk
|
|
5
|
+
// NB! CR 008/24 vs 008/25-27 is not supported yet!
|
|
6
|
+
|
|
7
|
+
/**
 * Validator factory for the character groups of field 008/18-34.
 *
 * @returns {{description: string, validate: Function, fix: Function}} an object
 *   carrying a human-readable description, a `validate(record)` that reports
 *   008 fields whose character groups are not in normalized form, and a
 *   `fix(record)` that normalizes them in place.
 */
export default function field008CharacterGroups() {

  return {
    // The handled groups reach position 34 (e.g. MP 33-34), not just 24.
    description: 'Justify left and sort character groups within 008/18-34',
    validate, fix
  };

  /**
   * Normalizes the character groups of every field of the record in place.
   *
   * @param {Object} record - must provide getTypeOfMaterial(); `fields` may be missing.
   * @returns {{message: Array, fix: Array, valid: boolean}} always a "success" result.
   */
  function fix(record) {
    const typeOfMaterial = record.getTypeOfMaterial();
    // Tolerate a record without a fields array, the same way validate() does.
    record.fields?.forEach(field => {
      justifyAndSortField008CharacterGroups(field, typeOfMaterial);
    });
    // Fix always reports success (even when it really did not change anything):
    return {message: [], fix: [], valid: true};
  }

  /**
   * Checks the record without modifying it.
   *
   * @param {Object} record - must provide getTypeOfMaterial(); `fields` may be missing.
   * @returns {{message: string[], valid: boolean}} one message per 008 field that
   *   would be changed by fix(); `valid` is true when there are no messages.
   */
  function validate(record) {
    const res = {message: []};

    const typeOfMaterial = record.getTypeOfMaterial();

    record.fields?.forEach(field => {
      validateField(field, res, typeOfMaterial);
    });

    res.valid = res.message.length === 0; // eslint-disable-line functional/immutable-data
    return res;
  }

  /**
   * Appends a message to `res.message` when normalizing the field would change it.
   * Fields other than 008 are skipped.
   */
  function validateField(field, res, typeOfMaterial) {
    if (field.tag !== '008') {
      return;
    }
    const orig = fieldToString(field);

    // Normalize a clone so that validation never touches the record itself.
    const normalizedField = justifyAndSortField008CharacterGroups(clone(field), typeOfMaterial);
    const mod = fieldToString(normalizedField);
    if (orig !== mod) {
      // Message text is kept as is; NOTE(review): expected test results presumably pin it.
      res.message.push(`TODO: '${orig}' => '${mod}'`); // eslint-disable-line functional/immutable-data
    }
  }
}
|
|
52
|
+
|
|
53
|
+
// Character groups of field 008 per type of material, written as
// [type, start, end, sort, name] rows. `start` and `end` are inclusive offsets
// into the field value. `sort: true` groups are ordered by character score;
// `sort: false` groups keep their original relative order.
// Should we add legal values?
const characterGroups = [
  ['BK', 18, 21, true, 'illustrations'],
  ['BK', 24, 27, true, 'nature of contents'], // English doc does not explicitly mention alphabetical sorting... Finnish does.
  ['CR', 25, 27, true, 'nature of contents'], // NB! 24 vs 25-27 logic needs to be implemented separately
  ['MP', 18, 21, false, 'relief'], // Order of importance!
  ['MP', 33, 34, false, 'special format of characteristics'], // Order of importance!
  ['MU', 24, 29, true, 'accompanying material'],
  ['MU', 30, 31, true, 'literary text for sound recordings']
].map(([type, start, end, sort, name]) => ({type, start, end, sort, name}));

// Sort score shared by blanks and fill characters, so that they end up last.
const BIG_BAD_VALUE = 999999999;
|
|
65
|
+
|
|
66
|
+
/**
 * Normalizes one character group of a control field in place.
 *
 * Steps: fill characters ('|') that share the group with real values become
 * blanks, repeated values are dropped, and the characters are reordered so
 * that blanks and fills come last. `field.value` is only rewritten when the
 * group actually changes.
 *
 * @param {Object} field - control field; its `value` string is read and possibly replaced.
 * @param {Object} group - entry of characterGroups (`start`, `end` inclusive, `sort`).
 * @returns {undefined}
 */
function processCharacterGroup(field, group) {
  const {start, end} = group;
  const untouched = field.value.substring(start, end + 1);

  // Equal scores keep their input order (Array.prototype.sort is stable),
  // which is what preserves the original order in `sort: false` groups.
  const normalized = removeDuplicateValues(blankOutFills(untouched))
    .split('')
    .sort((left, right) => rank(left) - rank(right)) // eslint-disable-line functional/immutable-data
    .join('');

  if (normalized !== untouched) {
    const head = field.value.substring(0, start);
    const tail = field.value.substring(end + 1);
    field.value = `${head}${normalized}${tail}`; // eslint-disable-line functional/immutable-data
  }

  // '|' is turned into ' ' only when the group also carries a real value.
  function blankOutFills(str) {
    const hasFill = str.includes('|');
    const hasValue = (/[^ |]/u).test(str);
    return hasFill && hasValue ? str.replaceAll('|', ' ') : str;
  }

  // Sort key of a single character; a smaller key sorts earlier.
  function rank(c) {
    const isBlankOrFill = c === '|' || c === ' ';
    if (isBlankOrFill) {
      return BIG_BAD_VALUE; // Max value, these should come last
    }
    if (!group.sort) { // More meaningful comes first: every value ties, so the order is kept
      return 1;
    }
    const code = c.charCodeAt(0);
    const isLowerCaseLetter = code >= 97 && code <= 122;
    if (isLowerCaseLetter) {
      return code - 96; // a-z => 1-26
    }
    const isDigit = code >= 48 && code <= 57;
    // 0-9 => 100-109; anything else lands between '9' and BIG_BAD_VALUE
    return isDigit ? code + 52 : code + 200;
  }
}
|
|
113
|
+
|
|
114
|
+
/**
 * Normalizes, in place, every character group defined for the given type of
 * material in an 008 control field. Any other field (different tag, or one
 * that has subfields) is returned untouched.
 *
 * @param {Object} field - field to normalize; mutated when it is an 008 control field.
 * @param {string} typeOfMaterial - compared against the `type` of each characterGroups entry.
 * @returns {Object} the same field object that was passed in.
 */
export function justifyAndSortField008CharacterGroups(field, typeOfMaterial) {
  const is008ControlField = field.tag === '008' && !field.subfields;

  if (is008ControlField) {
    characterGroups
      .filter(({type}) => type === typeOfMaterial)
      .forEach(group => processCharacterGroup(field, group));
  }

  // NB! value # and | normalizations are still to be added

  return field;
}
|
|
132
|
+
|
|
133
|
+
/**
 * Drops repeated value characters from a character group.
 *
 * Only the first occurrence of each value-carrying character is kept; blanks
 * (' ') and fill characters ('|') are never dropped. The result is padded with
 * blanks at the end so that it is as long as the input.
 *
 * @param {string} str - content of one character group.
 * @returns {string} deduplicated content, same length as `str`.
 */
function removeDuplicateValues(str) {
  const seenValues = new Set();
  let kept = '';

  for (const c of str.split('')) {
    const isBlankOrFill = c === ' ' || c === '|';
    if (isBlankOrFill || !seenValues.has(c)) {
      kept += c;
    }
    if (!isBlankOrFill) {
      seenValues.add(c);
    }
  }

  // Dropped duplicates are compensated with trailing blanks
  return kept + ' '.repeat(str.length - kept.length);
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import {expect} from 'chai';
|
|
2
|
+
import {MarcRecord} from '@natlibfi/marc-record';
|
|
3
|
+
import validatorFactory from './field-008-18-34-character-groups';
|
|
4
|
+
import {READERS} from '@natlibfi/fixura';
|
|
5
|
+
import generateTests from '@natlibfi/fixugen';
|
|
6
|
+
import createDebugLogger from 'debug';
|
|
7
|
+
|
|
8
|
+
// Fixture-driven suite: the options point the generator at the fixture
// directory of this validator and run the factory check before the cases.
const fixtureSuiteOptions = {
  callback,
  path: [__dirname, '..', 'test-fixtures', 'field-008-18-34-character-groups'],
  useMetadataFile: true,
  recurse: false,
  fixura: {
    reader: READERS.JSON
  },
  mocha: {
    before: () => testValidatorFactory()
  }
};
generateTests(fixtureSuiteOptions);

// Logger used by callback() to report skipped fixtures
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/field-008-18-34-character-groups:test');
|
|
21
|
+
|
|
22
|
+
/**
 * Sanity check of the factory itself: it must produce an object that has a
 * string description and a validate function.
 */
async function testValidatorFactory() {
  const validator = await validatorFactory();

  expect(validator).to.be.an('object').that.has.any.keys('description', 'validate');

  const {description, validate} = validator;
  expect(description).to.be.a('string');
  expect(validate).to.be.a('function');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Runs one fixture. With `fix` set in the metadata the record is fixed and the
 * fixed record is compared with expectedResult.json; otherwise the result of
 * validate() is compared with it. A fixture with `enabled: false` is skipped.
 */
async function callback({getFixture, enabled = true, fix = false}) {
  if (enabled === false) {
    debug('TEST SKIPPED!');
    return;
  }

  const validator = await validatorFactory();
  const record = new MarcRecord(getFixture('record.json'));
  const expectedResult = getFixture('expectedResult.json');

  if (fix) {
    await validator.fix(record);
    expect(record).to.eql(expectedResult);
    return;
  }

  const result = await validator.validate(record);
  expect(result).to.eql(expectedResult);
}
|