@natlibfi/marc-record-validators-melinda 10.13.0 → 10.13.1-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/melinda-node-tests.yml +3 -3
- package/dist/field-505-separators.js +77 -0
- package/dist/field-505-separators.js.map +1 -0
- package/dist/field-505-separators.spec.js +51 -0
- package/dist/field-505-separators.spec.js.map +1 -0
- package/dist/fixRelatorTerms.js +278 -0
- package/dist/fixRelatorTerms.js.map +1 -0
- package/dist/fixRelatorTerms.spec.js +51 -0
- package/dist/fixRelatorTerms.spec.js.map +1 -0
- package/dist/index.js +101 -3
- package/dist/index.js.map +1 -1
- package/dist/normalize-qualifying-information.js +97 -0
- package/dist/normalize-qualifying-information.js.map +1 -0
- package/dist/normalize-qualifying-information.spec.js +51 -0
- package/dist/normalize-qualifying-information.spec.js.map +1 -0
- package/dist/normalizeSubfieldValueForComparison.js +12 -3
- package/dist/normalizeSubfieldValueForComparison.js.map +1 -1
- package/dist/prepublicationUtils.js +8 -26
- package/dist/prepublicationUtils.js.map +1 -1
- package/dist/punctuation2.js +7 -2
- package/dist/punctuation2.js.map +1 -1
- package/dist/removeInferiorDataFields.js +69 -10
- package/dist/removeInferiorDataFields.js.map +1 -1
- package/dist/utils.js +12 -0
- package/dist/utils.js.map +1 -1
- package/package.json +11 -11
- package/src/field-505-separators.js +75 -0
- package/src/field-505-separators.spec.js +52 -0
- package/src/fixRelatorTerms.js +233 -0
- package/src/fixRelatorTerms.spec.js +52 -0
- package/src/index.js +33 -4
- package/src/normalize-qualifying-information.js +92 -0
- package/src/normalize-qualifying-information.spec.js +52 -0
- package/src/normalizeSubfieldValueForComparison.js +14 -3
- package/src/prepublicationUtils.js +8 -25
- package/src/punctuation2.js +3 -2
- package/src/removeInferiorDataFields.js +70 -10
- package/src/utils.js +12 -0
- package/test-fixtures/field-505-separators/01/expectedResult.json +7 -0
- package/test-fixtures/field-505-separators/01/metadata.json +7 -0
- package/test-fixtures/field-505-separators/01/record.json +25 -0
- package/test-fixtures/field-505-separators/02/expectedResult.json +27 -0
- package/test-fixtures/field-505-separators/02/metadata.json +7 -0
- package/test-fixtures/field-505-separators/02/record.json +25 -0
- package/test-fixtures/fix-relator-terms/f01/expectedResult.json +14 -0
- package/test-fixtures/fix-relator-terms/f01/metadata.json +6 -0
- package/test-fixtures/fix-relator-terms/f01/record.json +13 -0
- package/test-fixtures/fix-relator-terms/f01b/expectedResult.json +12 -0
- package/test-fixtures/fix-relator-terms/f01b/metadata.json +6 -0
- package/test-fixtures/fix-relator-terms/f01b/record.json +11 -0
- package/test-fixtures/fix-relator-terms/f02/expectedResult.json +12 -0
- package/test-fixtures/fix-relator-terms/f02/metadata.json +6 -0
- package/test-fixtures/fix-relator-terms/f02/record.json +11 -0
- package/test-fixtures/normalize-qualifying-information/01/expectedResult.json +8 -0
- package/test-fixtures/normalize-qualifying-information/01/metadata.json +7 -0
- package/test-fixtures/normalize-qualifying-information/01/record.json +25 -0
- package/test-fixtures/normalize-qualifying-information/02/expectedResult.json +27 -0
- package/test-fixtures/normalize-qualifying-information/02/metadata.json +7 -0
- package/test-fixtures/normalize-qualifying-information/02/record.json +25 -0
- package/test-fixtures/punctuation2/97/expectedResult.json +6 -1
- package/test-fixtures/punctuation2/97/record.json +5 -0
- package/test-fixtures/remove-inferior-datafields/f09/expectedResult.json +20 -0
- package/test-fixtures/remove-inferior-datafields/f09/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f09/record.json +30 -0
- package/test-fixtures/remove-inferior-datafields/f10/expectedResult.json +17 -0
- package/test-fixtures/remove-inferior-datafields/f10/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f10/record.json +27 -0
- package/test-fixtures/remove-inferior-datafields/f11/expectedResult.json +14 -0
- package/test-fixtures/remove-inferior-datafields/f11/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f11/record.json +18 -0
- package/test-fixtures/strip-punctuation/98/expectedResult.json +5 -0
- package/test-fixtures/strip-punctuation/98/record.json +5 -0
package/package.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"url": "git@github.com:natlibfi/marc-record-validators-melinda.git"
|
|
15
15
|
},
|
|
16
16
|
"license": "MIT",
|
|
17
|
-
"version": "10.13.
|
|
17
|
+
"version": "10.13.1-alpha.2",
|
|
18
18
|
"main": "./dist/index.js",
|
|
19
19
|
"publishConfig": {
|
|
20
20
|
"access": "public"
|
|
@@ -35,35 +35,35 @@
|
|
|
35
35
|
"build": "babel src --source-maps --copy-files --delete-dir-on-start --out-dir=dist"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"@babel/register": "^7.22.
|
|
38
|
+
"@babel/register": "^7.22.15",
|
|
39
39
|
"@natlibfi/issn-verify": "^1.0.3",
|
|
40
40
|
"@natlibfi/marc-record": "^7.3.1",
|
|
41
41
|
"@natlibfi/marc-record-validate": "^8.0.1",
|
|
42
42
|
"cld3-asm": "^3.1.1",
|
|
43
|
+
"clone": "^2.1.2",
|
|
43
44
|
"debug": "^4.3.4",
|
|
44
|
-
"isbn3": "^1.1.
|
|
45
|
+
"isbn3": "^1.1.41",
|
|
45
46
|
"langs": "^2.0.0",
|
|
46
47
|
"node-fetch": "^2.7.0",
|
|
47
|
-
"xml2js": "
|
|
48
|
-
"clone": "^2.1.2"
|
|
48
|
+
"xml2js": "^0.6.2"
|
|
49
49
|
},
|
|
50
50
|
"peerDependencies": {
|
|
51
51
|
"@natlibfi/marc-record-validate": "^8.0.1"
|
|
52
52
|
},
|
|
53
53
|
"devDependencies": {
|
|
54
|
-
"@babel/cli": "^7.22.
|
|
55
|
-
"@babel/core": "^7.22.
|
|
56
|
-
"@babel/eslint-parser": "^7.22.
|
|
57
|
-
"@babel/preset-env": "^7.22.
|
|
54
|
+
"@babel/cli": "^7.22.15",
|
|
55
|
+
"@babel/core": "^7.22.17",
|
|
56
|
+
"@babel/eslint-parser": "^7.22.15",
|
|
57
|
+
"@babel/preset-env": "^7.22.15",
|
|
58
58
|
"@natlibfi/eslint-config-melinda-backend": "^3.0.1",
|
|
59
59
|
"@natlibfi/fixugen": "^2.0.1",
|
|
60
60
|
"@natlibfi/fixura": "^3.0.1",
|
|
61
61
|
"babel-plugin-istanbul": "^6.1.1",
|
|
62
62
|
"babel-plugin-rewire": "^1.2.0",
|
|
63
|
-
"chai": "^4.3.
|
|
63
|
+
"chai": "^4.3.8",
|
|
64
64
|
"chai-as-promised": "^7.1.1",
|
|
65
65
|
"cross-env": "^7.0.3",
|
|
66
|
-
"eslint": "^8.
|
|
66
|
+
"eslint": "^8.49.0",
|
|
67
67
|
"fetch-mock": "^9.11.0",
|
|
68
68
|
"mocha": "^10.2.0",
|
|
69
69
|
"nyc": "^15.1.0"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
//import createDebugLogger from 'debug';
|
|
2
|
+
import clone from 'clone';
|
|
3
|
+
import {fieldToString} from './utils';
|
|
4
|
+
|
|
5
|
+
// Author(s): Nicholas Volk
|
|
6
|
+
/**
 * Validator factory for normalizing the separators of MARC field 505.
 *
 * @returns {{description: string, validate: Function, fix: Function}} validator object
 */
export default function () {

  return {
    description: 'Normalize " ; " separators as " -- "',
    validate, fix
  };

  /**
   * Normalize the separators of the record's fields in place (fix505() leaves non-505 fields alone).
   *
   * @param {Object} record - record whose fields are modified
   * @returns {{message: Array, fix: Array, valid: boolean}} always an empty, valid result
   */
  function fix(record) {
    // Use optional chaining here as well, so that fix() tolerates a record without fields just like validate() does.
    record.fields?.forEach(field => {
      fix505(field);
    });

    return {message: [], fix: [], valid: true};
  }

  /**
   * Report every field whose string form would change if fix() was applied. The record is not modified.
   *
   * @param {Object} record - record to check
   * @returns {{message: string[], valid: boolean}} valid is true only when there are no messages
   */
  function validate(record) {
    const res = {message: []};

    record.fields?.forEach(field => {
      validateField(field, res);
    });

    res.valid = res.message.length === 0; // eslint-disable-line functional/immutable-data
    return res;
  }

  // Compare the field with a normalized clone of itself; a difference means the field needs fixing.
  function validateField(field, res) {
    const orig = fieldToString(field);
    const mod = fieldToString(fix505(clone(field)));

    if (orig !== mod) { // eslint-disable-line functional/no-conditional-statements
      res.message.push(`'TODO: ${orig}' => '${mod}'`); // eslint-disable-line functional/immutable-data
    }
  }
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
/**
 * Replace every " ; " separator with " -- " in the $a subfields of the given field.
 * The subfields are modified in place and nothing is returned.
 *
 * @param {{subfields: Array<{code: string, value?: string}>}} field - field whose $a subfields are rewritten
 */
function field505FixSubfieldA(field) {
  field.subfields
    // A subfield without a string value has nothing to normalize (and replace() would throw on it).
    .filter(sf => sf.code === 'a' && typeof sf.value === 'string')
    .forEach(sf => {
      sf.value = sf.value.replace(/ ; /ug, ' -- '); // eslint-disable-line functional/immutable-data
    });
}
|
|
56
|
+
|
|
57
|
+
/**
 * Replace a trailing " ;" with " --" in every subfield that is immediately followed by a $t subfield.
 * The subfields are modified in place and nothing is returned.
 *
 * @param {{subfields: Array<{code: string, value?: string}>}} field - field whose subfields are rewritten
 */
function field505FixSubfieldTRG(field) {
  field.subfields
    // Only a subfield directly before a $t is touched; the last subfield has no successor and never qualifies.
    // Subfields without a string value are skipped, as replace() would throw on them.
    .filter((sf, i) => field.subfields[i + 1]?.code === 't' && typeof sf.value === 'string')
    .forEach(sf => {
      sf.value = sf.value.replace(/ ;$/u, ' --'); // eslint-disable-line functional/immutable-data
    });
}
|
|
67
|
+
|
|
68
|
+
/**
 * Normalize the separators of a field 505 in place.
 * Any other field, or a field without subfields, is returned untouched.
 *
 * @param {Object} field - field to normalize
 * @returns {Object} the very same field object that was given
 */
function fix505(field) {
  const isFixable = field.tag === '505' && Boolean(field.subfields);
  if (!isFixable) {
    return field;
  }

  // Trailing separators before $t are handled first, then the separators inside $a.
  field505FixSubfieldTRG(field);
  field505FixSubfieldA(field);
  return field;
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import {expect} from 'chai';
|
|
2
|
+
import {MarcRecord} from '@natlibfi/marc-record';
|
|
3
|
+
import validatorFactory from './field-505-separators';
|
|
4
|
+
import {READERS} from '@natlibfi/fixura';
|
|
5
|
+
import generateTests from '@natlibfi/fixugen';
|
|
6
|
+
import createDebugLogger from 'debug';
|
|
7
|
+
|
|
8
|
+
generateTests({
|
|
9
|
+
callback,
|
|
10
|
+
path: [__dirname, '..', 'test-fixtures', 'field-505-separators'],
|
|
11
|
+
useMetadataFile: true,
|
|
12
|
+
recurse: false,
|
|
13
|
+
fixura: {
|
|
14
|
+
reader: READERS.JSON
|
|
15
|
+
},
|
|
16
|
+
mocha: {
|
|
17
|
+
before: () => testValidatorFactory()
|
|
18
|
+
}
|
|
19
|
+
});
|
|
20
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/field-505-separators:test');
|
|
21
|
+
|
|
22
|
+
// Sanity check run before the fixture tests: the factory must return a complete validator object.
async function testValidatorFactory() {
  const validator = await validatorFactory();

  // "include.all" requires every listed key; the previous "any" passed as soon as a single key existed.
  expect(validator)
    .to.be.an('object')
    .that.includes.all.keys('description', 'validate', 'fix');

  expect(validator.description).to.be.a('string');
  expect(validator.validate).to.be.a('function');
  // The fixture callback calls validator.fix(), so make sure it exists as well.
  expect(validator.fix).to.be.a('function');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Run a single fixture directory.
 *
 * @param {Object} options - options handed over by the test generator
 * @param {Function} options.getFixture - reads a fixture file by name
 * @param {boolean} [options.enabled=true] - the test is skipped when this is false
 * @param {boolean} [options.fix=false] - true: run fix() and compare the record; false: compare the validate() result
 */
async function callback({getFixture, enabled = true, fix = false}) {
  if (enabled === false) {
    debug('TEST SKIPPED!');
    return;
  }

  const validator = await validatorFactory();
  const record = new MarcRecord(getFixture('record.json'));
  const expectedResult = getFixture('expectedResult.json');

  if (fix) {
    // Fix mode: the record itself is modified and compared with the expected record.
    await validator.fix(record);
    expect(record).to.eql(expectedResult);
    return;
  }

  // Validate mode: the returned result object is compared with the expected result.
  const validationResult = await validator.validate(record);
  expect(validationResult).to.eql(expectedResult);
}
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import clone from 'clone';
|
|
2
|
+
import {fieldFixPunctuation} from './punctuation2';
|
|
3
|
+
import {fieldToString, getCatalogingLanguage, nvdebug, subfieldToString} from './utils';
|
|
4
|
+
import createDebugLogger from 'debug';
|
|
5
|
+
|
|
6
|
+
// Currently mainly translates X00$e values, so that we don't have "$a Name, $e kirjoittaja, $e författare.".
|
|
7
|
+
// Later on we could try and handle $4 stuff here as well.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:fixRelatorterms');
|
|
11
|
+
//const debugData = debug.extend('data');
|
|
12
|
+
const debugDev = debug.extend('dev');
|
|
13
|
+
|
|
14
|
+
/**
 * Validator factory for fixing relator terms ($e subfields).
 *
 * @returns {{description: string, validate: Function, fix: Function}} validator object
 */
export default function () {
  return {
    description: 'Fix $e subfields in field [1678][01]0 and 720',
    validate, fix
  };

  /**
   * Fix the relator terms of every field of the record in place.
   *
   * @param {Object} record - record whose fields are modified
   * @returns {{message: Array, fix: Array, valid: boolean}} always an empty, valid result
   */
  function fix(record) {
    const res = {message: [], fix: [], valid: true};

    const language = getCatalogingLanguage(record);

    record.fields.forEach(field => {
      fieldFixRelatorTerms(field, language, language);
    });

    return res;
  }

  /**
   * Report every field whose relator terms would be changed by fix(). The record is not modified.
   *
   * @param {Object} record - record to check
   * @returns {{message: string[], valid: boolean}} valid is true only when there are no messages
   */
  function validate(record) {
    const res = {message: []};

    const language = getCatalogingLanguage(record);

    record.fields.forEach(field => {
      // Rather hackily/abnormally use language as both fromLanguage and toLanguage.
      // fromLanguage is used to expand "esitt." => "esittäjä".
      // toLanguage is used by translations (fixes "författare" to "kirjoittaja", if 040$b is "fin")
      //
      // The fixer is applied to a clone: validation must never modify the record.
      // (Previously the original field was fixed and the untouched clone was only used for comparison.)
      const fixedField = clone(field);
      fieldFixRelatorTerms(fixedField, language, language);

      const originalString = fieldToString(field);
      const fixedString = fieldToString(fixedField);
      if (originalString !== fixedString) { // eslint-disable-line functional/no-conditional-statements
        // The message reads "original => fixed".
        // NOTE(review): the old message was printed the other way around, so expected-result fixtures that captured it need updating.
        res.message.push(`${originalString} => ${fixedString}`); // eslint-disable-line functional/immutable-data
      }
    });

    res.valid = res.message.length === 0; // eslint-disable-line functional/immutable-data
    return res;
  }
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
/*
|
|
57
|
+
export default () => (base, source) => {
|
|
58
|
+
recordTranslateRelatorTerms(base);
|
|
59
|
+
recordTranslateRelatorTerms(source);
|
|
60
|
+
recordHandleRelatorTermAbbreviations(base);
|
|
61
|
+
recordHandleRelatorTermAbbreviations(source);
|
|
62
|
+
const result = {base, source};
|
|
63
|
+
return result;
|
|
64
|
+
};
|
|
65
|
+
*/
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
// Partial source: https://marc21.kansalliskirjasto.fi/funktiot_koodit.htm
|
|
69
|
+
// https://wiki.helsinki.fi/display/MARC21svenska/Funktions-+och+relationskoder+-+alfabetiskt+efter+funktion
|
|
70
|
+
// New, better source: https://id.kb.se/find?q=relator&_sort=_sortKeyByLang.en
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
// NB! How to handle German sex-based Verfasser/Verfasserin pairs?
|
|
74
|
+
const relatorTerms = [
|
|
75
|
+
{'code': 'arr', 'eng': 'arranger', 'fin': 'sovittaja', 'swe': 'arrangör av musikalisk komposition'},
|
|
76
|
+
{'code': 'art', 'eng': 'artist', 'fin': 'taiteilija', 'swe': 'konstnär'},
|
|
77
|
+
{'code': 'aui', 'eng': 'author of introduction', 'fin': 'esipuheen tekijä'},
|
|
78
|
+
{'code': 'aut', 'eng': 'author', 'fin': 'kirjoittaja', 'swe': 'författare'},
|
|
79
|
+
{'code': 'cmp', 'eng': 'composer', 'fin': 'säveltäjä', 'swe': 'kompositör'},
|
|
80
|
+
{'code': 'drt', 'eng': 'director', 'fin': 'ohjaaja', 'swe': 'regissör'},
|
|
81
|
+
{'code': 'edt', 'eng': 'editor', 'fin': 'toimittaja', 'swe': 'redaktör'},
|
|
82
|
+
{'code': 'ill', 'eng': 'illustrator', 'fin': 'kuvittaja', 'swe': 'illustratör'},
|
|
83
|
+
{'code': 'lyr', 'eng': 'lyricist', 'fin': 'sanoittaja', 'swe': 'sångtext'},
|
|
84
|
+
{'code': 'nrt', 'eng': 'narrator', 'fin': 'kertoja', 'swe': 'berättare'}, // berättare/inläsare
|
|
85
|
+
{'code': 'pbl', 'eng': 'publisher', 'fin': 'julkaisija', 'swe': 'utgivare'},
|
|
86
|
+
{'code': 'pht', 'eng': 'photographer', 'fin': 'valokuvaaja', 'swe': 'fotograf'},
|
|
87
|
+
{'code': 'prf', 'eng': 'performer', 'fin': 'esittäjä', 'swe': 'framförande'},
|
|
88
|
+
{'code': 'pro', 'eng': 'producer', 'fin': 'tuottaja', 'swe': 'producent'},
|
|
89
|
+
{'code': 'trl', 'eng': 'translator', 'fin': 'kääntäjä', 'swe': 'översättare'}
|
|
90
|
+
];
|
|
91
|
+
|
|
92
|
+
/*
|
|
93
|
+
function recordNormalizeRelatorTerms(record, defaultLanguageCode = undef) {
|
|
94
|
+
const languageCode = defaultLanguageCode ? defaultLanguageCode : getCatalogingLanguage(record);
|
|
95
|
+
if (!languageCode || ['eng', 'fin', 'swe'].includes(languageCode)) {
|
|
96
|
+
return;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
}
|
|
100
|
+
*/
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
const finnishAbbreviations = {
|
|
104
|
+
'esitt.': 'esittäjä',
|
|
105
|
+
'käänt.': 'kääntäjä',
|
|
106
|
+
'näytt.': 'näyttelijä',
|
|
107
|
+
'san.': 'sanoittaja',
|
|
108
|
+
'sov.': 'sovittaja',
|
|
109
|
+
'säv.': 'säveltäjä',
|
|
110
|
+
'toim.': 'toimittaja'
|
|
111
|
+
};
|
|
112
|
+
|
|
113
|
+
/**
 * Expand an abbreviated relator term in a $e subfield (e.g. 'esitt.' => 'esittäjä') in place.
 * A trailing comma is preserved. Subfields other than $e are ignored.
 *
 * @param {{code: string, value: string}} subfield - subfield to modify
 * @param {?string} language - cataloging language code; only the Finnish rules exist at the moment
 */
function subfieldHandleRelatorTermAbbreviation(subfield, language) {
  if (subfield.code !== 'e' || typeof subfield.value !== 'string') {
    return;
  }
  nvdebug(`Relator cand subfield: '${subfieldToString(subfield)}', lang: ${language ? language : 'NULL'}`, debugDev);

  // NB: Policy: if no language or multi-language: apply all rules! (Not much overlap I hope...)
  // undefined is treated like null here (presumably the language lookup can come back with either when 040$b is missing).
  if (language === null || language === undefined || language === 'mul') {
    subfieldHandleRelatorTermAbbreviation(subfield, 'fin');
    // Maybe later add Swedish here...
    return;
  }

  if (language !== 'fin') {
    return;
  }

  const value = subfield.value.replace(/,$/u, '');
  const punc = value === subfield.value ? '' : ',';
  const lcValue = value.toLowerCase(); // Check Å, Ä, Ö...

  nvdebug(`Relator try Finnish for '${lcValue}'...`, debugDev);

  // Own-property check: the "in" operator would also accept inherited keys such as 'constructor'.
  if (!Object.prototype.hasOwnProperty.call(finnishAbbreviations, lcValue)) {
    return;
  }

  const hit = `${finnishAbbreviations[lcValue]}${punc}`;
  nvdebug(`Relator hit: ${hit}`, debugDev);
  // NB! 'esitt.' => 'esittäjä'
  subfield.value = hit; // eslint-disable-line functional/immutable-data
}
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
/**
 * Tell whether the field is one of the fields whose relator terms are handled.
 *
 * @param {{tag?: string}} field - field to check
 * @returns {boolean} true when the tag is one of the listed tags (false also for a field without a tag)
 */
function isRelatorField(field) {
  // Tag list might be incomplete!
  // test() gives a real boolean; the earlier match() returned an array or null.
  return typeof field.tag === 'string' && (/^(?:100|110|600|610|700|710|720|800|810)$/u).test(field.tag);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Expand the relator term abbreviations of a relator field in place.
 * If anything changed, the punctuation of the field is fixed afterwards.
 *
 * @param {Object} field - field to modify; non-relator fields are ignored
 * @param {?string} language - language code passed on to the subfield level handler
 */
function fieldHandleRelatorTermAbbreviations(field, language) {
  if (!isRelatorField(field)) {
    return;
  }

  const before = fieldToString(field);
  field.subfields.forEach(subfield => subfieldHandleRelatorTermAbbreviation(subfield, language));
  const hasChanged = fieldToString(field) !== before;

  if (!hasChanged) {
    return;
  }

  // An expansion drops the full stop of the abbreviation, so let the punctuation fixer tidy up the field.
  // (NB! We need punctuation as a module, if we are to make abbr expansion a marc-record-validators-melinda validator/fixer)
  fieldFixPunctuation(field);
}
|
|
162
|
+
|
|
163
|
+
/**
 * Check whether the relator term table has a row whose value for the given language equals the term exactly.
 *
 * @param {string} term - term to look up
 * @param {string} language - key of the language column, e.g. 'fin'
 * @returns {boolean} true if some row matches
 */
function termIsInGivenLanguage(term, language) {
  const matchingRow = relatorTerms.find(row => language in row && row[language] === term);
  return matchingRow !== undefined;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Guess the language of a relator term by looking it up in the relator term table.
 * The languages are tried in the order 'fin', 'swe', 'eng'.
 *
 * @param {string} originalTerm - term to look up
 * @returns {?string} language code of the first match, or null when the term is unknown
 */
function anyToLanguage(originalTerm) {
  // Sometimes there is no 040$b at all, or the 040$b and, say, the 700$e value do not correlate.
  const detected = ['fin', 'swe', 'eng'].find(candidate => termIsInGivenLanguage(originalTerm, candidate));
  return detected === undefined ? null : detected;
}
|
|
180
|
+
|
|
181
|
+
/**
 * Translate a relator term to the target language using the relator term table.
 *
 * @param {string} originalTerm - normal (non-abbreviated) term, possibly ending with ',' or '.'
 * @param {?string} defaultFromLanguage - assumed source language (typically 040$b); used only if the term really is in it
 * @param {string} toLanguage - target language
 * @returns {string} the translation with the original trailing punctuation, or originalTerm if nothing can be done
 */
function translateRelatorTerm(originalTerm, defaultFromLanguage, toLanguage) {
  // originalTerm is supposed to be normal version (abbrs have been expanded), possibly with punctuation
  const term = originalTerm.replace(/[,.]$/u, '');
  nvdebug(`Try to translate '${term}' from ${defaultFromLanguage} to ${toLanguage}`, debugDev);

  // Kind of hacky... If the term already is in toLanguage, there is nothing to translate.
  if (termIsInGivenLanguage(term, toLanguage)) {
    return originalTerm;
  }

  // defaultFromLanguage (typically 040$b) isn't that reliable: trust it only if the term is found in it.
  const defaultIsUsable = defaultFromLanguage !== null && termIsInGivenLanguage(term, defaultFromLanguage);
  const fromLanguage = defaultIsUsable ? defaultFromLanguage : anyToLanguage(term);

  const candRow = relatorTerms.find(row => fromLanguage in row && toLanguage in row && row[fromLanguage] === term);
  if (!candRow) {
    return originalTerm;
  }

  // Put back the punctuation character that was stripped from the end (if any).
  const punc = term === originalTerm ? '' : originalTerm.slice(-1);
  const translation = `${candRow[toLanguage]}${punc}`;
  nvdebug(`Translate relator term: ${originalTerm} => ${translation}`, debugDev);
  return translation;
}
|
|
203
|
+
|
|
204
|
+
/**
 * Translate the relator term of a $e subfield in place. Other subfields are left alone.
 *
 * @param {{code: string, value: string}} subfield - subfield to modify
 * @param {?string} fromLanguage - assumed source language
 * @param {string} toLanguage - target language
 */
function subfieldTranslateRelatorTerm(subfield, fromLanguage, toLanguage) {
  const isRelatorTermSubfield = subfield.code === 'e';
  if (!isRelatorTermSubfield) {
    return;
  }
  const translated = translateRelatorTerm(subfield.value, fromLanguage, toLanguage);
  subfield.value = translated; // eslint-disable-line functional/immutable-data
}
|
|
210
|
+
|
|
211
|
+
/**
 * Fix the relator terms of a single field in place: abbreviations are expanded first,
 * then the $e subfields are translated. Non-relator fields are ignored.
 *
 * @param {Object} field - field to modify
 * @param {?string} fromLanguage - assumed source language (can not be relied upon)
 * @param {string} toLanguage - target language
 */
export function fieldFixRelatorTerms(field, fromLanguage, toLanguage) {
  // NB! fromLanguage === toLanguage is deliberately not a reason to skip the field: fromLanguage can not be relied upon.
  const relevant = isRelatorField(field);
  if (!relevant) {
    return;
  }

  fieldHandleRelatorTermAbbreviations(field, fromLanguage);

  field.subfields.forEach(subfield => {
    subfieldTranslateRelatorTerm(subfield, fromLanguage, toLanguage);
  });
}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
/**
 * Fix the relator terms of every field of a record in place. (WAS: translateRecord())
 *
 * @param {Object} record - record whose fields are modified
 * @param {?string} [defaultToLanguage=null] - target language; the record's cataloging language is used when this is falsy
 * @param {?string} [defaultFromLanguage=null] - source language; the record's cataloging language is used when this is falsy
 */
export function recordFixRelatorTerms(record, defaultToLanguage = null, defaultFromLanguage = null) {
  const fromLanguage = defaultFromLanguage || getCatalogingLanguage(record);
  const toLanguage = defaultToLanguage || getCatalogingLanguage(record);

  record.fields.forEach(field => {
    fieldFixRelatorTerms(field, fromLanguage, toLanguage);
  });
}
|
|
232
|
+
|
|
233
|
+
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import {expect} from 'chai';
|
|
2
|
+
import {MarcRecord} from '@natlibfi/marc-record';
|
|
3
|
+
import validatorFactory from './fixRelatorTerms';
|
|
4
|
+
import {READERS} from '@natlibfi/fixura';
|
|
5
|
+
import generateTests from '@natlibfi/fixugen';
|
|
6
|
+
import createDebugLogger from 'debug';
|
|
7
|
+
|
|
8
|
+
generateTests({
|
|
9
|
+
callback,
|
|
10
|
+
path: [__dirname, '..', 'test-fixtures', 'fix-relator-terms'],
|
|
11
|
+
useMetadataFile: true,
|
|
12
|
+
recurse: false,
|
|
13
|
+
fixura: {
|
|
14
|
+
reader: READERS.JSON
|
|
15
|
+
},
|
|
16
|
+
mocha: {
|
|
17
|
+
before: () => testValidatorFactory()
|
|
18
|
+
}
|
|
19
|
+
});
|
|
20
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/fixRelatorTerms:test');
|
|
21
|
+
|
|
22
|
+
// Sanity check run before the fixture tests: the factory must return a complete validator object.
async function testValidatorFactory() {
  const validator = await validatorFactory();

  // "include.all" requires every listed key; the previous "any" passed as soon as a single key existed.
  expect(validator)
    .to.be.an('object')
    .that.includes.all.keys('description', 'validate', 'fix');

  expect(validator.description).to.be.a('string');
  expect(validator.validate).to.be.a('function');
  // The fixture callback calls validator.fix(), so make sure it exists as well.
  expect(validator.fix).to.be.a('function');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Run a single fixture directory.
 *
 * @param {Object} options - options handed over by the test generator
 * @param {Function} options.getFixture - reads a fixture file by name
 * @param {boolean} [options.enabled=true] - the test is skipped when this is false
 * @param {boolean} [options.fix=false] - true: run fix() and compare the record; false: compare the validate() result
 */
async function callback({getFixture, enabled = true, fix = false}) {
  if (enabled === false) {
    debug('TEST SKIPPED!');
    return;
  }

  const validator = await validatorFactory();
  const record = new MarcRecord(getFixture('record.json'));
  const expectedResult = getFixture('expectedResult.json');

  if (fix) {
    // Fix mode: the record itself is modified and compared with the expected record.
    await validator.fix(record);
    expect(record).to.eql(expectedResult);
    return;
  }

  // Validate mode: the returned result object is compared with the expected result.
  const validationResult = await validator.validate(record);
  expect(validationResult).to.eql(expectedResult);
}
|
package/src/index.js
CHANGED
|
@@ -4,27 +4,42 @@ import DuplicatesInd1 from './duplicates-ind1';
|
|
|
4
4
|
import EmptyFields from './empty-fields';
|
|
5
5
|
import EndingPunctuation from './ending-punctuation';
|
|
6
6
|
import EndingWhitespace from './ending-whitespace';
|
|
7
|
+
import Field505Separators from './field-505-separators';
|
|
7
8
|
import Field521Fix from './field-521-fix';
|
|
8
|
-
import
|
|
9
|
+
import FieldExclusion from './field-exclusion';
|
|
9
10
|
import FieldStructure from './field-structure';
|
|
11
|
+
import FieldsPresent from './fields-present';
|
|
10
12
|
import FixedFields from './fixed-fields';
|
|
11
|
-
import
|
|
13
|
+
import FixRelatorTerms from './fixRelatorTerms';
|
|
12
14
|
import IdenticalFields from './identical-fields';
|
|
15
|
+
import IndicatorFixes from './indicator-fixes';
|
|
13
16
|
import IsbnIssn from './isbn-issn';
|
|
14
17
|
import ItemLanguage from './item-language';
|
|
18
|
+
import MergeField500Lisapainokset from './mergeField500Lisapainokset';
|
|
19
|
+
import MergeRelatorTermFields from './mergeRelatorTermFields';
|
|
20
|
+
import MultipleSubfield0s from './multiple-subfield-0';
|
|
15
21
|
import NonBreakingSpace from './non-breaking-space';
|
|
22
|
+
import NormalizeIdentifiers from './normalize-identifiers';
|
|
23
|
+
import NormalizeQualifyingInformation from './normalize-qualifying-information';
|
|
16
24
|
import NormalizeUTF8Diacritics from './normalize-utf8-diacritics';
|
|
17
25
|
import Punctuation from './punctuation/';
|
|
18
|
-
import
|
|
26
|
+
import Punctuation2 from './punctuation2';
|
|
27
|
+
import ResolveOrphanedSubfield6s from './resolveOrphanedSubfield6s'; // Do this before reindexing! (thus not in alphabetical order)
|
|
19
28
|
import ReindexSubfield6OccurenceNumbers from './reindexSubfield6OccurenceNumbers';
|
|
29
|
+
import RemoveDuplicateDataFields from './removeDuplicateDataFields';
|
|
30
|
+
import RemoveInferiorDataFields from './removeInferiorDataFields';
|
|
20
31
|
import ResolvableExtReferences from './resolvable-ext-references-melinda';
|
|
21
32
|
import SanitizeVocabularySourceCodes from './sanitize-vocabulary-source-codes';
|
|
22
|
-
import
|
|
33
|
+
import SortRelatorTerms from './sortRelatorTerms';
|
|
23
34
|
import SortSubfields from './sortSubfields';
|
|
24
35
|
import SortTags from './sort-tags';
|
|
36
|
+
// import StripPunctuation from './stripPunctuation'; // Can we add this here? Should be used very cautiously!
|
|
37
|
+
import SubfieldValueNormalizations from './subfieldValueNormalizations';
|
|
25
38
|
import SubfieldExclusion from './subfield-exclusion';
|
|
39
|
+
import Sync007And300 from './sync-007-and-300';
|
|
26
40
|
import TypeOfDateF008 from './typeOfDate-008';
|
|
27
41
|
import UnicodeDecomposition from './unicode-decomposition';
|
|
42
|
+
import UpdateField540 from './update-field-540';
|
|
28
43
|
import Urn from './urn';
|
|
29
44
|
|
|
30
45
|
export {
|
|
@@ -34,24 +49,38 @@ export {
|
|
|
34
49
|
EmptyFields,
|
|
35
50
|
EndingPunctuation,
|
|
36
51
|
EndingWhitespace,
|
|
52
|
+
Field505Separators,
|
|
37
53
|
Field521Fix,
|
|
38
54
|
FieldExclusion,
|
|
39
55
|
FieldsPresent,
|
|
40
56
|
FieldStructure,
|
|
57
|
+
FixRelatorTerms,
|
|
41
58
|
FixedFields,
|
|
42
59
|
IdenticalFields,
|
|
60
|
+
IndicatorFixes,
|
|
43
61
|
IsbnIssn,
|
|
44
62
|
ItemLanguage,
|
|
63
|
+
MergeField500Lisapainokset,
|
|
64
|
+
MergeRelatorTermFields,
|
|
65
|
+
MultipleSubfield0s,
|
|
45
66
|
NonBreakingSpace,
|
|
67
|
+
NormalizeIdentifiers,
|
|
68
|
+
NormalizeQualifyingInformation,
|
|
46
69
|
NormalizeUTF8Diacritics,
|
|
47
70
|
Punctuation,
|
|
71
|
+
Punctuation2,
|
|
48
72
|
ResolveOrphanedSubfield6s,
|
|
49
73
|
ReindexSubfield6OccurenceNumbers,
|
|
74
|
+
RemoveDuplicateDataFields,
|
|
75
|
+
RemoveInferiorDataFields,
|
|
50
76
|
ResolvableExtReferences,
|
|
51
77
|
SanitizeVocabularySourceCodes,
|
|
78
|
+
SortRelatorTerms,
|
|
52
79
|
SortSubfields,
|
|
53
80
|
SortTags,
|
|
54
81
|
SubfieldExclusion,
|
|
82
|
+
SubfieldValueNormalizations,
|
|
83
|
+
Sync007And300,
|
|
55
84
|
TypeOfDateF008,
|
|
56
85
|
UnicodeDecomposition,
|
|
57
86
|
UpdateField540,
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
//import createDebugLogger from 'debug';
|
|
2
|
+
import clone from 'clone';
|
|
3
|
+
import {fieldToString} from './utils';
|
|
4
|
+
|
|
5
|
+
// Author(s): Nicholas Volk
|
|
6
|
+
/**
 * Validator factory for normalizing qualifying information ($q subfields).
 *
 * @returns {{description: string, validate: Function, fix: Function}} validator object
 */
export default function () {

  return {
    // Fixes MELINDA-8740
    description: 'Normalize qualifying information (020$q, 015$q, 024$q, 028$q)',
    validate, fix
  };

  /**
   * Normalize the qualifying information of the record's fields in place.
   *
   * @param {Object} record - record whose fields are modified
   * @returns {{message: Array, fix: Array, valid: boolean}} always an empty, valid result
   */
  function fix(record) {
    // Use optional chaining here as well, so that fix() tolerates a record without fields just like validate() does.
    record.fields?.forEach(field => {
      normalizeQualifyingInformationField(field);
    });

    return {message: [], fix: [], valid: true};
  }

  /**
   * Report every field whose string form would change if fix() was applied. The record is not modified.
   *
   * @param {Object} record - record to check
   * @returns {{message: string[], valid: boolean}} valid is true only when there are no messages
   */
  function validate(record) {
    const res = {message: []};

    record.fields?.forEach(field => {
      validateField(field, res);
    });

    res.valid = res.message.length === 0; // eslint-disable-line functional/immutable-data
    return res;
  }

  // Compare the field with a normalized clone of itself; a difference means the field needs fixing.
  function validateField(field, res) {
    const orig = fieldToString(field);

    const normalizedField = clone(field);
    normalizeQualifyingInformationField(normalizedField);
    const mod = fieldToString(normalizedField);

    if (orig !== mod) { // eslint-disable-line functional/no-conditional-statements
      res.message.push(`'TODO: ${orig}' => '${mod}'`); // eslint-disable-line functional/immutable-data
    }
  }
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
/**
 * Normalize the qualifying information ($q subfields) of a field 015, 020, 024 or 028 in place.
 * Other fields, and fields without subfields, are left untouched. Nothing is returned.
 *
 * @param {{tag: string, subfields?: Array<{code: string, value?: string}>}} field - field to modify
 */
function normalizeQualifyingInformationField(field) {
  if (!['015', '020', '024', '028'].includes(field.tag) || !field.subfields) {
    return;
  }

  field.subfields
    // A $q without a string value has nothing to normalize (and the matching below would throw on it).
    .filter(sf => sf.code === 'q' && typeof sf.value === 'string')
    .forEach(sf => {
      sf.value = normalizeValue(sf.value); // eslint-disable-line functional/immutable-data
    });

  // Map a known variant to its normalized form; any other value is returned as is.
  function normalizeValue(val) {
    // Should we do English as well: "coil bound" and "comb-bound" => "spiral-bound" (as per MTS)?

    // The rules are tried in order and the first match wins.
    // The pattern rules ignore case and swallow trailing '.'/',' characters; the two literal rules require an exact match.
    const rules = [
      {matches: value => (/^(?:hft|häftad)[.,]*$/iu).test(value), normalized: 'mjuka pärmar'}, // MELINDA-8740
      {matches: value => (/^inb(?:\.|unden)[.,]*$/iu).test(value), normalized: 'hårda pärmar'}, // MELINDA-8740
      {matches: value => value === 'rengaskirja', normalized: 'kierreselkä'}, // https://www.kiwi.fi/display/melinda/Talonmies+tiedottaa+16.12.2021
      {matches: value => value === 'ringpärm', normalized: 'spiralrygg'}, // https://www.kiwi.fi/display/melinda/Talonmies+tiedottaa+16.12.2021
      {matches: value => (/^nid(?:\.|ottu)[.,]*$/iu).test(value), normalized: 'pehmeäkantinen'}, // MELINDA-8740
      {matches: value => (/^sid(?:\.|ottu)[.,]*$/iu).test(value), normalized: 'kovakantinen'} // MELINDA-8740
    ];

    const rule = rules.find(({matches}) => matches(val));
    return rule ? rule.normalized : val;
  }
}
|