@natlibfi/marc-record-validators-melinda 11.4.8 → 11.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/disambiguateSeriesStatements.js +200 -0
- package/dist/disambiguateSeriesStatements.js.map +1 -0
- package/dist/disambiguateSeriesStatements.spec.js +51 -0
- package/dist/disambiguateSeriesStatements.spec.js.map +1 -0
- package/dist/fix-country-codes.spec.js +1 -1
- package/dist/fix-country-codes.spec.js.map +1 -1
- package/dist/index.js +7 -0
- package/dist/index.js.map +1 -1
- package/dist/merge-fields/mergeField.js +2 -2
- package/dist/merge-fields/mergeField.js.map +1 -1
- package/dist/merge-fields/mergeOrAddSubfield.js +5 -2
- package/dist/merge-fields/mergeOrAddSubfield.js.map +1 -1
- package/dist/merge-fields.spec.js +1 -1
- package/dist/merge-fields.spec.js.map +1 -1
- package/dist/normalize-identifiers.js +1 -1
- package/dist/normalize-identifiers.js.map +1 -1
- package/dist/normalizeFieldForComparison.js +12 -2
- package/dist/normalizeFieldForComparison.js.map +1 -1
- package/dist/normalizeSubfieldValueForComparison.js +1 -1
- package/dist/normalizeSubfieldValueForComparison.js.map +1 -1
- package/dist/prepublicationUtils.js +17 -6
- package/dist/prepublicationUtils.js.map +1 -1
- package/dist/removeInferiorDataFields.js +27 -4
- package/dist/removeInferiorDataFields.js.map +1 -1
- package/dist/sortRelatorTerms.js +6 -4
- package/dist/sortRelatorTerms.js.map +1 -1
- package/dist/translate-terms.js +1 -3
- package/dist/translate-terms.js.map +1 -1
- package/package.json +5 -2
- package/src/disambiguateSeriesStatements.js +228 -0
- package/src/disambiguateSeriesStatements.spec.js +52 -0
- package/src/fix-country-codes.spec.js +1 -1
- package/src/index.js +2 -0
- package/src/merge-fields/mergeField.js +4 -4
- package/src/merge-fields/mergeOrAddSubfield.js +3 -3
- package/src/merge-fields.spec.js +1 -1
- package/src/normalize-identifiers.js +1 -1
- package/src/normalizeFieldForComparison.js +9 -4
- package/src/normalizeSubfieldValueForComparison.js +1 -1
- package/src/prepublicationUtils.js +17 -6
- package/src/removeInferiorDataFields.js +32 -5
- package/src/sortRelatorTerms.js +4 -4
- package/src/translate-terms.js +1 -2
- package/test-fixtures/disambiguate-series-statements/f01/expectedResult.json +16 -0
- package/test-fixtures/disambiguate-series-statements/f01/metadata.json +5 -0
- package/test-fixtures/disambiguate-series-statements/f01/record.json +16 -0
- package/test-fixtures/disambiguate-series-statements/f02/expectedResult.json +16 -0
- package/test-fixtures/disambiguate-series-statements/f02/metadata.json +5 -0
- package/test-fixtures/disambiguate-series-statements/f02/record.json +16 -0
- package/test-fixtures/disambiguate-series-statements/f03/expectedResult.json +16 -0
- package/test-fixtures/disambiguate-series-statements/f03/metadata.json +5 -0
- package/test-fixtures/disambiguate-series-statements/f03/record.json +15 -0
- package/test-fixtures/disambiguate-series-statements/f04/expectedResult.json +17 -0
- package/test-fixtures/disambiguate-series-statements/f04/metadata.json +6 -0
- package/test-fixtures/disambiguate-series-statements/f04/record.json +16 -0
- package/test-fixtures/disambiguate-series-statements/f05/expectedResult.json +17 -0
- package/test-fixtures/disambiguate-series-statements/f05/metadata.json +5 -0
- package/test-fixtures/disambiguate-series-statements/f05/record.json +17 -0
- package/test-fixtures/disambiguate-series-statements/v01/expectedResult.json +6 -0
- package/test-fixtures/disambiguate-series-statements/v01/metadata.json +5 -0
- package/test-fixtures/disambiguate-series-statements/v01/record.json +16 -0
- package/test-fixtures/disambiguate-series-statements/v04/expectedResult.json +4 -0
- package/test-fixtures/disambiguate-series-statements/v04/metadata.json +6 -0
- package/test-fixtures/disambiguate-series-statements/v04/record.json +16 -0
- package/test-fixtures/remove-inferior-datafields/f13/expectedResult.json +7 -0
- package/test-fixtures/remove-inferior-datafields/f13/record.json +11 -0
- package/test-fixtures/remove-inferior-datafields/f15/expectedResult.json +12 -0
- package/test-fixtures/remove-inferior-datafields/f15/metadata.json +6 -0
- package/test-fixtures/remove-inferior-datafields/f15/record.json +17 -0
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
import createDebugLogger from 'debug';
|
|
2
|
+
import {fieldToString, nvdebug, subfieldToString} from './utils';
|
|
3
|
+
import {MARCXML} from '@natlibfi/marc-record-serializers';
|
|
4
|
+
import {Error} from '@natlibfi/melinda-commons';
|
|
5
|
+
import clone from 'clone';
|
|
6
|
+
import {default as createNatlibfiSruClient} from '@natlibfi/sru-client';
|
|
7
|
+
import {fieldFixPunctuation} from './punctuation2';
|
|
8
|
+
|
|
9
|
+
//const {default: createNatlibfiSruClient} = natlibfiSruClient;
|
|
10
|
+
|
|
11
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:disambiguateSeriesStatements');
|
|
12
|
+
|
|
13
|
+
const ELECTRONIC = 1;
|
|
14
|
+
const PRINTED = 2;
|
|
15
|
+
const NEITHER_OR_UNKNOWN = 0;
|
|
16
|
+
const SRU_API_URL = 'https://sru.api.melinda.kansalliskirjasto.fi/bib';
|
|
17
|
+
|
|
18
|
+
// Author(s): Nicholas Volk
|
|
19
|
+
export default function () {
|
|
20
|
+
const sruClient = createSruClient(SRU_API_URL);
|
|
21
|
+
|
|
22
|
+
return {
|
|
23
|
+
description: 'Disambiguate between printed and electonic series statements (490 with multiple $xs)',
|
|
24
|
+
validate, fix
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
/**
 * Fixer entry point: removes the removable ISSN subfields from the record's relevant 490 fields in place.
 *
 * @param {Object} record - record offering get(tag) and a fields array
 * @returns {Promise<{message: string[], fix: Array, valid: boolean}>} always reported as valid
 */
async function fix(record) {
  const message = await fix490x(getRecordType(record), getRelevantFields(record.fields), true);

  return {message, fix: [], valid: true};
}
|
|
35
|
+
|
|
36
|
+
/**
 * Validator entry point: collects one message per 490 field that the fixer would change.
 * The record itself is not modified.
 *
 * @param {Object} record - record offering get(tag) and a fields array
 * @returns {Promise<{message: string[], valid: boolean}>} valid only when no message was produced
 */
async function validate(record) {
  const message = await fix490x(getRecordType(record), getRelevantFields(record.fields), false);
  const valid = message.length === 0;

  return {message, valid};
}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
/**
 * Pick the $x subfields of a field whose value looks like an ISSN
 * (four digits, a hyphen, three digits and a digit or X), optionally followed by
 * trailing characters that are neither digits nor X (e.g. punctuation).
 *
 * @param {Object} field - a field; fields without a subfields array are accepted
 * @returns {Object[]} matching subfields in their original order (empty array when there are none)
 */
function getValidIssnSubfields(field) {
  const issnPattern = /^[0-9]{4}-[0-9][0-9][0-9][0-9Xx][^0-9Xx]*$/u;

  // Always return an array: callers read .length from the result.
  const subfields = field.subfields ?? [];

  return subfields.filter(sf => sf.code === 'x' && typeof sf.value === 'string' && issnPattern.test(sf.value));
}
|
|
50
|
+
|
|
51
|
+
/**
 * A field is relevant when it is a 490 that carries more than one valid-looking ISSN in $x.
 *
 * @param {Object} field - field to inspect
 * @returns {boolean} true when the field needs disambiguation
 */
function isRelevantField(field) {
  return field.tag === '490' && getValidIssnSubfields(field).length > 1;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Keep only the fields accepted by isRelevantField().
 *
 * @param {Object[]} fields - all fields of a record
 * @returns {Object[]} the relevant 490 fields
 */
function getRelevantFields(fields) {
  const relevantFields = fields.filter(candidate => isRelevantField(candidate));
  return relevantFields;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Process the given 490 fields one at a time (recursively).
 * For each field, the $x subfields reported by getRemovableSubfields() are either removed
 * from the field (fix mode) or described in a message (validate mode).
 *
 * @param {number} recordType - ELECTRONIC, PRINTED or NEITHER_OR_UNKNOWN
 * @param {Object[]} fields - fields still waiting to be processed
 * @param {boolean} reallyFix - true: modify fields in place; false: only collect messages
 * @param {string[]} [message=[]] - messages collected so far
 * @returns {Promise<string[]>} collected messages (only validate mode adds any)
 */
async function fix490x(recordType, fields, reallyFix, message = []) {
  // Without a known record type there is nothing to compare the ISSNs against.
  if (recordType === NEITHER_OR_UNKNOWN) {
    return message;
  }

  const [currField, ...remainingFields] = fields;
  if (!currField) {
    return message;
  }

  const validXs = getValidIssnSubfields(currField);
  const deletableXs = await getRemovableSubfields(validXs, recordType);

  // Leave the field alone when nothing is removable, or when every ISSN would be removed.
  if (deletableXs.length === 0 || deletableXs.length === validXs.length) {
    return fix490x(recordType, remainingFields, reallyFix, message);
  }

  const deletableStrings = deletableXs.map(sf => subfieldToString(sf));
  nvdebug(`Field has removable ISSNS: '${deletableStrings.join(', ')}'`, debug);

  // Subfields are matched by their string form, so identical duplicates are dropped together.
  const isKept = sf => !deletableStrings.includes(subfieldToString(sf));

  // fixer:
  if (reallyFix) {
    currField.subfields = currField.subfields.filter(sf => isKept(sf)); // eslint-disable-line functional/immutable-data
    fieldFixPunctuation(currField);
    return fix490x(recordType, remainingFields, reallyFix, message);
  }

  // validators: work on a copy so that the record stays untouched
  const clonedField = clone(currField);
  const originalString = fieldToString(clonedField);
  clonedField.subfields = clonedField.subfields.filter(sf => isKept(sf)); // eslint-disable-line functional/immutable-data

  const newMessage = `Replace '${originalString}' with '${fieldToString(clonedField)}'`;

  return fix490x(recordType, remainingFields, reallyFix, [...message, newMessage]);
}
|
|
99
|
+
|
|
100
|
+
/**
 * Check the given subfields one after another (recursively) and gather those
 * that isRemovableSubfield() accepts for this record type.
 *
 * @param {Object[]} validXs - subfields still to be checked
 * @param {number} recordType - type of the record being processed
 * @param {Object[]} [removables=[]] - removable subfields found so far
 * @returns {Promise<Object[]>} removable subfields in their original order
 */
async function getRemovableSubfields(validXs, recordType, removables = []) {
  const [head, ...tail] = validXs;

  // End of list reached
  if (!head) {
    return removables;
  }

  const removable = await isRemovableSubfield(head, recordType);
  const collected = removable ? [...removables, head] : removables;

  return getRemovableSubfields(tail, recordType, collected);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Decide whether an ISSN subfield can be dropped from a record of the given type.
 * The ISSN is looked up with issnToRecords(); the subfield is removable only when every
 * record found has a known type that differs from recordType.
 *
 * @param {Object} subfield - $x subfield whose value starts with an ISSN
 * @param {number} recordType - type of the record being processed
 * @returns {Promise<boolean>} true when the subfield may be removed
 */
async function isRemovableSubfield(subfield, recordType) {
  // The first nine characters are the ISSN proper (e.g. "1234-5678"); the rest is punctuation.
  const issn = subfield.value.slice(0, 9);
  const candidates = await issnToRecords(issn);

  // A record of NEITHER_OR_UNKNOWN type is neutral: it never counts as a mismatch,
  // so a single neutral or same-type record is enough to keep the subfield.
  return candidates.every(candidate => hasConflictingType(candidate));

  function hasConflictingType(candidate) {
    const candidateType = getRecordType(candidate);
    return candidateType !== NEITHER_OR_UNKNOWN && candidateType !== recordType;
  }
}
|
|
136
|
+
|
|
137
|
+
/**
 * Fetch the records matching an ISSN through the SRU client created by the factory.
 *
 * @param {string} issn - ISSN used in the bath.issn query
 * @returns {Promise<Object[]>} records returned by search()
 */
async function issnToRecords(issn) {
  const query = `bath.issn=${issn}`;
  return search(sruClient, query);
}
|
|
143
|
+
|
|
144
|
+
/**
 * Derive the record type from field 337.
 * A type is reported only when the record has exactly one 337 field and that field has
 * exactly one $b: value 'c' gives ELECTRONIC and value 'n' gives PRINTED.
 *
 * @param {Object} record - record offering get(tag)
 * @returns {number} ELECTRONIC, PRINTED or NEITHER_OR_UNKNOWN
 */
function getRecordType(record) {
  const fields337 = record.get('337');
  const codes = fields337.length === 1 ? fields337[0].subfields.filter(({code}) => code === 'b') : [];

  if (codes.length !== 1) {
    return NEITHER_OR_UNKNOWN;
  }

  const [{value}] = codes;
  if (value === 'c') {
    return ELECTRONIC;
  }

  return value === 'n' ? PRINTED : NEITHER_OR_UNKNOWN;
}
|
|
165
|
+
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
// All the code below is copypasted from melinda-ui-artikkelit project file src/services/sruServices/sruClient.js
|
|
169
|
+
|
|
170
|
+
/**
 * Create an SRU client for the given endpoint. The client is configured for
 * the 'marcxml' record schema, with retrieveAll off and at most 100 records per request.
 *
 * @param {string} sruApiUrl - URL of the SRU API
 * @returns {Object} client created by createNatlibfiSruClient()
 */
export function createSruClient(sruApiUrl) {
  return createNatlibfiSruClient({
    url: sruApiUrl,
    recordSchema: 'marcxml',
    retrieveAll: false,
    maxRecordsPerRequest: 100
  });
}
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
/*******************************************************************************/
|
|
184
|
+
/* Search and retrieve (copypaste from melinda-ui-artikkelit) */
|
|
185
|
+
|
|
186
|
+
/**
 * Run an SRU searchRetrieve query and convert the returned MARCXML strings into records.
 *
 * @param {Object} sruClient - client whose searchRetrieve(query) returns an emitter of 'record', 'end' and 'error' events
 * @param {string} query - SRU query
 * @param {boolean} [one=false] - true: resolve with the first record only; false: resolve with an array of all records
 * @returns {Promise<Object|Object[]>} the parsed record(s); rejects when no records were found,
 *   when a record cannot be parsed, or when the client emits 'error'
 */
export function search(sruClient, query, one = false) {
  // Records are converted with all validation switched off.
  const noValidation = {
    fields: false,
    subfields: false,
    subfieldValues: false
  };

  return new Promise((resolve, reject) => {
    const promises = [];

    sruClient.searchRetrieve(query)
      .on('record', xmlString => {
        const parsed = Promise.resolve(MARCXML.from(xmlString, noValidation));
        // The parse result is consumed only after 'end'. Attach a no-op handler right away so that
        // a parse failure occurring before that is not reported as an unhandled rejection.
        // The failure itself is still delivered when the promise is awaited below.
        parsed.catch(() => {}); // eslint-disable-line no-empty-function
        promises.push(parsed); // eslint-disable-line functional/immutable-data
      })
      .on('end', async () => {
        try {
          if (promises.length === 0) {
            reject(new Error(404, 'No records found with search and retrieve'));
            return;
          }

          if (one) {
            const [firstPromise] = promises;
            resolve(await firstPromise);
            return;
          }

          resolve(await Promise.all(promises));
        } catch (error) {
          reject(error);
        }
      })
      .on('error', error => {
        reject(error);
      });
  });
}
|
|
228
|
+
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import {expect} from 'chai';
|
|
2
|
+
import {MarcRecord} from '@natlibfi/marc-record';
|
|
3
|
+
import validatorFactory from './disambiguateSeriesStatements';
|
|
4
|
+
import {READERS} from '@natlibfi/fixura';
|
|
5
|
+
import generateTests from '@natlibfi/fixugen';
|
|
6
|
+
import createDebugLogger from 'debug';
|
|
7
|
+
|
|
8
|
+
generateTests({
|
|
9
|
+
callback,
|
|
10
|
+
path: [__dirname, '..', 'test-fixtures', 'disambiguate-series-statements'],
|
|
11
|
+
useMetadataFile: true,
|
|
12
|
+
recurse: false,
|
|
13
|
+
fixura: {
|
|
14
|
+
reader: READERS.JSON
|
|
15
|
+
},
|
|
16
|
+
mocha: {
|
|
17
|
+
before: () => testValidatorFactory()
|
|
18
|
+
}
|
|
19
|
+
});
|
|
20
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/disambiguateSeriesStatements:test');
|
|
21
|
+
|
|
22
|
+
// Sanity check run before the fixture tests: the factory must produce a validator-shaped object.
async function testValidatorFactory() {
  const validator = await validatorFactory();

  expect(validator).to.be.an('object').that.has.any.keys('description', 'validate');

  const {description, validate} = validator;
  expect(description).to.be.a('string');
  expect(validate).to.be.a('function');
}
|
|
32
|
+
|
|
33
|
+
// Runs one fixture directory: depending on the 'fix' flag from metadata, either the fixed record
// or the validation result is compared with expectedResult.json.
async function callback({getFixture, enabled = true, fix = false}) {
  if (enabled === false) {
    debug('TEST SKIPPED!');
    return;
  }

  const validator = await validatorFactory();
  const record = new MarcRecord(getFixture('record.json'));
  const expectedResult = getFixture('expectedResult.json');

  if (fix) {
    // The fixer modifies the record in place, so the record itself is compared.
    await validator.fix(record);
    expect(record).to.eql(expectedResult);
    return;
  }

  const validationResult = await validator.validate(record);
  expect(validationResult).to.eql(expectedResult);
}
|
|
@@ -17,7 +17,7 @@ generateTests({
|
|
|
17
17
|
before: () => testValidatorFactory()
|
|
18
18
|
}
|
|
19
19
|
});
|
|
20
|
-
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/fix-country-
|
|
20
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/fix-country-codes:test');
|
|
21
21
|
|
|
22
22
|
async function testValidatorFactory() {
|
|
23
23
|
const validator = await validatorFactory();
|
package/src/index.js
CHANGED
|
@@ -5,6 +5,7 @@ import AddMissingField337 from './addMissingField337';
|
|
|
5
5
|
import AddMissingField338 from './addMissingField338';
|
|
6
6
|
import Cyrillux from './cyrillux';
|
|
7
7
|
import CyrilluxUsemarconReplacement from './cyrillux-usemarcon-replacement';
|
|
8
|
+
import DisambiguateSeriesStatements from './disambiguateSeriesStatements';
|
|
8
9
|
import DoubleCommas from './double-commas';
|
|
9
10
|
import DuplicatesInd1 from './duplicates-ind1';
|
|
10
11
|
import EmptyFields from './empty-fields';
|
|
@@ -64,6 +65,7 @@ export {
|
|
|
64
65
|
AddMissingField338,
|
|
65
66
|
Cyrillux,
|
|
66
67
|
CyrilluxUsemarconReplacement,
|
|
68
|
+
DisambiguateSeriesStatements,
|
|
67
69
|
DoubleCommas,
|
|
68
70
|
DuplicatesInd1,
|
|
69
71
|
EmptyFields,
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
//import {MarcRecord} from '@natlibfi/marc-record';
|
|
2
2
|
import createDebugLogger from 'debug';
|
|
3
|
-
import {
|
|
3
|
+
import {fieldToString, fieldsToString, fieldsAreIdentical, nvdebug, hasCopyright, removeCopyright, subfieldToString} from '../utils';
|
|
4
4
|
import {fieldGetOccurrenceNumberPairs} from '../subfield6Utils.js';
|
|
5
|
-
import {cloneAndNormalizeFieldForComparison, cloneAndRemovePunctuation} from '../normalizeFieldForComparison';
|
|
5
|
+
import {cloneAndNormalizeFieldForComparison, cloneAndRemovePunctuation, isEnnakkotietoSubfieldG} from '../normalizeFieldForComparison';
|
|
6
6
|
import {mergeOrAddSubfield} from './mergeOrAddSubfield';
|
|
7
7
|
import {mergeIndicators} from './mergeIndicator';
|
|
8
8
|
import {mergableTag} from './mergableTag';
|
|
@@ -25,7 +25,7 @@ const debugDev = debug.extend('dev');
|
|
|
25
25
|
|
|
26
26
|
// NB! Can be do this via config.json?
|
|
27
27
|
function removeEnnakkotieto(field) {
|
|
28
|
-
const tmp = field.subfields.filter(subfield => subfield
|
|
28
|
+
const tmp = field.subfields.filter(subfield => !isEnnakkotietoSubfieldG(subfield));
|
|
29
29
|
// remove only iff some other subfield remains
|
|
30
30
|
if (tmp.length > 0) { // eslint-disable-line functional/no-conditional-statements
|
|
31
31
|
field.subfields = tmp; // eslint-disable-line functional/immutable-data
|
|
@@ -60,7 +60,7 @@ function mergeField2(baseRecord, baseField, sourceField, config, candFieldPairs8
|
|
|
60
60
|
// If a base ennakkotieto is merged with real data, remove ennakkotieto subfield:
|
|
61
61
|
// (If our prepub normalizations are ok, this should not be needed.
|
|
62
62
|
// However, it's simple and works well enough, so let's keep it here.)
|
|
63
|
-
if (
|
|
63
|
+
if (baseField.subfields?.find(sf => isEnnakkotietoSubfieldG(sf)) && !sourceField.subfields?.find(sf => isEnnakkotietoSubfieldG(sf))) { // eslint-disable-line functional/no-conditional-statements
|
|
64
64
|
removeEnnakkotieto(baseField);
|
|
65
65
|
baseField.merged = 1; // eslint-disable-line functional/immutable-data
|
|
66
66
|
}
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import createDebugLogger from 'debug';
|
|
2
|
-
import {cloneAndNormalizeFieldForComparison} from '../normalizeFieldForComparison.js';
|
|
2
|
+
import {cloneAndNormalizeFieldForComparison, isEnnakkotietoSubfieldG} from '../normalizeFieldForComparison.js';
|
|
3
3
|
import {normalizeAs, normalizeControlSubfieldValue} from '../normalize-identifiers';
|
|
4
4
|
import {fieldHasSubfield, fieldToString, isControlSubfieldCode, nvdebug, subfieldIsRepeatable, subfieldToString} from '../utils.js';
|
|
5
5
|
import {mergeSubfield} from './mergeSubfield.js';
|
|
@@ -26,10 +26,10 @@ function catalogingSourceModifyingAgencyCandIsOriginalCatalogingSourceAgencyInTa
|
|
|
26
26
|
}
|
|
27
27
|
|
|
28
28
|
function ennakkotietoInSubfieldG(candSubfieldData) {
|
|
29
|
-
if (
|
|
29
|
+
if (isEnnakkotietoSubfieldG({'code': candSubfieldData.code, 'value': candSubfieldData.originalValue})) {
|
|
30
30
|
// Skip just ‡g subfield or the whole field?
|
|
31
31
|
// We decided to skip just this subfield. We want at least $0 and maybe even more from ennakkotieto.
|
|
32
|
-
debugDev(
|
|
32
|
+
debugDev(`Skip '‡g ${candSubfieldData.originalValue}'`);
|
|
33
33
|
return true;
|
|
34
34
|
}
|
|
35
35
|
return false;
|
package/src/merge-fields.spec.js
CHANGED
|
@@ -17,7 +17,7 @@ generateTests({
|
|
|
17
17
|
before: () => testValidatorFactory()
|
|
18
18
|
}
|
|
19
19
|
});
|
|
20
|
-
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/merge-
|
|
20
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/merge-fields:test');
|
|
21
21
|
|
|
22
22
|
async function testValidatorFactory() {
|
|
23
23
|
const validator = await validatorFactory();
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
//import createDebugLogger from 'debug';
|
|
3
3
|
import clone from 'clone';
|
|
4
4
|
import {fieldToString} from './utils';
|
|
5
|
-
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:
|
|
5
|
+
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:normalize-identifiers');
|
|
6
6
|
|
|
7
7
|
/*
|
|
8
8
|
function nvdebug(message, func) {
|
|
@@ -14,10 +14,17 @@ import {fieldNormalizeControlNumbers/*, normalizeControlSubfieldValue*/} from '.
|
|
|
14
14
|
import createDebugLogger from 'debug';
|
|
15
15
|
import {normalizePartData, subfieldContainsPartData} from './normalizeSubfieldValueForComparison';
|
|
16
16
|
|
|
17
|
-
const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:
|
|
17
|
+
const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:normalizeFieldForComparison');
|
|
18
18
|
//const debugData = debug.extend('data');
|
|
19
19
|
const debugDev = debug.extend('dev');
|
|
20
20
|
|
|
21
|
+
/**
 * Tell whether a subfield is a $g whose whole value is 'ENNAKKOTIETO',
 * optionally followed by a single full stop. The comparison is case-sensitive.
 *
 * @param {{code: string, value: string}} subfield - subfield to test
 * @returns {boolean} true for an ENNAKKOTIETO $g; false otherwise (also for a missing or non-string value)
 */
export function isEnnakkotietoSubfieldG(subfield) {
  if (subfield.code !== 'g' || typeof subfield.value !== 'string') {
    return false;
  }
  // test() yields a real boolean; the pattern is anchored, so no global flag is needed.
  return (/^ENNAKKOTIETO\.?$/u).test(subfield.value);
}
|
|
27
|
+
|
|
21
28
|
function debugFieldComparison(oldField, newField) { // NB: Debug-only function!
|
|
22
29
|
/*
|
|
23
30
|
// We may drop certain subfields:
|
|
@@ -62,12 +69,10 @@ function containsCorporateName(tag = '???', subfieldCode = undefined) {
|
|
|
62
69
|
|
|
63
70
|
function skipAllSubfieldNormalizations(value, subfieldCode, tag) {
|
|
64
71
|
|
|
65
|
-
|
|
66
|
-
if (subfieldCode === 'g' && value === 'ENNAKKOTIETO.') {
|
|
72
|
+
if (isEnnakkotietoSubfieldG({'code': subfieldCode, value})) {
|
|
67
73
|
return true;
|
|
68
74
|
}
|
|
69
75
|
|
|
70
|
-
|
|
71
76
|
if (tag === '035' && ['a', 'z'].includes(subfieldCode)) { // A
|
|
72
77
|
return true;
|
|
73
78
|
}
|
|
@@ -3,7 +3,7 @@ import createDebugLogger from 'debug';
|
|
|
3
3
|
|
|
4
4
|
// Normalizes at least 490$v and 773$g which contain information such as "Raita 5" vs "5", and "Osa 3" vs "Osa III".
|
|
5
5
|
|
|
6
|
-
const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:
|
|
6
|
+
const debug = createDebugLogger('@natlibfi/melinda-marc-record-merge-reducers:normalizeSubfieldValueForComparison');
|
|
7
7
|
//const debugData = debug.extend('data');
|
|
8
8
|
const debugDev = debug.extend('dev');
|
|
9
9
|
|
|
@@ -24,7 +24,12 @@ export function encodingLevelIsBetterThanPrepublication(encodingLevel) {
|
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
function containsSubstringInSubfieldA(field, substring) {
|
|
27
|
+
function containsSubstringInSubfieldA(field, substring, ignoreCase = false) {
|
|
28
|
+
if (ignoreCase) {
|
|
29
|
+
const lowercasedSubstring = substring.toLowerCase();
|
|
30
|
+
return field.subfields.some(sf => sf.code === 'a' && sf.value.toLowerCase().includes(lowercasedSubstring));
|
|
31
|
+
|
|
32
|
+
}
|
|
28
33
|
return field.subfields.some(sf => sf.code === 'a' && sf.value.includes(substring));
|
|
29
34
|
}
|
|
30
35
|
|
|
@@ -36,14 +41,16 @@ export function fieldRefersToKoneellisestiTuotettuTietue(field) {
|
|
|
36
41
|
|
|
37
42
|
|
|
38
43
|
export function fieldRefersToTarkistettuEnnakkotieto(field) {
|
|
39
|
-
return containsSubstringInSubfieldA(field, 'TARKISTETTU ENNAKKOTIETO');
|
|
44
|
+
return containsSubstringInSubfieldA(field, 'TARKISTETTU ENNAKKOTIETO', true);
|
|
40
45
|
}
|
|
41
46
|
|
|
42
47
|
|
|
43
48
|
export function fieldRefersToEnnakkotieto(field) {
|
|
44
49
|
// NB! This no longer matches 'TARKISTETTU ENNAKKOTIETO' case! Bug or Feature?
|
|
45
|
-
if (
|
|
46
|
-
|
|
50
|
+
if (!fieldRefersToTarkistettuEnnakkotieto(field)) {
|
|
51
|
+
if (containsSubstringInSubfieldA(field, 'ENNAKKOTIETO', true)) {
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
47
54
|
}
|
|
48
55
|
// MRA-420: "EI VIELÄ ILMESTYNYT" is a Helmet note, that is semantically similar to ENNAKKOTIETO:
|
|
49
56
|
return containsSubstringInSubfieldA(field, 'EI VIELÄ ILMESTYNYT');
|
|
@@ -75,6 +82,7 @@ export function firstFieldHasBetterPrepubEncodingLevel(field1, field2) {
|
|
|
75
82
|
|
|
76
83
|
/*
|
|
77
84
|
function hasEnnakkotietoSubfield(field) {
|
|
85
|
+
// NB! This has apparently changed to lower case 'ennakkotieto'...
|
|
78
86
|
return field.subfields.some(sf => ['g', '9'].includes(sf.code) && sf.value.includes('ENNAKKOTIETO'));
|
|
79
87
|
}
|
|
80
88
|
*/
|
|
@@ -229,8 +237,11 @@ export function isEnnakkotietoSubfield(subfield) {
|
|
|
229
237
|
return false;
|
|
230
238
|
}
|
|
231
239
|
// Length <= 13 allows punctuation, but does not require it:
|
|
232
|
-
if (subfield.value.
|
|
233
|
-
|
|
240
|
+
if (subfield.value.length <= 13) {
|
|
241
|
+
const coreString = subfield.value.substr(0, 12);
|
|
242
|
+
if (coreString === 'ENNAKKOTIETO' || coreString === 'ennakkotieto') { // Lowercase term first seen in MET-575
|
|
243
|
+
return true;
|
|
244
|
+
}
|
|
234
245
|
}
|
|
235
246
|
return false;
|
|
236
247
|
}
|
|
@@ -12,7 +12,7 @@ import {fixComposition, precomposeFinnishLetters} from './normalize-utf8-diacrit
|
|
|
12
12
|
// NB! This validator handles only full fields, and does not support subfield $8 removal.
|
|
13
13
|
// Also, having multiple $8 subfields in same fields is not supported.
|
|
14
14
|
// If this functionality is needed, see removeDuplicateDatafields.js for examples of subfield-only stuff.
|
|
15
|
-
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:
|
|
15
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:removeInferiorDataFields');
|
|
16
16
|
|
|
17
17
|
export default function () {
|
|
18
18
|
return {
|
|
@@ -233,7 +233,25 @@ function deriveIndividualDeletables(record) {
|
|
|
233
233
|
|
|
234
234
|
const deletableStringsArray = processTodoList(todoList);
|
|
235
235
|
|
|
236
|
-
|
|
236
|
+
const inferiorTerms = getInferiorTerms(record);
|
|
237
|
+
|
|
238
|
+
return uniqArray([...deletableStringsArray, ...inferiorTerms]);
|
|
239
|
+
|
|
240
|
+
// Build the 653 strings derived from the record's 648, 650 and 651 fields.
// Fields without subfields are skipped. Returns an array of strings.
function getInferiorTerms(record) {
  const sourceTags = ['648', '650', '651'];

  return record.fields
    .filter(f => sourceTags.includes(f.tag) && f.subfields)
    .flatMap(f => fieldToInferiorFields(f));
}
|
|
247
|
+
|
|
248
|
+
// Turn each $a of the field into 653 field string(s) carrying the same value.
// A 650 field yields two strings per $a (indicators '##' and '#0'), other tags only the '##' one.
function fieldToInferiorFields(field) {
  const indicatorPairs = field.tag === '650' ? ['##', '#0'] : ['##'];

  return field.subfields
    .filter(({code}) => code === 'a')
    .flatMap(({value}) => indicatorPairs.map(indicators => `653 ${indicators} ‡a ${value}`));
}
|
|
237
255
|
|
|
238
256
|
function processTodoList(thingsToDo, deletables = []) {
|
|
239
257
|
const [currString, ...stillToDo] = thingsToDo;
|
|
@@ -297,10 +315,10 @@ function deriveIndividualDeletables(record) {
|
|
|
297
315
|
return processTodoList([...stillToDo, ...moreToDo], [...deletables, tmp]);
|
|
298
316
|
}
|
|
299
317
|
|
|
318
|
+
// MET-575 (merge: applies in postprocessing)
|
|
319
|
+
const inferiorTerms = getPrepublicationTerms(currString);
|
|
300
320
|
|
|
301
|
-
const
|
|
302
|
-
|
|
303
|
-
const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...ennakkotieto653];
|
|
321
|
+
const newDeletables = [...deletables, ...subsets, ...accentless, ...d490, ...inferiorTerms];
|
|
304
322
|
|
|
305
323
|
if (subsets.length) {
|
|
306
324
|
return processTodoList([...stillToDo, ...moreToDo], newDeletables);
|
|
@@ -323,6 +341,15 @@ function deriveIndividualDeletables(record) {
|
|
|
323
341
|
return [accentless];
|
|
324
342
|
}
|
|
325
343
|
|
|
344
|
+
// MET-528 (extended by MET-575): for a 653 field string, list the same string with each
// ENNAKKOTIETO ‡g variant appended (upper and lower case, with and without a full stop).
// Any other string yields an empty list.
function getPrepublicationTerms(fieldAsString) {
  if (!(/^653./u).test(fieldAsString)) {
    return [];
  }

  const variants = ['ENNAKKOTIETO', 'ennakkotieto', 'ENNAKKOTIETO.', 'ennakkotieto.'];
  return variants.map(variant => `${fieldAsString} ‡g ${variant}`);
}
|
|
352
|
+
|
|
326
353
|
}
|
|
327
354
|
|
|
328
355
|
function fieldToNormalizedString(field) {
|
package/src/sortRelatorTerms.js
CHANGED
|
@@ -86,7 +86,7 @@ function swapRelatorTermSubfields(field, typeOfMaterial = undefined) {
|
|
|
86
86
|
|
|
87
87
|
const subfieldCode = tagToRelatorTermSubfieldCode(field.tag);
|
|
88
88
|
|
|
89
|
-
console.log(`Processing ${fieldToString(field)}`); // eslint-disable-line no-console
|
|
89
|
+
//console.log(`Processing ${fieldToString(field)}`); // eslint-disable-line no-console
|
|
90
90
|
|
|
91
91
|
const swapPosition = field.subfields.findIndex((subfield, index) => isSwappable(subfield, index));
|
|
92
92
|
|
|
@@ -96,10 +96,10 @@ function swapRelatorTermSubfields(field, typeOfMaterial = undefined) {
|
|
|
96
96
|
return;
|
|
97
97
|
}
|
|
98
98
|
|
|
99
|
-
console.log(`END ${fieldToString(field)}`); // eslint-disable-line no-console
|
|
99
|
+
//console.log(`END ${fieldToString(field)}`); // eslint-disable-line no-console
|
|
100
100
|
|
|
101
101
|
function swapRelatorTermPair(index) {
|
|
102
|
-
console.log(` SWAP`); // eslint-disable-line no-console
|
|
102
|
+
//console.log(` SWAP`); // eslint-disable-line no-console
|
|
103
103
|
|
|
104
104
|
// Swap:
|
|
105
105
|
const tmp = field.subfields[index - 1];
|
|
@@ -123,7 +123,7 @@ function swapRelatorTermSubfields(field, typeOfMaterial = undefined) {
|
|
|
123
123
|
return false;
|
|
124
124
|
}
|
|
125
125
|
const prevScore = scoreRelatorTerm(prevSubfield.value, typeOfMaterial);
|
|
126
|
-
console.log(`PREV: ${prevSubfield.value}/${prevScore}, CURR: ${sf.value}/${currScore}`); // eslint-disable-line no-console
|
|
126
|
+
// console.log(`PREV: ${prevSubfield.value}/${prevScore}, CURR: ${sf.value}/${currScore}`); // eslint-disable-line no-console
|
|
127
127
|
// If this subfield maps to a Work, then subfields can be swapped, even if we don't have a score for the prev subfield!
|
|
128
128
|
if (prevScore === 0 && currScore < WORST_WORK) {
|
|
129
129
|
return false;
|
package/src/translate-terms.js
CHANGED
|
@@ -3,13 +3,12 @@ import createDebugLogger from 'debug';
|
|
|
3
3
|
import {fieldHasSubfield, fieldToString, nvdebug} from './utils';
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:
|
|
6
|
+
const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda:translate-terms');
|
|
7
7
|
const defaultTags = ['648', '650', '651', '655'];
|
|
8
8
|
|
|
9
9
|
const swapLanguageCode = {'fin': 'swe', 'fi': 'sv', 'sv': 'fi', 'swe': 'fin'};
|
|
10
10
|
const changeAbbrHash = {'fi': 'fin', 'fin': 'fi', 'sv': 'swe', 'swe': 'sv'};
|
|
11
11
|
|
|
12
|
-
//const debug = createDebugLogger('@natlibfi/marc-record-validators-melinda/translate-term');
|
|
13
12
|
const termCache = {};
|
|
14
13
|
|
|
15
14
|
// Author(s): Nicholas Volk
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_validationOptions": {},
|
|
3
|
+
"leader": "12345cam a22123454i 4500",
|
|
4
|
+
"fields": [
|
|
5
|
+
{ "tag": "008", "value": "012345s7890 fi |||| mul |"},
|
|
6
|
+
{ "tag": "337", "ind1": " ", "ind2": " ", "subfields": [
|
|
7
|
+
{"code": "a", "value": "tietokonekäyttöinen"},
|
|
8
|
+
{"code": "b", "value": "c"},
|
|
9
|
+
{"code": "2", "value": "rdamedia"}
|
|
10
|
+
]},
|
|
11
|
+
{ "tag": "490", "ind1": " ", "ind2": " ", "subfields": [
|
|
12
|
+
{"code": "a", "value": "Sisu,"},
|
|
13
|
+
{"code": "x", "value": "1797-7746"}
|
|
14
|
+
]}
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"leader": "12345cam a22123454i 4500",
|
|
3
|
+
"fields": [
|
|
4
|
+
{ "tag": "008", "value": "012345s7890 fi |||| mul |"},
|
|
5
|
+
{ "tag": "337", "ind1": " ", "ind2": " ", "subfields": [
|
|
6
|
+
{"code": "a", "value": "tietokonekäyttöinen"},
|
|
7
|
+
{"code": "b", "value": "c"},
|
|
8
|
+
{"code": "2", "value": "rdamedia"}
|
|
9
|
+
]},
|
|
10
|
+
{ "tag": "490", "ind1": " ", "ind2": " ", "subfields": [
|
|
11
|
+
{"code": "a", "value": "Sisu"},
|
|
12
|
+
{"code": "x", "value": "1797-7746"},
|
|
13
|
+
{"code": "x", "value": "1797-5905"}
|
|
14
|
+
]}
|
|
15
|
+
]
|
|
16
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_validationOptions": {},
|
|
3
|
+
"leader": "12345cam a22123454i 4500",
|
|
4
|
+
"fields": [
|
|
5
|
+
{ "tag": "008", "value": "012345s7890 fi |||| mul |"},
|
|
6
|
+
{ "tag": "337", "ind1": " ", "ind2": " ", "subfields": [
|
|
7
|
+
{"code": "a", "value": "käytettävissä ilman laitetta"},
|
|
8
|
+
{"code": "b", "value": "n"},
|
|
9
|
+
{"code": "2", "value": "rdamedia"}
|
|
10
|
+
]},
|
|
11
|
+
{ "tag": "490", "ind1": " ", "ind2": " ", "subfields": [
|
|
12
|
+
{"code": "a", "value": "Sisu,"},
|
|
13
|
+
{"code": "x", "value": "1797-5905"}
|
|
14
|
+
]}
|
|
15
|
+
]
|
|
16
|
+
}
|