@biblioteksentralen/marc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # `@biblioteksentralen/marc`
2
+
3
+ Package for representing MARC records in TypeScript and serializing them to/from MARC XML and JSON.
4
+
5
+ The JSON serialization is compatible with the schema defined by <https://www.npmjs.com/package/@natlibfi/marc-record>
6
+
7
+ ## Usage
8
+
9
+ ### Parsing XML
10
+
11
+ ```ts
12
+ import { parseMarcXml } from "@biblioteksentralen/marc";
13
+
14
+ const xmlRecord = `
15
+ <record xmlns="http://www.loc.gov/MARC21/slim">
16
+ <leader>00000cam^a22001937i^4500</leader>
17
+ <controlfield tag="001">000030000</controlfield>
18
+ <controlfield tag="003">FI-MELINDA</controlfield>
19
+ <controlfield tag="005">20141221175522.0</controlfield>
20
+ <controlfield tag="008">890208s1988^^^^sw^|||||||||||||||||swe||</controlfield>
21
+ <datafield tag="020" ind1=" " ind2=" ">
22
+ <subfield code="a">91-38-61844-3</subfield>
23
+ <subfield code="q">inb.</subfield>
24
+ </datafield>
25
+ <datafield tag="100" ind1="0" ind2=" ">
26
+ <subfield code="a">Durö, Robert.</subfield>
27
+ </datafield>
28
+ <datafield tag="245" ind1="1" ind2="0">
29
+ <subfield code="a">Konkurrensöverlägsenhet :</subfield>
30
+ <subfield code="b">i tio konkreta steg /</subfield>
31
+ <subfield code="c">Robert Durö.</subfield>
32
+ </datafield>
33
+ </record>
34
+ `;
35
+
36
+ const [record] = parseMarcXml(xmlRecord); // parseMarcXml returns one MarcRecord per <record> element
37
+ const title = record
38
+ .getSubfieldValues("245", /a|b|n|p/)
39
+ .join(" ")
40
+ ?.trim();
41
+ ```
42
+
43
+ ### Serializing / deserializing
44
+
45
+ The MarcRecord class can be JSON serialized:
46
+
47
+ ```ts
48
+ const serializedRecord = JSON.stringify(record);
49
+ ```
50
+
51
+ and deserialized:
52
+
53
+ ```ts
54
+ const record = MarcRecord.fromJSON(JSON.parse(serializedRecord));
55
+ ```
56
+
57
+ Standalone fields can also be deserialized using the `createDataField` function – useful for tests:
58
+
59
+ ```ts
60
+ const field = createDataField({
61
+ tag: "020",
62
+ subfields: [{ code: "a", value: "9788283982701" }],
63
+ });
64
+ ```
65
+
66
+ ### JSON Schema validation
67
+
68
+ The package exports a JSON schema that can be used to validate JSON serialized
69
+ MARC record structure using AJV or other JSON Schema validators.
70
+
71
+ ```ts
72
+ import Ajv from "ajv";
73
+ import { createMarcSchema } from "@biblioteksentralen/marc";
74
+
75
+ const schema = createMarcSchema();
76
+ const ajv = new Ajv();
77
+ const validate = ajv.compile(schema);
78
+ const result = validate(record.toJSON());
79
+ ```
@@ -0,0 +1,143 @@
1
+ import xpath from 'xpath';
2
+ import { Logger } from 'ts-log';
3
+
4
+ type SerializedMarcField = SerializedDataField | SerializedControlField;
5
+ interface SerializedControlField {
6
+ tag: string;
7
+ value: string;
8
+ }
9
+ interface SerializedDataField {
10
+ tag: string;
11
+ ind1?: string;
12
+ ind2?: string;
13
+ subfields: SerializedMarcSubfield[];
14
+ }
15
+ interface SerializedMarcSubfield {
16
+ code: string;
17
+ value: string;
18
+ }
19
+ declare class Subfield {
20
+ readonly code: string;
21
+ readonly value: string;
22
+ constructor(code: string, value: string);
23
+ toJSON(): SerializedMarcSubfield;
24
+ toString(): string;
25
+ }
26
+ declare class ControlField {
27
+ readonly tag: string;
28
+ readonly value: string;
29
+ constructor(tag: string, value: string);
30
+ toJSON(): SerializedControlField;
31
+ toString(): string;
32
+ }
33
+ declare class DataField {
34
+ readonly tag: string;
35
+ readonly ind1: string | undefined;
36
+ readonly ind2: string | undefined;
37
+ readonly subfields: Subfield[];
38
+ constructor(tag: string, ind1: string | undefined, ind2: string | undefined, subfields: Subfield[]);
39
+ getSubfields(code?: string | RegExp): Subfield[];
40
+ getFirstSubfield(code: string | RegExp): Subfield | undefined;
41
+ getFirstSubfieldValue(code: string | RegExp): string | undefined;
42
+ toJSON(): SerializedDataField;
43
+ toString(): string;
44
+ }
45
+ type MarcField = ControlField | DataField;
46
+ declare const createDataField: (field: SerializedDataField) => DataField;
47
+ declare const createControlField: (field: SerializedControlField) => ControlField;
48
+
49
+ type Indicators = {
50
+ ind1?: string;
51
+ ind2?: string;
52
+ };
53
+ interface SerializedMarcRecord {
54
+ format?: string;
55
+ leader: string;
56
+ fields: SerializedMarcField[];
57
+ }
58
+ declare class MarcRecord {
59
+ readonly format: string | undefined;
60
+ readonly leader: string;
61
+ readonly fields: MarcField[];
62
+ constructor({ leader, fields, format, }: {
63
+ leader: string;
64
+ fields: MarcField[];
65
+ format?: string;
66
+ });
67
+ static fromJSON(data: unknown): MarcRecord;
68
+ static validateJSON(data: unknown): SerializedMarcRecord;
69
+ getControlFields(): ControlField[];
70
+ getControlField(tag: string): ControlField | undefined;
71
+ getDataFields(tag?: string | RegExp, indicators?: Indicators): DataField[];
72
+ getFirstDataField(tag: string | RegExp, indicators?: Indicators): DataField | undefined;
73
+ getSubfields(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): Subfield[];
74
+ getSubfieldValues(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): string[];
75
+ getFirstSubfieldValue(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): string | undefined;
76
+ toJSON(): SerializedMarcRecord;
77
+ toString(): string;
78
+ }
79
+
80
+ interface MarcXmlOptions {
81
+ namespace?: string;
82
+ /**
83
+ * Callback function to transform or filter control fields.
84
+ */
85
+ processControlField?: (field: ControlField) => ControlField | undefined;
86
+ /**
87
+ * Callback function to transform or filter data fields.
88
+ */
89
+ processDataField?: (field: DataField) => DataField | undefined;
90
+ /**
91
+ * Free-form string that specifies the MARC record flavour.
92
+ */
93
+ format?: string;
94
+ }
95
+ declare function parseMarcXml(input: string, options?: MarcXmlOptions): MarcRecord[];
96
+
97
+ type NamespaceMap = Record<string, string>;
98
+ interface ParseOptions {
99
+ log?: Logger;
100
+ namespaces?: NamespaceMap;
101
+ }
102
+ declare function parseXml(xmlText: string, { log, namespaces }?: ParseOptions): XmlElement;
103
+ /**
104
+ * XPath-focused helper class for extracting information from an XML document.
105
+ */
106
+ declare class XmlElement {
107
+ readonly node: Document | Element;
108
+ protected namespaces: NamespaceMap;
109
+ protected select: xpath.XPathSelect;
110
+ constructor(node: Document | Element, namespaces?: NamespaceMap);
111
+ toString(): string;
112
+ /**
113
+ * Get the underlying Element node. Returns undefined if the underlying
114
+ * node is the root Document node.
115
+ */
116
+ get element(): Element | undefined;
117
+ /**
118
+ * Get namespaces used in XPath queries.
119
+ */
120
+ getNamespaces(): NamespaceMap;
121
+ /**
122
+ * Find anything (nodes, text, etc.) by xpath.
123
+ * @param {string} query - XPath query.
124
+ */
125
+ query(query: string): xpath.SelectedValue[];
126
+ /**
127
+ * Find element nodes by XPath query.
128
+ * @param {string} query - XPath query.
129
+ */
130
+ elements(query: string): XmlElement[];
131
+ /**
132
+ * Get text content of the current node or the *first* result matched by an XPath query.
133
+ * @param {string} query - XPath query.
134
+ */
135
+ text(query?: string): string | undefined;
136
+ /**
137
+ * Get attribute value of the current node.
138
+ * @param {string} name - Attribute name
139
+ */
140
+ attr(name: string): string | null | undefined;
141
+ }
142
+
143
+ export { ControlField, DataField, type MarcField, MarcRecord, type SerializedControlField, type SerializedDataField, type SerializedMarcRecord, Subfield, XmlElement, createControlField, createDataField, parseMarcXml, parseXml };
package/dist/index.js ADDED
@@ -0,0 +1,429 @@
1
+ 'use strict';
2
+
3
+ var xmldom = require('@xmldom/xmldom');
4
+ var xpath = require('xpath');
5
+ var Ajv = require('ajv');
6
+
7
/**
 * Make a required module readable through a `default` property.
 * A truthy value flagged with `__esModule` is returned untouched; anything
 * else is wrapped as `{ default: value }`.
 */
function _interopDefault(mod) {
  if (mod && mod.__esModule) {
    return mod;
  }
  return { default: mod };
}
8
+
9
+ var xpath__default = /*#__PURE__*/_interopDefault(xpath);
10
+ var Ajv__default = /*#__PURE__*/_interopDefault(Ajv);
11
+
12
+ // src/xml-element/XmlElement.ts
13
+
14
+ // src/xml-element/NodeType.ts
15
/**
 * Numeric DOM node type codes, mirroring the `Node.*_NODE` constants of the
 * DOM standard. Each code is distinct, so a doctype, fragment or notation
 * node can never be mistaken for an element.
 */
var NodeType = {
  ELEMENT_NODE: 1,
  ATTRIBUTE_NODE: 2,
  TEXT_NODE: 3,
  CDATA_SECTION_NODE: 4,
  ENTITY_REFERENCE_NODE: 5,
  ENTITY_NODE: 6,
  PROCESSING_INSTRUCTION_NODE: 7,
  COMMENT_NODE: 8,
  DOCUMENT_NODE: 9,
  DOCUMENT_TYPE_NODE: 10,
  DOCUMENT_FRAGMENT_NODE: 11,
  NOTATION_NODE: 12
};
/** True when `value` is a non-null object that has a `nodeType` property. */
var isNode = (value) => typeof value === "object" && value !== null && "nodeType" in value;
/** True when `node` passes `isNode` and its `nodeType` is `ELEMENT_NODE`. */
var isElementNode = (node) => isNode(node) && node.nodeType === NodeType.ELEMENT_NODE;
31
+
32
+ // src/xml-element/XmlElement.ts
33
/**
 * Error raised when XML parsing fails.
 */
var InvalidXml = class extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(message) {
    super(message);
    // Without an explicit name the instance inherits "Error" from
    // Error.prototype, which makes logs and stack traces indistinguishable
    // from generic errors.
    this.name = "InvalidXml";
  }
};
35
/**
 * Parse an XML string and wrap the resulting document in an `XmlElement`.
 *
 * Parser warnings are forwarded to `log.warn`; parser errors and fatal errors
 * are rethrown as `InvalidXml`, with the input text and the parser's message
 * embedded in the error message.
 *
 * @param {string} xmlText - XML source text.
 * @param {object} [options]
 * @param {object} [options.log] - Logger receiving parser warnings. Defaults to `console`.
 * @param {object} [options.namespaces] - Prefix-to-URI map handed to the returned `XmlElement`.
 * @returns {XmlElement} Wrapper around the parsed document.
 */
function parseXml(xmlText, { log = console, namespaces = {} } = {}) {
  const raiseInvalidXml = (error) => {
    const reason = error instanceof Error ? error.message : String(error);
    throw new InvalidXml(
      `Failed to parse XML response: "${xmlText}". Error: "${reason}"`
    );
  };
  const parser = new xmldom.DOMParser({
    errorHandler: {
      warning: (msg) => log.warn(msg),
      error: raiseInvalidXml,
      fatalError: raiseInvalidXml
    }
  });
  const parsedDocument = parser.parseFromString(xmlText, "text/xml");
  return new XmlElement(parsedDocument, namespaces);
}
51
/**
 * XPath-focused helper class for extracting information from an XML document.
 */
var XmlElement = class _XmlElement {
  node;
  namespaces;
  select;
  /**
   * @param {Document | Element} node - Node that queries are evaluated against.
   * @param {Record<string, string>} [namespaces] - Prefix-to-URI map used in XPath queries.
   */
  constructor(node, namespaces = {}) {
    this.node = node;
    this.namespaces = namespaces;
    this.select = xpath__default.default.useNamespaces(namespaces);
  }
  /**
   * Serialize the underlying node back to an XML string.
   */
  toString() {
    return new xmldom.XMLSerializer().serializeToString(this.node);
  }
  /**
   * Get the underlying Element node. Returns undefined if the underlying
   * node is not an element (e.g. the root Document node).
   */
  get element() {
    return isElementNode(this.node) ? this.node : void 0;
  }
  /**
   * Get namespaces used in XPath queries.
   */
  getNamespaces() {
    return this.namespaces;
  }
  /**
   * Find anything (nodes, text, etc.) by XPath, evaluated against the current node.
   * @param {string} query - XPath query.
   */
  query(query) {
    return this.select(query, this.node);
  }
  /**
   * Find element nodes by XPath query. Results that are not element nodes
   * are dropped; each remaining element is wrapped in a new `XmlElement`
   * that shares this instance's namespaces.
   * @param {string} query - XPath query.
   */
  elements(query) {
    const result = this.query(query);
    // The xpath package hands back a bare string/number/boolean (not an
    // array) for scalar expressions such as `count(...)` or `string(...)`.
    // Normalise to an array so such queries yield "no elements" rather than
    // failing with "filter is not a function".
    const candidates = Array.isArray(result) ? result : [result];
    return candidates
      .filter(isElementNode)
      .map((elementNode) => new _XmlElement(elementNode, this.namespaces));
  }
  /**
   * Get text content of the current node or the *first* element matched by an XPath query.
   * Returns undefined when there is no match or the text content is empty.
   * @param {string} query - XPath query.
   */
  text(query) {
    if (!query) {
      return this.node.textContent ? this.node.textContent : void 0;
    }
    return this.elements(query)[0]?.text();
  }
  /**
   * Get attribute value of the current node. Returns undefined when the
   * current node is not an element; otherwise whatever `getAttribute` returns.
   * @param {string} name - Attribute name
   */
  attr(name) {
    return isElementNode(this.node) ? this.node.getAttribute(name) : void 0;
  }
};
108
+
109
+ // src/marc-record/MarcField.ts
110
/**
 * A MARC subfield: a code paired with a value.
 */
var Subfield = class {
  code;
  value;
  /**
   * @param {string} code - Subfield code.
   * @param {string} value - Subfield value.
   */
  constructor(code, value) {
    this.code = code;
    this.value = value;
  }
  /** Plain-object form, `{ code, value }`. */
  toJSON() {
    const { code, value } = this;
    return { code, value };
  }
  /** Text form: `$<code> <value>`. */
  toString() {
    const { code, value } = this;
    return `$${code} ${value}`;
  }
};
122
/**
 * A MARC control field: a tag paired with a single value.
 */
var ControlField = class {
  tag;
  value;
  /**
   * @param {string} tag - Field tag.
   * @param {string} value - Field value.
   */
  constructor(tag, value) {
    this.tag = tag;
    this.value = value;
  }
  /** Plain-object form, `{ tag, value }`. */
  toJSON() {
    const { tag, value } = this;
    return { tag, value };
  }
  /** Text form: `<tag> <value>`. */
  toString() {
    const { tag, value } = this;
    return `${tag} ${value}`;
  }
};
134
/**
 * A MARC data field: a tag, two optional indicators and a list of subfields.
 */
var DataField = class {
  tag;
  ind1;
  ind2;
  subfields;
  /**
   * @param {string} tag - Field tag.
   * @param {string | undefined} ind1 - First indicator.
   * @param {string | undefined} ind2 - Second indicator.
   * @param {Subfield[]} subfields - Subfields, in order.
   */
  constructor(tag, ind1, ind2, subfields) {
    this.tag = tag;
    this.ind1 = ind1;
    this.ind2 = ind2;
    this.subfields = subfields;
  }
  /**
   * Subfields whose code matches `code`. When `code` is omitted the
   * field's own subfield array is returned as-is (not a copy).
   * @param {string | RegExp} [code] - Code to match.
   */
  getSubfields(code) {
    if (typeof code === "undefined") {
      return this.subfields;
    }
    return this.subfields.filter(
      (candidate) => matchFullString(candidate.code, code)
    );
  }
  /**
   * First subfield whose code matches `code`, or undefined.
   * @param {string | RegExp} code - Code to match.
   */
  getFirstSubfield(code) {
    return this.subfields.find(
      (candidate) => matchFullString(candidate.code, code)
    );
  }
  /**
   * Value of the first subfield whose code matches `code`, or undefined.
   * @param {string | RegExp} code - Code to match.
   */
  getFirstSubfieldValue(code) {
    const first = this.getFirstSubfield(code);
    return first?.value;
  }
  /** Plain-object form, with every subfield converted via its own `toJSON`. */
  toJSON() {
    const { tag, ind1, ind2 } = this;
    return {
      tag,
      ind1,
      ind2,
      subfields: this.subfields.map((sf) => sf.toJSON())
    };
  }
  /** Text form: the tag followed by the space-joined subfields. Indicators are not included. */
  toString() {
    const rendered = this.subfields.map((sf) => sf.toString()).join(" ");
    return `${this.tag} ${rendered}`;
  }
};
166
/** True when `field` is a `ControlField` instance. */
var isControlField = (field) => {
  return field instanceof ControlField;
};
/** True when `field` is a `DataField` instance. */
var isDataField = (field) => {
  return field instanceof DataField;
};
168
// Anchored copies of caller-supplied patterns, keyed by the pattern object,
// so that matching many fields against the same RegExp compiles it only once.
var anchoredPatternCache = new WeakMap();
/**
 * Test whether `value` matches `pattern` in full.
 *
 * A string pattern must be strictly equal to `value`. A RegExp pattern is
 * rewritten as `^(?:source)$` so that it has to match the whole string.
 *
 * @param {string} value - String to test.
 * @param {string | RegExp} pattern - Exact string or regular expression.
 * @returns {boolean} True on a full match.
 */
var matchFullString = (value, pattern) => {
  if (!(pattern instanceof RegExp)) {
    return value === pattern;
  }
  let anchored = anchoredPatternCache.get(pattern);
  if (anchored === undefined) {
    // `g` and `y` make `test` stateful through `lastIndex` and add nothing
    // to a whole-string match, so they are dropped from the cached copy.
    anchored = new RegExp(
      `^(?:${pattern.source})$`,
      pattern.flags.replace(/[gy]/g, "")
    );
    anchoredPatternCache.set(pattern, anchored);
  }
  return anchored.test(value);
};
172
/**
 * Build a `DataField` from its serialized form, turning every serialized
 * subfield into a `Subfield` instance.
 */
var createDataField = (field) => {
  const subfields = field.subfields.map(
    ({ code, value }) => new Subfield(code, value)
  );
  return new DataField(field.tag, field.ind1, field.ind2, subfields);
};
/** Build a `ControlField` from its serialized form. */
var createControlField = (field) => {
  const { tag, value } = field;
  return new ControlField(tag, value);
};
179
+
180
+ // src/marc-record/fixInvalidMarcRecordSerialization.ts
181
/**
 * Replace empty-string indicators in serialized record data with a single space.
 *
 * When `data` is an object with a `fields` array, a shallow copy is returned
 * in which every field object that has both an `ind1` and an `ind2` key gets
 * `""` indicators rewritten to `" "`. Other fields are passed through
 * unchanged, and any other kind of input is returned as-is.
 */
var fixInvalidMarcRecordSerialization = (data) => {
  const hasFieldList = data && typeof data === "object" && "fields" in data && Array.isArray(data.fields);
  if (!hasFieldList) {
    return data;
  }
  const blankIfEmpty = (indicator) => (indicator === "" ? " " : indicator);
  const fields = data.fields.map((field) => {
    const hasBothIndicators = field && typeof field === "object" && "ind1" in field && "ind2" in field;
    if (!hasBothIndicators) {
      return field;
    }
    return {
      ...field,
      ind1: blankIfEmpty(field.ind1),
      ind2: blankIfEmpty(field.ind2)
    };
  });
  return { ...data, fields };
};
191
+
192
+ // src/marc-record/schema.ts
193
/**
 * Build a JSON Schema describing the serialized form of a MARC record.
 *
 * @param {object} [options]
 * @param {boolean} [options.requireFields] - Require at least one field to be present. Defaults to true.
 * @param {boolean} [options.requireSubfields] - Require at least one subfield per data field. Defaults to true.
 * @param {boolean} [options.requireSubfieldValue] - Require a non-empty value for each subfield. Defaults to true.
 * @returns {object} A freshly built schema object.
 */
function createMarcSchema({
  requireFields = true,
  requireSubfields = true,
  requireSubfieldValue = true
} = {}) {
  // Factories rather than shared constants, so that every position in the
  // returned schema is its own object.
  const nonEmptyString = () => ({ type: "string", minLength: 1 });
  const indicator = () => ({ type: "string", minLength: 1, maxLength: 1 });

  const controlFieldSchema = {
    type: "object",
    properties: {
      tag: nonEmptyString(),
      value: nonEmptyString()
    },
    required: ["tag", "value"]
  };

  const subfieldSchema = {
    type: "object",
    properties: {
      code: nonEmptyString(),
      value: {
        type: "string",
        minLength: requireSubfieldValue ? 1 : 0
      }
    },
    required: requireSubfieldValue ? ["code", "value"] : ["code"]
  };

  const dataFieldSchema = {
    type: "object",
    properties: {
      tag: nonEmptyString(),
      ind1: indicator(),
      ind2: indicator(),
      subfields: {
        type: "array",
        minItems: requireSubfields ? 1 : 0,
        items: subfieldSchema
      }
    },
    required: ["tag", "ind1", "ind2", "subfields"]
  };

  return {
    type: "object",
    properties: {
      format: { type: "string" },
      leader: { type: "string" },
      fields: {
        type: "array",
        minItems: requireFields ? 1 : 0,
        items: {
          anyOf: [controlFieldSchema, dataFieldSchema]
        }
      }
    },
    required: ["leader", "fields"]
  };
}
280
+
281
+ // src/marc-record/MarcRecord.ts
282
/**
 * Error raised when serialized record data does not pass schema validation.
 * The validator's error list is exposed on `errors`.
 */
var ValidationFailed = class extends Error {
  errors;
  /**
   * @param {object[]} errors - Error entries reported by the validator.
   */
  constructor(errors) {
    super("MarcRecord validation failed");
    // Without an explicit name the instance inherits "Error" from
    // Error.prototype, hiding the error type in logs and stack traces.
    this.name = "ValidationFailed";
    this.errors = errors;
  }
};
288
// Validator for the default MARC schema, compiled on first use and reused
// afterwards so that validating many records does not recompile the schema
// (and build a new Ajv instance) for each one.
var compiledMarcRecordValidator;
var getMarcRecordValidator = () => {
  if (!compiledMarcRecordValidator) {
    compiledMarcRecordValidator = new Ajv__default.default().compile(createMarcSchema());
  }
  return compiledMarcRecordValidator;
};
/**
 * A MARC record: a leader, an ordered list of control/data fields and an
 * optional free-form format label.
 */
var MarcRecord = class _MarcRecord {
  format;
  leader;
  fields;
  /**
   * @param {object} init
   * @param {string} init.leader - Record leader.
   * @param {Array} init.fields - `ControlField` / `DataField` instances, in order.
   * @param {string} [init.format] - Free-form format label.
   */
  constructor({
    leader,
    fields,
    format
  }) {
    this.format = format;
    this.leader = leader;
    this.fields = fields;
  }
  /**
   * Build a record from parsed JSON. Empty-string indicators are normalised
   * first, then the data is validated against the default schema.
   * @param {unknown} data - Parsed JSON.
   * @throws {ValidationFailed} When the data does not match the schema.
   */
  static fromJSON(data) {
    // Refer to the class itself rather than `this`, so the method keeps
    // working when passed around detached, e.g. `items.map(MarcRecord.fromJSON)`.
    const { format, leader, fields } = _MarcRecord.validateJSON(
      fixInvalidMarcRecordSerialization(data)
    );
    return new _MarcRecord({
      leader,
      format,
      fields: fields.map(
        // Serialized data fields are recognised by their `subfields` key.
        (field) => "subfields" in field ? createDataField(field) : createControlField(field)
      )
    });
  }
  /**
   * Validate parsed JSON against the default schema and return it unchanged.
   * @param {unknown} data - Parsed JSON.
   * @throws {ValidationFailed} Carrying the validator's error list.
   */
  static validateJSON(data) {
    const validator = getMarcRecordValidator();
    if (validator(data)) {
      return data;
    }
    // `validator.errors` describes the call that just failed; read it right away.
    throw new ValidationFailed(validator.errors ?? []);
  }
  /** All control fields, in record order. */
  getControlFields() {
    return this.fields.filter(isControlField);
  }
  /**
   * First control field whose tag is exactly `tag`, or undefined.
   * @param {string} tag - Field tag.
   */
  getControlField(tag) {
    return this.getControlFields().find((field) => field.tag === tag);
  }
  /**
   * Data fields filtered by tag and indicators. Each criterion that is left
   * undefined is not applied; given indicators must be strictly equal.
   * @param {string | RegExp} [tag] - Tag to match.
   * @param {{ ind1?: string, ind2?: string }} [indicators] - Indicator values to match.
   */
  getDataFields(tag, indicators) {
    const dataFields = this.fields.filter(isDataField);
    return dataFields.filter(
      (field) => (typeof tag === "undefined" || matchFullString(field.tag, tag)) && (typeof indicators?.ind1 === "undefined" || indicators.ind1 === field.ind1) && (typeof indicators?.ind2 === "undefined" || indicators.ind2 === field.ind2)
    );
  }
  /** First data field selected by `getDataFields`, or undefined. */
  getFirstDataField(tag, indicators) {
    return this.getDataFields(tag, indicators)[0];
  }
  /**
   * Subfields with a matching code from all matching data fields.
   * Subfields whose value is the empty string are left out.
   */
  getSubfields(tag, code, indicators) {
    return this.getDataFields(tag, indicators).flatMap((field) => field.getSubfields(code)).filter((subfield) => subfield.value !== "");
  }
  /** Values of the subfields selected by `getSubfields`. */
  getSubfieldValues(tag, code, indicators) {
    return this.getSubfields(tag, code, indicators).map(({ value }) => value);
  }
  /** First value selected by `getSubfieldValues`, or undefined. */
  getFirstSubfieldValue(tag, code, indicators) {
    return this.getSubfieldValues(tag, code, indicators)[0];
  }
  /** Plain-object form, with every field converted via its own `toJSON`. */
  toJSON() {
    return {
      format: this.format,
      leader: this.leader,
      fields: this.fields.map((field) => field.toJSON())
    };
  }
  /** Text form: one line per field. The leader is not included. */
  toString() {
    return this.fields.map((field) => field.toString()).join("\n");
  }
};
357
+
358
+ // src/marc-record/namespace.ts
359
// Known MARC XML namespace URIs, in the order they are probed.
var marcXmlNamespaces = [
  // MARC 21 XML ("slim") schema, 2002.
  "http://www.loc.gov/MARC21/slim",
  // ISO 25577 MarcXchange version 1 (2007): practically the same schema as
  // MARC 21 XML, with looser restrictions to accommodate other MARC dialects.
  "info:lc/xmlns/marcxchange-v1",
  // MarcXchange version 2: adds embedded data and loosens restrictions
  // further, to the point where an empty record is valid.
  "info:lc/xmlns/marcxchange-v2"
];
/**
 * Return the first known MARC XML namespace URI that occurs anywhere in
 * `input` as a plain substring, or the empty string when none does.
 * @param {string} input - Raw XML text.
 */
function detectNamespace(input) {
  const detected = marcXmlNamespaces.find((candidate) => input.includes(candidate));
  return detected ?? "";
}
379
+
380
+ // src/marc-record/parseMarcXml.ts
381
/**
 * Parse MARC XML into `MarcRecord` instances, one per `record` element found
 * anywhere in the document.
 *
 * @param {string} input - MARC XML text.
 * @param {object} [options]
 * @param {string} [options.namespace] - Namespace URI bound to the `marc` prefix.
 *   When omitted it is detected from the input text.
 * @param {Function} [options.processControlField] - Transforms each control
 *   field; returning a falsy value drops the field.
 * @param {Function} [options.processDataField] - Transforms each data field;
 *   returning a falsy value drops the field.
 * @param {string} [options.format] - Format label stored on every record.
 * @returns {MarcRecord[]} Parsed records, in document order.
 */
function parseMarcXml(input, options = {}) {
  const namespace = options.namespace ?? detectNamespace(input);
  const xmlRecord = parseXml(input, {
    namespaces: {
      marc: namespace
    }
  });
  const processControlField = options.processControlField ?? ((field) => field);
  const processDataField = options.processDataField ?? ((field) => field);
  return xmlRecord.elements("//marc:record").map((marcRecord) => {
    const leader = marcRecord.text("marc:leader") ?? "";
    // Collected with push() instead of spreading a fresh array per item, so
    // that large records are not copied once for every field.
    const fields = [];
    for (const field of marcRecord.elements("marc:controlfield | marc:datafield")) {
      const tag = field.attr("tag");
      // Fields without a tag are skipped.
      if (!tag) {
        continue;
      }
      let newField;
      if (field.element?.localName === "controlfield") {
        newField = processControlField(
          new ControlField(tag, field.text() ?? "")
        );
      } else {
        const subfields = [];
        for (const subfield of field.elements("marc:subfield")) {
          const code = subfield.attr("code");
          // Subfields without a code are skipped; missing text becomes "".
          if (code) {
            subfields.push(new Subfield(code, subfield.text() ?? ""));
          }
        }
        newField = processDataField(
          new DataField(
            tag,
            // Missing indicator attributes become undefined.
            field.attr("ind1") ?? void 0,
            field.attr("ind2") ?? void 0,
            subfields
          )
        );
      }
      if (newField) {
        fields.push(newField);
      }
    }
    return new MarcRecord({ leader, fields, format: options.format });
  });
}
420
+
421
+ exports.ControlField = ControlField;
422
+ exports.DataField = DataField;
423
+ exports.MarcRecord = MarcRecord;
424
+ exports.Subfield = Subfield;
425
+ exports.XmlElement = XmlElement;
426
+ exports.createControlField = createControlField;
427
+ exports.createDataField = createDataField;
428
+ exports.parseMarcXml = parseMarcXml;
429
+ exports.parseXml = parseXml;
package/dist/index.mjs ADDED
@@ -0,0 +1,414 @@
1
+ import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
2
+ import xpath from 'xpath';
3
+ import Ajv from 'ajv';
4
+
5
+ // src/xml-element/XmlElement.ts
6
+
7
+ // src/xml-element/NodeType.ts
8
/**
 * Numeric DOM node type codes, mirroring the `Node.*_NODE` constants of the
 * DOM standard. Each code is distinct, so a doctype, fragment or notation
 * node can never be mistaken for an element.
 */
var NodeType = {
  ELEMENT_NODE: 1,
  ATTRIBUTE_NODE: 2,
  TEXT_NODE: 3,
  CDATA_SECTION_NODE: 4,
  ENTITY_REFERENCE_NODE: 5,
  ENTITY_NODE: 6,
  PROCESSING_INSTRUCTION_NODE: 7,
  COMMENT_NODE: 8,
  DOCUMENT_NODE: 9,
  DOCUMENT_TYPE_NODE: 10,
  DOCUMENT_FRAGMENT_NODE: 11,
  NOTATION_NODE: 12
};
/** True when `value` is a non-null object that has a `nodeType` property. */
var isNode = (value) => typeof value === "object" && value !== null && "nodeType" in value;
/** True when `node` passes `isNode` and its `nodeType` is `ELEMENT_NODE`. */
var isElementNode = (node) => isNode(node) && node.nodeType === NodeType.ELEMENT_NODE;
24
+
25
+ // src/xml-element/XmlElement.ts
26
/**
 * Error raised when XML parsing fails.
 */
var InvalidXml = class extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   */
  constructor(message) {
    super(message);
    // Without an explicit name the instance inherits "Error" from
    // Error.prototype, which makes logs and stack traces indistinguishable
    // from generic errors.
    this.name = "InvalidXml";
  }
};
28
/**
 * Parse an XML string and wrap the resulting document in an `XmlElement`.
 *
 * Parser warnings are forwarded to `log.warn`; parser errors and fatal errors
 * are rethrown as `InvalidXml`, with the input text and the parser's message
 * embedded in the error message.
 *
 * @param {string} xmlText - XML source text.
 * @param {object} [options]
 * @param {object} [options.log] - Logger receiving parser warnings. Defaults to `console`.
 * @param {object} [options.namespaces] - Prefix-to-URI map handed to the returned `XmlElement`.
 * @returns {XmlElement} Wrapper around the parsed document.
 */
function parseXml(xmlText, { log = console, namespaces = {} } = {}) {
  const raiseInvalidXml = (error) => {
    const reason = error instanceof Error ? error.message : String(error);
    throw new InvalidXml(
      `Failed to parse XML response: "${xmlText}". Error: "${reason}"`
    );
  };
  const parser = new DOMParser({
    errorHandler: {
      warning: (msg) => log.warn(msg),
      error: raiseInvalidXml,
      fatalError: raiseInvalidXml
    }
  });
  const parsedDocument = parser.parseFromString(xmlText, "text/xml");
  return new XmlElement(parsedDocument, namespaces);
}
44
/**
 * XPath-focused helper class for extracting information from an XML document.
 */
var XmlElement = class _XmlElement {
  node;
  namespaces;
  select;
  /**
   * @param {Document | Element} node - Node that queries are evaluated against.
   * @param {Record<string, string>} [namespaces] - Prefix-to-URI map used in XPath queries.
   */
  constructor(node, namespaces = {}) {
    this.node = node;
    this.namespaces = namespaces;
    this.select = xpath.useNamespaces(namespaces);
  }
  /**
   * Serialize the underlying node back to an XML string.
   */
  toString() {
    return new XMLSerializer().serializeToString(this.node);
  }
  /**
   * Get the underlying Element node. Returns undefined if the underlying
   * node is not an element (e.g. the root Document node).
   */
  get element() {
    return isElementNode(this.node) ? this.node : void 0;
  }
  /**
   * Get namespaces used in XPath queries.
   */
  getNamespaces() {
    return this.namespaces;
  }
  /**
   * Find anything (nodes, text, etc.) by XPath, evaluated against the current node.
   * @param {string} query - XPath query.
   */
  query(query) {
    return this.select(query, this.node);
  }
  /**
   * Find element nodes by XPath query. Results that are not element nodes
   * are dropped; each remaining element is wrapped in a new `XmlElement`
   * that shares this instance's namespaces.
   * @param {string} query - XPath query.
   */
  elements(query) {
    const result = this.query(query);
    // The xpath package hands back a bare string/number/boolean (not an
    // array) for scalar expressions such as `count(...)` or `string(...)`.
    // Normalise to an array so such queries yield "no elements" rather than
    // failing with "filter is not a function".
    const candidates = Array.isArray(result) ? result : [result];
    return candidates
      .filter(isElementNode)
      .map((elementNode) => new _XmlElement(elementNode, this.namespaces));
  }
  /**
   * Get text content of the current node or the *first* element matched by an XPath query.
   * Returns undefined when there is no match or the text content is empty.
   * @param {string} query - XPath query.
   */
  text(query) {
    if (!query) {
      return this.node.textContent ? this.node.textContent : void 0;
    }
    return this.elements(query)[0]?.text();
  }
  /**
   * Get attribute value of the current node. Returns undefined when the
   * current node is not an element; otherwise whatever `getAttribute` returns.
   * @param {string} name - Attribute name
   */
  attr(name) {
    return isElementNode(this.node) ? this.node.getAttribute(name) : void 0;
  }
};
101
+
102
+ // src/marc-record/MarcField.ts
103
/**
 * A MARC subfield: a code paired with a value.
 */
var Subfield = class {
  code;
  value;
  /**
   * @param {string} code - Subfield code.
   * @param {string} value - Subfield value.
   */
  constructor(code, value) {
    this.code = code;
    this.value = value;
  }
  /** Plain-object form, `{ code, value }`. */
  toJSON() {
    const { code, value } = this;
    return { code, value };
  }
  /** Text form: `$<code> <value>`. */
  toString() {
    const { code, value } = this;
    return `$${code} ${value}`;
  }
};
115
/**
 * A MARC control field: a tag paired with a single value.
 */
var ControlField = class {
  tag;
  value;
  /**
   * @param {string} tag - Field tag.
   * @param {string} value - Field value.
   */
  constructor(tag, value) {
    this.tag = tag;
    this.value = value;
  }
  /** Plain-object form, `{ tag, value }`. */
  toJSON() {
    const { tag, value } = this;
    return { tag, value };
  }
  /** Text form: `<tag> <value>`. */
  toString() {
    const { tag, value } = this;
    return `${tag} ${value}`;
  }
};
127
/**
 * A MARC data field: a tag, two optional indicators and a list of subfields.
 */
var DataField = class {
  tag;
  ind1;
  ind2;
  subfields;
  /**
   * @param {string} tag - Field tag.
   * @param {string | undefined} ind1 - First indicator.
   * @param {string | undefined} ind2 - Second indicator.
   * @param {Subfield[]} subfields - Subfields, in order.
   */
  constructor(tag, ind1, ind2, subfields) {
    this.tag = tag;
    this.ind1 = ind1;
    this.ind2 = ind2;
    this.subfields = subfields;
  }
  /**
   * Subfields whose code matches `code`. When `code` is omitted the
   * field's own subfield array is returned as-is (not a copy).
   * @param {string | RegExp} [code] - Code to match.
   */
  getSubfields(code) {
    if (typeof code === "undefined") {
      return this.subfields;
    }
    return this.subfields.filter(
      (candidate) => matchFullString(candidate.code, code)
    );
  }
  /**
   * First subfield whose code matches `code`, or undefined.
   * @param {string | RegExp} code - Code to match.
   */
  getFirstSubfield(code) {
    return this.subfields.find(
      (candidate) => matchFullString(candidate.code, code)
    );
  }
  /**
   * Value of the first subfield whose code matches `code`, or undefined.
   * @param {string | RegExp} code - Code to match.
   */
  getFirstSubfieldValue(code) {
    const first = this.getFirstSubfield(code);
    return first?.value;
  }
  /** Plain-object form, with every subfield converted via its own `toJSON`. */
  toJSON() {
    const { tag, ind1, ind2 } = this;
    return {
      tag,
      ind1,
      ind2,
      subfields: this.subfields.map((sf) => sf.toJSON())
    };
  }
  /** Text form: the tag followed by the space-joined subfields. Indicators are not included. */
  toString() {
    const rendered = this.subfields.map((sf) => sf.toString()).join(" ");
    return `${this.tag} ${rendered}`;
  }
};
159
/** True when `field` is a `ControlField` instance. */
var isControlField = (field) => {
  return field instanceof ControlField;
};
/** True when `field` is a `DataField` instance. */
var isDataField = (field) => {
  return field instanceof DataField;
};
161
// Anchored copies of caller-supplied patterns, keyed by the pattern object,
// so that matching many fields against the same RegExp compiles it only once.
var anchoredPatternCache = new WeakMap();
/**
 * Test whether `value` matches `pattern` in full.
 *
 * A string pattern must be strictly equal to `value`. A RegExp pattern is
 * rewritten as `^(?:source)$` so that it has to match the whole string.
 *
 * @param {string} value - String to test.
 * @param {string | RegExp} pattern - Exact string or regular expression.
 * @returns {boolean} True on a full match.
 */
var matchFullString = (value, pattern) => {
  if (!(pattern instanceof RegExp)) {
    return value === pattern;
  }
  let anchored = anchoredPatternCache.get(pattern);
  if (anchored === undefined) {
    // `g` and `y` make `test` stateful through `lastIndex` and add nothing
    // to a whole-string match, so they are dropped from the cached copy.
    anchored = new RegExp(
      `^(?:${pattern.source})$`,
      pattern.flags.replace(/[gy]/g, "")
    );
    anchoredPatternCache.set(pattern, anchored);
  }
  return anchored.test(value);
};
165
/**
 * Build a `DataField` from its serialized form, turning every serialized
 * subfield into a `Subfield` instance.
 */
var createDataField = (field) => {
  const subfields = field.subfields.map(
    ({ code, value }) => new Subfield(code, value)
  );
  return new DataField(field.tag, field.ind1, field.ind2, subfields);
};
/** Build a `ControlField` from its serialized form. */
var createControlField = (field) => {
  const { tag, value } = field;
  return new ControlField(tag, value);
};
172
+
173
+ // src/marc-record/fixInvalidMarcRecordSerialization.ts
174
/**
 * Replace empty-string indicators in serialized record data with a single space.
 *
 * When `data` is an object with a `fields` array, a shallow copy is returned
 * in which every field object that has both an `ind1` and an `ind2` key gets
 * `""` indicators rewritten to `" "`. Other fields are passed through
 * unchanged, and any other kind of input is returned as-is.
 */
var fixInvalidMarcRecordSerialization = (data) => {
  const hasFieldList = data && typeof data === "object" && "fields" in data && Array.isArray(data.fields);
  if (!hasFieldList) {
    return data;
  }
  const blankIfEmpty = (indicator) => (indicator === "" ? " " : indicator);
  const fields = data.fields.map((field) => {
    const hasBothIndicators = field && typeof field === "object" && "ind1" in field && "ind2" in field;
    if (!hasBothIndicators) {
      return field;
    }
    return {
      ...field,
      ind1: blankIfEmpty(field.ind1),
      ind2: blankIfEmpty(field.ind2)
    };
  });
  return { ...data, fields };
};
184
+
185
+ // src/marc-record/schema.ts
186
/**
 * Build a JSON Schema describing the serialized form of a MARC record.
 *
 * @param {object} [options]
 * @param {boolean} [options.requireFields] - Require at least one field to be present. Defaults to true.
 * @param {boolean} [options.requireSubfields] - Require at least one subfield per data field. Defaults to true.
 * @param {boolean} [options.requireSubfieldValue] - Require a non-empty value for each subfield. Defaults to true.
 * @returns {object} A freshly built schema object.
 */
function createMarcSchema({
  requireFields = true,
  requireSubfields = true,
  requireSubfieldValue = true
} = {}) {
  // Factories rather than shared constants, so that every position in the
  // returned schema is its own object.
  const nonEmptyString = () => ({ type: "string", minLength: 1 });
  const indicator = () => ({ type: "string", minLength: 1, maxLength: 1 });

  const controlFieldSchema = {
    type: "object",
    properties: {
      tag: nonEmptyString(),
      value: nonEmptyString()
    },
    required: ["tag", "value"]
  };

  const subfieldSchema = {
    type: "object",
    properties: {
      code: nonEmptyString(),
      value: {
        type: "string",
        minLength: requireSubfieldValue ? 1 : 0
      }
    },
    required: requireSubfieldValue ? ["code", "value"] : ["code"]
  };

  const dataFieldSchema = {
    type: "object",
    properties: {
      tag: nonEmptyString(),
      ind1: indicator(),
      ind2: indicator(),
      subfields: {
        type: "array",
        minItems: requireSubfields ? 1 : 0,
        items: subfieldSchema
      }
    },
    required: ["tag", "ind1", "ind2", "subfields"]
  };

  return {
    type: "object",
    properties: {
      format: { type: "string" },
      leader: { type: "string" },
      fields: {
        type: "array",
        minItems: requireFields ? 1 : 0,
        items: {
          anyOf: [controlFieldSchema, dataFieldSchema]
        }
      }
    },
    required: ["leader", "fields"]
  };
}
273
+
274
+ // src/marc-record/MarcRecord.ts
275
/**
 * Error raised when serialized record data does not pass schema validation.
 * The validator's error list is exposed on `errors`.
 */
var ValidationFailed = class extends Error {
  errors;
  /**
   * @param {object[]} errors - Error entries reported by the validator.
   */
  constructor(errors) {
    super("MarcRecord validation failed");
    // Without an explicit name the instance inherits "Error" from
    // Error.prototype, hiding the error type in logs and stack traces.
    this.name = "ValidationFailed";
    this.errors = errors;
  }
};
281
// Validator for the default MARC schema, compiled on first use and reused
// afterwards so that validating many records does not recompile the schema
// (and build a new Ajv instance) for each one.
var compiledMarcRecordValidator;
var getMarcRecordValidator = () => {
  if (!compiledMarcRecordValidator) {
    compiledMarcRecordValidator = new Ajv().compile(createMarcSchema());
  }
  return compiledMarcRecordValidator;
};
/**
 * A MARC record: a leader, an ordered list of control/data fields and an
 * optional free-form format label.
 */
var MarcRecord = class _MarcRecord {
  format;
  leader;
  fields;
  /**
   * @param {object} init
   * @param {string} init.leader - Record leader.
   * @param {Array} init.fields - `ControlField` / `DataField` instances, in order.
   * @param {string} [init.format] - Free-form format label.
   */
  constructor({
    leader,
    fields,
    format
  }) {
    this.format = format;
    this.leader = leader;
    this.fields = fields;
  }
  /**
   * Build a record from parsed JSON. Empty-string indicators are normalised
   * first, then the data is validated against the default schema.
   * @param {unknown} data - Parsed JSON.
   * @throws {ValidationFailed} When the data does not match the schema.
   */
  static fromJSON(data) {
    // Refer to the class itself rather than `this`, so the method keeps
    // working when passed around detached, e.g. `items.map(MarcRecord.fromJSON)`.
    const { format, leader, fields } = _MarcRecord.validateJSON(
      fixInvalidMarcRecordSerialization(data)
    );
    return new _MarcRecord({
      leader,
      format,
      fields: fields.map(
        // Serialized data fields are recognised by their `subfields` key.
        (field) => "subfields" in field ? createDataField(field) : createControlField(field)
      )
    });
  }
  /**
   * Validate parsed JSON against the default schema and return it unchanged.
   * @param {unknown} data - Parsed JSON.
   * @throws {ValidationFailed} Carrying the validator's error list.
   */
  static validateJSON(data) {
    const validator = getMarcRecordValidator();
    if (validator(data)) {
      return data;
    }
    // `validator.errors` describes the call that just failed; read it right away.
    throw new ValidationFailed(validator.errors ?? []);
  }
  /** All control fields, in record order. */
  getControlFields() {
    return this.fields.filter(isControlField);
  }
  /**
   * First control field whose tag is exactly `tag`, or undefined.
   * @param {string} tag - Field tag.
   */
  getControlField(tag) {
    return this.getControlFields().find((field) => field.tag === tag);
  }
  /**
   * Data fields filtered by tag and indicators. Each criterion that is left
   * undefined is not applied; given indicators must be strictly equal.
   * @param {string | RegExp} [tag] - Tag to match.
   * @param {{ ind1?: string, ind2?: string }} [indicators] - Indicator values to match.
   */
  getDataFields(tag, indicators) {
    const dataFields = this.fields.filter(isDataField);
    return dataFields.filter(
      (field) => (typeof tag === "undefined" || matchFullString(field.tag, tag)) && (typeof indicators?.ind1 === "undefined" || indicators.ind1 === field.ind1) && (typeof indicators?.ind2 === "undefined" || indicators.ind2 === field.ind2)
    );
  }
  /** First data field selected by `getDataFields`, or undefined. */
  getFirstDataField(tag, indicators) {
    return this.getDataFields(tag, indicators)[0];
  }
  /**
   * Subfields with a matching code from all matching data fields.
   * Subfields whose value is the empty string are left out.
   */
  getSubfields(tag, code, indicators) {
    return this.getDataFields(tag, indicators).flatMap((field) => field.getSubfields(code)).filter((subfield) => subfield.value !== "");
  }
  /** Values of the subfields selected by `getSubfields`. */
  getSubfieldValues(tag, code, indicators) {
    return this.getSubfields(tag, code, indicators).map(({ value }) => value);
  }
  /** First value selected by `getSubfieldValues`, or undefined. */
  getFirstSubfieldValue(tag, code, indicators) {
    return this.getSubfieldValues(tag, code, indicators)[0];
  }
  /** Plain-object form, with every field converted via its own `toJSON`. */
  toJSON() {
    return {
      format: this.format,
      leader: this.leader,
      fields: this.fields.map((field) => field.toJSON())
    };
  }
  /** Text form: one line per field. The leader is not included. */
  toString() {
    return this.fields.map((field) => field.toString()).join("\n");
  }
};
350
+
351
+ // src/marc-record/namespace.ts
352
// Known MARC XML namespace URIs, in the order they are probed.
var marcXmlNamespaces = [
  // MARC 21 XML ("slim") schema, 2002.
  "http://www.loc.gov/MARC21/slim",
  // ISO 25577 MarcXchange version 1 (2007): practically the same schema as
  // MARC 21 XML, with looser restrictions to accommodate other MARC dialects.
  "info:lc/xmlns/marcxchange-v1",
  // MarcXchange version 2: adds embedded data and loosens restrictions
  // further, to the point where an empty record is valid.
  "info:lc/xmlns/marcxchange-v2"
];
/**
 * Return the first known MARC XML namespace URI that occurs anywhere in
 * `input` as a plain substring, or the empty string when none does.
 * @param {string} input - Raw XML text.
 */
function detectNamespace(input) {
  const detected = marcXmlNamespaces.find((candidate) => input.includes(candidate));
  return detected ?? "";
}
372
+
373
+ // src/marc-record/parseMarcXml.ts
374
/**
 * Parse MARC XML into `MarcRecord` instances, one per `record` element found
 * anywhere in the document.
 *
 * @param {string} input - MARC XML text.
 * @param {object} [options]
 * @param {string} [options.namespace] - Namespace URI bound to the `marc` prefix.
 *   When omitted it is detected from the input text.
 * @param {Function} [options.processControlField] - Transforms each control
 *   field; returning a falsy value drops the field.
 * @param {Function} [options.processDataField] - Transforms each data field;
 *   returning a falsy value drops the field.
 * @param {string} [options.format] - Format label stored on every record.
 * @returns {MarcRecord[]} Parsed records, in document order.
 */
function parseMarcXml(input, options = {}) {
  const namespace = options.namespace ?? detectNamespace(input);
  const xmlRecord = parseXml(input, {
    namespaces: {
      marc: namespace
    }
  });
  const processControlField = options.processControlField ?? ((field) => field);
  const processDataField = options.processDataField ?? ((field) => field);
  return xmlRecord.elements("//marc:record").map((marcRecord) => {
    const leader = marcRecord.text("marc:leader") ?? "";
    // Collected with push() instead of spreading a fresh array per item, so
    // that large records are not copied once for every field.
    const fields = [];
    for (const field of marcRecord.elements("marc:controlfield | marc:datafield")) {
      const tag = field.attr("tag");
      // Fields without a tag are skipped.
      if (!tag) {
        continue;
      }
      let newField;
      if (field.element?.localName === "controlfield") {
        newField = processControlField(
          new ControlField(tag, field.text() ?? "")
        );
      } else {
        const subfields = [];
        for (const subfield of field.elements("marc:subfield")) {
          const code = subfield.attr("code");
          // Subfields without a code are skipped; missing text becomes "".
          if (code) {
            subfields.push(new Subfield(code, subfield.text() ?? ""));
          }
        }
        newField = processDataField(
          new DataField(
            tag,
            // Missing indicator attributes become undefined.
            field.attr("ind1") ?? void 0,
            field.attr("ind2") ?? void 0,
            subfields
          )
        );
      }
      if (newField) {
        fields.push(newField);
      }
    }
    return new MarcRecord({ leader, fields, format: options.format });
  });
}
413
+
414
+ export { ControlField, DataField, MarcRecord, Subfield, XmlElement, createControlField, createDataField, parseMarcXml, parseXml };
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "@biblioteksentralen/marc",
3
+ "version": "0.0.1",
4
+ "private": false,
5
+ "description": "MARC record parser and serializer",
6
+ "author": "Biblioteksentralen",
7
+ "license": "MIT",
8
+ "main": "dist/index.js",
9
+ "types": "./dist/index.d.ts",
10
+ "exports": {
11
+ ".": {
12
+ "import": "./dist/index.mjs",
13
+ "require": "./dist/index.js",
14
+ "types": "./dist/index.d.ts"
15
+ }
16
+ },
17
+ "files": [
18
+ "README.md",
19
+ "LICENSE",
20
+ "dist/**/*.{js,mjs,d.ts}"
21
+ ],
22
+ "dependencies": {
23
+ "@xmldom/xmldom": "^0.8.10",
24
+ "ajv": "^8.12.0",
25
+ "ts-log": "^2.2.5",
26
+ "xpath": "^0.0.32",
27
+ "zod": "^3.22.4"
28
+ },
29
+ "devDependencies": {
30
+ "@types/json-schema": "^7.0.15",
31
+ "@types/node": "^18.19.31",
32
+ "@vitest/coverage-v8": "^1.5.0",
33
+ "rimraf": "^5.0.5",
34
+ "tsup": "^8.0.2",
35
+ "typescript": "^5.3.3",
36
+ "vitest": "^1.5.0",
37
+ "@dataplattform/eslint-config": "1.0.0"
38
+ },
39
+ "scripts": {
40
+ "dev": "tsc -p tsconfig.build.json --watch --preserveWatchOutput",
41
+ "build": "tsup src/index.ts --format cjs,esm --dts --treeshake",
42
+ "test": "vitest run --poolOptions.threads.singleThread --reporter=verbose --coverage",
43
+ "test:watch": "vitest",
44
+ "clean": "rimraf dist",
45
+ "lint": "eslint ."
46
+ }
47
+ }