@biblioteksentralen/marc 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,14 +2,14 @@
2
2
 
3
3
  Package for representing MARC records in TypeScript and serializing them to/from MARC XML and JSON.
4
4
 
5
- The JSON serialization is compatible with schema defined by <https://www.npmjs.com/package/@natlibfi/marc-record>
5
+ The JSON serialization is compatible with the schema defined by <https://www.npmjs.com/package/@natlibfi/marc-record>
6
6
 
7
7
  ## Usage
8
8
 
9
- ### Parsing XML
9
+ ### Parsing and serializing XML
10
10
 
11
11
  ```ts
12
- import { parseMarcXml } from "@biblioteksentralen/marc";
12
+ import { parseMarcXml, serializeMarcXml } from "@biblioteksentralen/marc";
13
13
 
14
14
  const xmlRecord = `
15
15
  <record xmlns="http://www.loc.gov/MARC21/slim">
@@ -38,9 +38,32 @@ const title = record
38
38
  .getSubfieldValues("245", /a|b|n|p/)
39
39
  .join(" ")
40
40
  ?.trim();
41
+
42
+ const xml = serializeMarcXml(record);
43
+ ```
44
+
45
+ ### Serializing Line MARC
46
+
47
+ ```ts
48
+ import {
49
+ createControlField,
50
+ createDataField,
51
+ createSubfield,
52
+ serializeLineMarc
53
+ } from "@biblioteksentralen/marc";
54
+
55
+ const record = new MarcRecord({
56
+ leader: "...",
57
+ fields: [
58
+ createControlField(...),
59
+ createDataField(...),
60
+ ]
61
+ });
62
+
63
+ const lineMarc = serializeLineMarc(record);
41
64
  ```
42
65
 
43
- ### Serializing / deserializing
66
+ ### Serializing and deserializing as JSON
44
67
 
45
68
  The MarcRecord class can be JSON serialized:
46
69
 
@@ -1,103 +1,10 @@
1
- import { DOMParser, XMLSerializer } from '@xmldom/xmldom';
2
- import xpath from 'xpath';
3
- import Ajv from 'ajv';
1
+ 'use strict';
4
2
 
5
- // src/xml-element/XmlElement.ts
3
+ var xmlUtils = require('@biblioteksentralen/xml-utils');
4
+ var ajv = require('ajv');
5
+ var zod = require('zod');
6
6
 
7
- // src/xml-element/NodeType.ts
8
- var NodeType = {
9
- ELEMENT_NODE: 1,
10
- ATTRIBUTE_NODE: 2,
11
- TEXT_NODE: 3,
12
- CDATA_SECTION_NODE: 4,
13
- ENTITY_REFERENCE_NODE: 5,
14
- ENTITY_NODE: 6,
15
- PROCESSING_INSTRUCTION_NODE: 7,
16
- COMMENT_NODE: 8,
17
- DOCUMENT_NODE: 9,
18
- DOCUMENT_TYPE_NODE: 1,
19
- DOCUMENT_FRAGMENT_NODE: 1,
20
- NOTATION_NODE: 1
21
- };
22
- var isNode = (value) => typeof value === "object" && value !== null && "nodeType" in value;
23
- var isElementNode = (node) => isNode(node) && node.nodeType === NodeType.ELEMENT_NODE;
24
-
25
- // src/xml-element/XmlElement.ts
26
- var InvalidXml = class extends Error {
27
- };
28
- function parseXml(xmlText, { log = console, namespaces = {} } = {}) {
29
- const errorHandler = (error) => {
30
- const errorMsg = error instanceof Error ? error.message : String(error);
31
- throw new InvalidXml(
32
- `Failed to parse XML response: "${xmlText}". Error: "${errorMsg}"`
33
- );
34
- };
35
- const doc = new DOMParser({
36
- errorHandler: {
37
- warning: (msg) => log.warn(msg),
38
- error: errorHandler,
39
- fatalError: errorHandler
40
- }
41
- }).parseFromString(xmlText, "text/xml");
42
- return new XmlElement(doc, namespaces);
43
- }
44
- var XmlElement = class _XmlElement {
45
- node;
46
- namespaces;
47
- select;
48
- constructor(node, namespaces = {}) {
49
- this.node = node;
50
- this.namespaces = namespaces;
51
- this.select = xpath.useNamespaces(namespaces);
52
- }
53
- toString() {
54
- return new XMLSerializer().serializeToString(this.node);
55
- }
56
- /**
57
- * Get the underlying Element node. Returns undefined if the underlying
58
- * node is the root Document node.
59
- */
60
- get element() {
61
- return isElementNode(this.node) ? this.node : void 0;
62
- }
63
- /**
64
- * Get namespaces used in XPath queries.
65
- */
66
- getNamespaces() {
67
- return this.namespaces;
68
- }
69
- /**
70
- * Find anything (nodes, text, etc.) by xpath.
71
- * @param {string} query - XPath query.
72
- */
73
- query(query) {
74
- return this.select(query, this.node);
75
- }
76
- /**
77
- * Find element nodes by XPath query.
78
- * @param {string} query - XPath query.
79
- */
80
- elements(query) {
81
- return this.query(query).filter(isElementNode).map((elementNode) => new _XmlElement(elementNode, this.namespaces));
82
- }
83
- /**
84
- * Get text content of the current node or the *first* result matched by an XPath query.
85
- * @param {string} query - XPath query.
86
- */
87
- text(query) {
88
- if (!query) {
89
- return this.node.textContent ? this.node.textContent : void 0;
90
- }
91
- return this.elements(query)[0]?.text();
92
- }
93
- /**
94
- * Get atribute value of the current node.
95
- * @param {string} name - Attribute name
96
- */
97
- attr(name) {
98
- return isElementNode(this.node) ? this.node.getAttribute(name) : void 0;
99
- }
100
- };
7
+ // src/marc-record/parseMarcXml.ts
101
8
 
102
9
  // src/marc-record/MarcField.ts
103
10
  var Subfield = class {
@@ -272,6 +179,7 @@ function createMarcSchema({
272
179
  }
273
180
 
274
181
  // src/marc-record/MarcRecord.ts
182
+ var validator = new ajv.Ajv().compile(createMarcSchema());
275
183
  var ValidationFailed = class extends Error {
276
184
  constructor(errors) {
277
185
  super("MarcRecord validation failed");
@@ -304,9 +212,6 @@ var MarcRecord = class _MarcRecord {
304
212
  });
305
213
  }
306
214
  static validateJSON(data) {
307
- const schema = createMarcSchema();
308
- const ajv = new Ajv();
309
- const validator = ajv.compile(schema);
310
215
  if (validator(data)) {
311
216
  return data;
312
217
  }
@@ -343,72 +248,165 @@ var MarcRecord = class _MarcRecord {
343
248
  fields: this.fields.map((field) => field.toJSON())
344
249
  };
345
250
  }
346
- toString() {
347
- return this.fields.map((field) => field.toString()).join("\n");
348
- }
349
251
  };
350
252
 
351
- // src/marc-record/namespace.ts
352
- var marcXmlNamespaces = [
353
- // The original MARC 21 XML schema (2002)
354
- "http://www.loc.gov/MARC21/slim",
355
- // Version 1 of the ISO 25577 MarcXchange schema (2007) is for all practical purposes
356
- // the same schema as the original MARC 21 XML schema, but weakens restrictions to support
357
- // other dialects than MARC 21 (not excluding the most esoteric ones).
358
- "info:lc/xmlns/marcxchange-v1",
359
- // Version 2 of MarcXchange adds support of embedded data, one of the many
360
- // advanced XML features that a poor developer hopes not to encounter in the wild.
361
- // Also weakens restrictions even further so that even a completely empty record is valid.
362
- "info:lc/xmlns/marcxchange-v2"
363
- ];
364
- function detectNamespace(input) {
365
- for (const possibleNamespace of marcXmlNamespaces) {
366
- if (input.indexOf(possibleNamespace) !== -1) {
367
- return possibleNamespace;
368
- }
253
+ // src/marc-record/MarcParseError.ts
254
+ var MarcParseError = class extends Error {
255
+ constructor(message, record) {
256
+ super(message);
257
+ this.record = record;
369
258
  }
370
- return "";
371
- }
259
+ };
372
260
 
373
261
  // src/marc-record/parseMarcXml.ts
374
- function parseMarcXml(input, options = {}) {
375
- const namespace = options.namespace ?? detectNamespace(input);
376
- const xmlRecord = parseXml(input, {
377
- namespaces: {
378
- marc: namespace
379
- }
380
- });
262
+ async function parseMarcXml(input, options = {}) {
263
+ const xmlRecord = typeof input === "string" ? await xmlUtils.parseXml(input) : input;
381
264
  const processControlField = options.processControlField ?? ((field) => field);
382
265
  const processDataField = options.processDataField ?? ((field) => field);
383
- return xmlRecord.elements("//marc:record").map((marcRecord) => {
384
- const leader = marcRecord.text("marc:leader") ?? "";
385
- const fields = marcRecord.elements("marc:controlfield | marc:datafield").reduce((fields2, field) => {
266
+ const { requireFields = true } = options;
267
+ const records = getRecords(xmlRecord);
268
+ return records.map((marcRecord) => {
269
+ const parseError = (message) => new MarcParseError(message, xmlUtils.serializeXml(marcRecord));
270
+ const leader = marcRecord.text("leader");
271
+ if (!leader) {
272
+ throw parseError("MARC record is missing leader");
273
+ }
274
+ const fields = marcRecord.children(/controlfield|datafield/).reduce((fields2, field) => {
386
275
  const tag = field.attr("tag");
387
- if (!tag)
388
- return fields2;
389
- if (field.element?.localName === "controlfield") {
390
- const newField = processControlField(
391
- new ControlField(tag, field.text() ?? "")
392
- );
276
+ if (!tag) return fields2;
277
+ if (field.name === "controlfield") {
278
+ const value = field.text();
279
+ if (!value && options.strict) {
280
+ throw parseError("MARC record control fields cannot be empty");
281
+ }
282
+ const newField = value ? processControlField(new ControlField(tag, value)) : void 0;
393
283
  return newField ? [...fields2, newField] : fields2;
394
284
  } else {
395
- const subfields = field.elements("marc:subfield").reduce((subfields2, subfield) => {
285
+ const subfields = field.children("subfield").reduce((subfields2, subfield) => {
396
286
  const code = subfield.attr("code");
397
- return code ? [...subfields2, new Subfield(code, subfield.text() ?? "")] : subfields2;
287
+ const value = subfield.text();
288
+ return code && value ? [...subfields2, new Subfield(code, value)] : subfields2;
398
289
  }, []);
399
- const newField = processDataField(
400
- new DataField(
401
- tag,
402
- field.attr("ind1") ?? void 0,
403
- field.attr("ind2") ?? void 0,
404
- subfields
405
- )
406
- );
290
+ const ind1 = field.attr("ind1");
291
+ const ind2 = field.attr("ind2");
292
+ if (options.strict && (ind1 === void 0 || ind2 === void 0)) {
293
+ throw parseError("MARC record data fields must have indicators");
294
+ }
295
+ if (subfields.length === 0 && options.strict) {
296
+ throw parseError(
297
+ "MARC record data fields must have at least one subfield"
298
+ );
299
+ }
300
+ const newField = subfields.length ? processDataField(
301
+ new DataField(tag, ind1 ?? " ", ind2 ?? " ", subfields)
302
+ ) : void 0;
407
303
  return newField ? [...fields2, newField] : fields2;
408
304
  }
409
305
  }, []);
306
+ if (fields.length === 0 && requireFields) {
307
+ throw parseError("MARC record must have at least one field");
308
+ }
410
309
  return new MarcRecord({ leader, fields, format: options.format });
411
310
  });
412
311
  }
312
+ function getRecords(node) {
313
+ switch (node.name) {
314
+ case "record":
315
+ return [node];
316
+ case "collection":
317
+ // MarcXchange
318
+ case "metadata":
319
+ return node.children("record");
320
+ default:
321
+ return [];
322
+ }
323
+ }
324
+ var controlFieldSchema = zod.z.object({
325
+ tag: zod.z.string().length(3, "MARC tag must be three characters long"),
326
+ value: zod.z.string()
327
+ });
328
+ var dataFieldSchema = zod.z.object({
329
+ tag: zod.z.string().length(3, "MARC tag must be three characters long"),
330
+ ind1: zod.z.string().length(1, "indicator 1 must be one character long").optional(),
331
+ ind2: zod.z.string().length(1, "indicator 2 must be one character long").optional(),
332
+ subfields: zod.z.array(zod.z.object({ code: zod.z.string(), value: zod.z.string() }))
333
+ });
334
+ var marcRecordZodSchema = zod.z.object({
335
+ format: zod.z.string().optional(),
336
+ leader: zod.z.string(),
337
+ fields: zod.z.array(zod.z.union([controlFieldSchema, dataFieldSchema]))
338
+ });
339
+
340
+ // src/marc-record/serializeLineMarc.ts
341
+ function serializeLineMarc(input) {
342
+ const leader = serializer.leader(input.leader);
343
+ const control = input.getControlFields().map(serializer.controlfield).join("");
344
+ const data = input.getDataFields().map(serializer.datafield).join("");
345
+ return `${leader}${control}${data}^
346
+ `;
347
+ }
348
+ var serializer = {
349
+ leader: (leader) => `*LDR ${leader}
350
+ `,
351
+ controlfield: (field) => `*${field.tag}${field.value}
352
+ `,
353
+ datafield: (field) => {
354
+ const ind1 = field.ind1 ?? " ";
355
+ const ind2 = field.ind2 ?? " ";
356
+ const subfields = field.subfields.map(
357
+ (subfield) => `$${subfield.code}${escapeSubfieldValue(subfield.value)}`
358
+ ).join("");
359
+ return `*${field.tag}${ind1}${ind2}${subfields}
360
+ `;
361
+ }
362
+ };
363
+ var escapeSubfieldValue = (value) => {
364
+ return value.replace(/\$/g, "$$");
365
+ };
366
+ function serializeMarcXml(input, pretty = false) {
367
+ const fields = [
368
+ xmlUtils.createXmlElement("leader", { text: input.leader }),
369
+ ...input.getControlFields().map(
370
+ (field) => xmlUtils.createXmlElement("controlfield", {
371
+ attributes: { tag: field.tag },
372
+ text: field.value
373
+ })
374
+ ),
375
+ ...input.getDataFields().map(
376
+ (field) => xmlUtils.createXmlElement("datafield", {
377
+ attributes: withoutEmptyValues({
378
+ tag: field.tag,
379
+ ind1: field.ind1,
380
+ ind2: field.ind2
381
+ }),
382
+ children: field.subfields.map(
383
+ (subfield) => xmlUtils.createXmlElement("subfield", {
384
+ attributes: { code: subfield.code },
385
+ text: subfield.value
386
+ })
387
+ )
388
+ })
389
+ )
390
+ ];
391
+ const recordNode = xmlUtils.createXmlElement("record", {
392
+ attributes: { xmlns: "http://www.loc.gov/MARC21/slim" },
393
+ children: fields
394
+ });
395
+ return xmlUtils.serializeXml(recordNode, pretty);
396
+ }
397
+ var withoutEmptyValues = (obj) => Object.keys(obj).reduce(
398
+ (acc, key) => obj[key] === void 0 ? { ...acc } : { ...acc, [key]: obj[key] },
399
+ {}
400
+ );
413
401
 
414
- export { ControlField, DataField, MarcRecord, Subfield, XmlElement, createControlField, createDataField, parseMarcXml, parseXml };
402
+ exports.ControlField = ControlField;
403
+ exports.DataField = DataField;
404
+ exports.MarcParseError = MarcParseError;
405
+ exports.MarcRecord = MarcRecord;
406
+ exports.Subfield = Subfield;
407
+ exports.createControlField = createControlField;
408
+ exports.createDataField = createDataField;
409
+ exports.marcRecordZodSchema = marcRecordZodSchema;
410
+ exports.parseMarcXml = parseMarcXml;
411
+ exports.serializeLineMarc = serializeLineMarc;
412
+ exports.serializeMarcXml = serializeMarcXml;
@@ -0,0 +1,199 @@
1
+ import { XmlElement } from '@biblioteksentralen/xml-utils';
2
+ import { z } from 'zod';
3
+
4
+ type SerializedMarcField = SerializedDataField | SerializedControlField;
5
+ interface SerializedControlField {
6
+ tag: string;
7
+ value: string;
8
+ }
9
+ interface SerializedDataField {
10
+ tag: string;
11
+ ind1?: string;
12
+ ind2?: string;
13
+ subfields: SerializedMarcSubfield[];
14
+ }
15
+ interface SerializedMarcSubfield {
16
+ code: string;
17
+ value: string;
18
+ }
19
+ declare class Subfield {
20
+ readonly code: string;
21
+ readonly value: string;
22
+ constructor(code: string, value: string);
23
+ toJSON(): SerializedMarcSubfield;
24
+ toString(): string;
25
+ }
26
+ declare class ControlField {
27
+ readonly tag: string;
28
+ readonly value: string;
29
+ constructor(tag: string, value: string);
30
+ toJSON(): SerializedControlField;
31
+ toString(): string;
32
+ }
33
+ declare class DataField {
34
+ readonly tag: string;
35
+ readonly ind1: string | undefined;
36
+ readonly ind2: string | undefined;
37
+ readonly subfields: Subfield[];
38
+ constructor(tag: string, ind1: string | undefined, ind2: string | undefined, subfields: Subfield[]);
39
+ getSubfields(code?: string | RegExp): Subfield[];
40
+ getFirstSubfield(code: string | RegExp): Subfield | undefined;
41
+ getFirstSubfieldValue(code: string | RegExp): string | undefined;
42
+ toJSON(): SerializedDataField;
43
+ toString(): string;
44
+ }
45
+ type MarcField = ControlField | DataField;
46
+ declare const createDataField: (field: SerializedDataField) => DataField;
47
+ declare const createControlField: (field: SerializedControlField) => ControlField;
48
+
49
+ type Indicators = {
50
+ ind1?: string;
51
+ ind2?: string;
52
+ };
53
+ interface SerializedMarcRecord {
54
+ /**
55
+ * Identifies the MARC format (examples: "MARC 21", "UNIMARC", "danMARC2", ...).
56
+ */
57
+ format?: string;
58
+ /**
59
+ * Corresponds to ISO 2709 record label, 24 bytes
60
+ */
61
+ leader: string;
62
+ /**
63
+ * List of control fields and data fields.
64
+ */
65
+ fields: SerializedMarcField[];
66
+ }
67
+ declare class MarcRecord {
68
+ readonly format: string | undefined;
69
+ readonly leader: string;
70
+ readonly fields: MarcField[];
71
+ constructor({ leader, fields, format, }: {
72
+ leader: string;
73
+ fields: MarcField[];
74
+ format?: string;
75
+ });
76
+ static fromJSON(data: unknown): MarcRecord;
77
+ static validateJSON(data: unknown): SerializedMarcRecord;
78
+ getControlFields(): ControlField[];
79
+ getControlField(tag: string): ControlField | undefined;
80
+ getDataFields(tag?: string | RegExp, indicators?: Indicators): DataField[];
81
+ getFirstDataField(tag: string | RegExp, indicators?: Indicators): DataField | undefined;
82
+ getSubfields(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): Subfield[];
83
+ getSubfieldValues(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): string[];
84
+ getFirstSubfieldValue(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): string | undefined;
85
+ toJSON(): SerializedMarcRecord;
86
+ }
87
+
88
+ interface MarcXmlOptions {
89
+ namespace?: string;
90
+ /**
91
+ * Callback function to transform or filter control fields.
92
+ */
93
+ processControlField?: (field: ControlField) => ControlField | undefined;
94
+ /**
95
+ * Callback function to transform or filter data fields.
96
+ */
97
+ processDataField?: (field: DataField) => DataField | undefined;
98
+ /**
99
+ * Free-form string that specifies the MARC record flavour.
100
+ */
101
+ format?: string;
102
+ /**
103
+ * Whether to require at least one field to be present. Defaults to true.
104
+ */
105
+ requireFields?: boolean;
106
+ /**
107
+ * If set to true, the parser throws an error if a record is missing indicators or has empty
108
+ * fields or subfields. If set to false, it will set default values for missing indicators and
109
+ * skip empty fields and subfields. Defaults to false.
110
+ */
111
+ strict?: boolean;
112
+ }
113
+ declare function parseMarcXml(input: string | XmlElement, options?: MarcXmlOptions): Promise<MarcRecord[]>;
114
+
115
+ declare const marcRecordZodSchema: z.ZodObject<{
116
+ format: z.ZodOptional<z.ZodString>;
117
+ leader: z.ZodString;
118
+ fields: z.ZodArray<z.ZodUnion<[z.ZodObject<{
119
+ tag: z.ZodString;
120
+ value: z.ZodString;
121
+ }, "strip", z.ZodTypeAny, {
122
+ tag: string;
123
+ value: string;
124
+ }, {
125
+ tag: string;
126
+ value: string;
127
+ }>, z.ZodObject<{
128
+ tag: z.ZodString;
129
+ ind1: z.ZodOptional<z.ZodString>;
130
+ ind2: z.ZodOptional<z.ZodString>;
131
+ subfields: z.ZodArray<z.ZodObject<{
132
+ code: z.ZodString;
133
+ value: z.ZodString;
134
+ }, "strip", z.ZodTypeAny, {
135
+ value: string;
136
+ code: string;
137
+ }, {
138
+ value: string;
139
+ code: string;
140
+ }>, "many">;
141
+ }, "strip", z.ZodTypeAny, {
142
+ tag: string;
143
+ subfields: {
144
+ value: string;
145
+ code: string;
146
+ }[];
147
+ ind1?: string | undefined;
148
+ ind2?: string | undefined;
149
+ }, {
150
+ tag: string;
151
+ subfields: {
152
+ value: string;
153
+ code: string;
154
+ }[];
155
+ ind1?: string | undefined;
156
+ ind2?: string | undefined;
157
+ }>]>, "many">;
158
+ }, "strip", z.ZodTypeAny, {
159
+ fields: ({
160
+ tag: string;
161
+ value: string;
162
+ } | {
163
+ tag: string;
164
+ subfields: {
165
+ value: string;
166
+ code: string;
167
+ }[];
168
+ ind1?: string | undefined;
169
+ ind2?: string | undefined;
170
+ })[];
171
+ leader: string;
172
+ format?: string | undefined;
173
+ }, {
174
+ fields: ({
175
+ tag: string;
176
+ value: string;
177
+ } | {
178
+ tag: string;
179
+ subfields: {
180
+ value: string;
181
+ code: string;
182
+ }[];
183
+ ind1?: string | undefined;
184
+ ind2?: string | undefined;
185
+ })[];
186
+ leader: string;
187
+ format?: string | undefined;
188
+ }>;
189
+
190
+ declare function serializeLineMarc(input: MarcRecord): string;
191
+
192
+ declare function serializeMarcXml(input: MarcRecord, pretty?: boolean): string;
193
+
194
+ declare class MarcParseError extends Error {
195
+ readonly record: string;
196
+ constructor(message: string, record: string);
197
+ }
198
+
199
+ export { ControlField, DataField, type MarcField, MarcParseError, MarcRecord, type SerializedControlField, type SerializedDataField, type SerializedMarcField, type SerializedMarcRecord, Subfield, createControlField, createDataField, marcRecordZodSchema, parseMarcXml, serializeLineMarc, serializeMarcXml };
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import xpath from 'xpath';
2
- import { Logger } from 'ts-log';
1
+ import { XmlElement } from '@biblioteksentralen/xml-utils';
2
+ import { z } from 'zod';
3
3
 
4
4
  type SerializedMarcField = SerializedDataField | SerializedControlField;
5
5
  interface SerializedControlField {
@@ -51,8 +51,17 @@ type Indicators = {
51
51
  ind2?: string;
52
52
  };
53
53
  interface SerializedMarcRecord {
54
+ /**
55
+ * Identifies the MARC format (examples: "MARC 21", "UNIMARC", "danMARC2", ...).
56
+ */
54
57
  format?: string;
58
+ /**
59
+ * Corresponds to ISO 2709 record label, 24 bytes
60
+ */
55
61
  leader: string;
62
+ /**
63
+ * List of control fields and data fields.
64
+ */
56
65
  fields: SerializedMarcField[];
57
66
  }
58
67
  declare class MarcRecord {
@@ -74,7 +83,6 @@ declare class MarcRecord {
74
83
  getSubfieldValues(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): string[];
75
84
  getFirstSubfieldValue(tag: string | RegExp, code: string | RegExp, indicators?: Indicators): string | undefined;
76
85
  toJSON(): SerializedMarcRecord;
77
- toString(): string;
78
86
  }
79
87
 
80
88
  interface MarcXmlOptions {
@@ -91,53 +99,101 @@ interface MarcXmlOptions {
91
99
  * Free-form string that specifies the MARC record flavour.
92
100
  */
93
101
  format?: string;
94
- }
95
- declare function parseMarcXml(input: string, options?: MarcXmlOptions): MarcRecord[];
96
-
97
- type NamespaceMap = Record<string, string>;
98
- interface ParseOptions {
99
- log?: Logger;
100
- namespaces?: NamespaceMap;
101
- }
102
- declare function parseXml(xmlText: string, { log, namespaces }?: ParseOptions): XmlElement;
103
- /**
104
- * XPath-focused helper class for extracting information from an XML document.
105
- */
106
- declare class XmlElement {
107
- readonly node: Document | Element;
108
- protected namespaces: NamespaceMap;
109
- protected select: xpath.XPathSelect;
110
- constructor(node: Document | Element, namespaces?: NamespaceMap);
111
- toString(): string;
112
- /**
113
- * Get the underlying Element node. Returns undefined if the underlying
114
- * node is the root Document node.
115
- */
116
- get element(): Element | undefined;
117
102
  /**
118
- * Get namespaces used in XPath queries.
103
+ * Whether to require at least one field to be present. Defaults to true.
119
104
  */
120
- getNamespaces(): NamespaceMap;
105
+ requireFields?: boolean;
121
106
  /**
122
- * Find anything (nodes, text, etc.) by xpath.
123
- * @param {string} query - XPath query.
107
+ * If set to true, the parser throws an error if a record is missing indicators or has empty
108
+ * fields or subfields. If set to false, it will set default values for missing indicators and
109
+ * skip empty fields and subfields. Defaults to false.
124
110
  */
125
- query(query: string): xpath.SelectedValue[];
126
- /**
127
- * Find element nodes by XPath query.
128
- * @param {string} query - XPath query.
129
- */
130
- elements(query: string): XmlElement[];
131
- /**
132
- * Get text content of the current node or the *first* result matched by an XPath query.
133
- * @param {string} query - XPath query.
134
- */
135
- text(query?: string): string | undefined;
136
- /**
137
- * Get atribute value of the current node.
138
- * @param {string} name - Attribute name
139
- */
140
- attr(name: string): string | null | undefined;
111
+ strict?: boolean;
112
+ }
113
+ declare function parseMarcXml(input: string | XmlElement, options?: MarcXmlOptions): Promise<MarcRecord[]>;
114
+
115
+ declare const marcRecordZodSchema: z.ZodObject<{
116
+ format: z.ZodOptional<z.ZodString>;
117
+ leader: z.ZodString;
118
+ fields: z.ZodArray<z.ZodUnion<[z.ZodObject<{
119
+ tag: z.ZodString;
120
+ value: z.ZodString;
121
+ }, "strip", z.ZodTypeAny, {
122
+ tag: string;
123
+ value: string;
124
+ }, {
125
+ tag: string;
126
+ value: string;
127
+ }>, z.ZodObject<{
128
+ tag: z.ZodString;
129
+ ind1: z.ZodOptional<z.ZodString>;
130
+ ind2: z.ZodOptional<z.ZodString>;
131
+ subfields: z.ZodArray<z.ZodObject<{
132
+ code: z.ZodString;
133
+ value: z.ZodString;
134
+ }, "strip", z.ZodTypeAny, {
135
+ value: string;
136
+ code: string;
137
+ }, {
138
+ value: string;
139
+ code: string;
140
+ }>, "many">;
141
+ }, "strip", z.ZodTypeAny, {
142
+ tag: string;
143
+ subfields: {
144
+ value: string;
145
+ code: string;
146
+ }[];
147
+ ind1?: string | undefined;
148
+ ind2?: string | undefined;
149
+ }, {
150
+ tag: string;
151
+ subfields: {
152
+ value: string;
153
+ code: string;
154
+ }[];
155
+ ind1?: string | undefined;
156
+ ind2?: string | undefined;
157
+ }>]>, "many">;
158
+ }, "strip", z.ZodTypeAny, {
159
+ fields: ({
160
+ tag: string;
161
+ value: string;
162
+ } | {
163
+ tag: string;
164
+ subfields: {
165
+ value: string;
166
+ code: string;
167
+ }[];
168
+ ind1?: string | undefined;
169
+ ind2?: string | undefined;
170
+ })[];
171
+ leader: string;
172
+ format?: string | undefined;
173
+ }, {
174
+ fields: ({
175
+ tag: string;
176
+ value: string;
177
+ } | {
178
+ tag: string;
179
+ subfields: {
180
+ value: string;
181
+ code: string;
182
+ }[];
183
+ ind1?: string | undefined;
184
+ ind2?: string | undefined;
185
+ })[];
186
+ leader: string;
187
+ format?: string | undefined;
188
+ }>;
189
+
190
+ declare function serializeLineMarc(input: MarcRecord): string;
191
+
192
+ declare function serializeMarcXml(input: MarcRecord, pretty?: boolean): string;
193
+
194
+ declare class MarcParseError extends Error {
195
+ readonly record: string;
196
+ constructor(message: string, record: string);
141
197
  }
142
198
 
143
- export { ControlField, DataField, type MarcField, MarcRecord, type SerializedControlField, type SerializedDataField, type SerializedMarcRecord, Subfield, XmlElement, createControlField, createDataField, parseMarcXml, parseXml };
199
+ export { ControlField, DataField, type MarcField, MarcParseError, MarcRecord, type SerializedControlField, type SerializedDataField, type SerializedMarcField, type SerializedMarcRecord, Subfield, createControlField, createDataField, marcRecordZodSchema, parseMarcXml, serializeLineMarc, serializeMarcXml };
package/dist/index.js CHANGED
@@ -1,110 +1,8 @@
1
- 'use strict';
1
+ import { parseXml, createXmlElement, serializeXml } from '@biblioteksentralen/xml-utils';
2
+ import { Ajv } from 'ajv';
3
+ import { z } from 'zod';
2
4
 
3
- var xmldom = require('@xmldom/xmldom');
4
- var xpath = require('xpath');
5
- var Ajv = require('ajv');
6
-
7
- function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; }
8
-
9
- var xpath__default = /*#__PURE__*/_interopDefault(xpath);
10
- var Ajv__default = /*#__PURE__*/_interopDefault(Ajv);
11
-
12
- // src/xml-element/XmlElement.ts
13
-
14
- // src/xml-element/NodeType.ts
15
- var NodeType = {
16
- ELEMENT_NODE: 1,
17
- ATTRIBUTE_NODE: 2,
18
- TEXT_NODE: 3,
19
- CDATA_SECTION_NODE: 4,
20
- ENTITY_REFERENCE_NODE: 5,
21
- ENTITY_NODE: 6,
22
- PROCESSING_INSTRUCTION_NODE: 7,
23
- COMMENT_NODE: 8,
24
- DOCUMENT_NODE: 9,
25
- DOCUMENT_TYPE_NODE: 1,
26
- DOCUMENT_FRAGMENT_NODE: 1,
27
- NOTATION_NODE: 1
28
- };
29
- var isNode = (value) => typeof value === "object" && value !== null && "nodeType" in value;
30
- var isElementNode = (node) => isNode(node) && node.nodeType === NodeType.ELEMENT_NODE;
31
-
32
- // src/xml-element/XmlElement.ts
33
- var InvalidXml = class extends Error {
34
- };
35
- function parseXml(xmlText, { log = console, namespaces = {} } = {}) {
36
- const errorHandler = (error) => {
37
- const errorMsg = error instanceof Error ? error.message : String(error);
38
- throw new InvalidXml(
39
- `Failed to parse XML response: "${xmlText}". Error: "${errorMsg}"`
40
- );
41
- };
42
- const doc = new xmldom.DOMParser({
43
- errorHandler: {
44
- warning: (msg) => log.warn(msg),
45
- error: errorHandler,
46
- fatalError: errorHandler
47
- }
48
- }).parseFromString(xmlText, "text/xml");
49
- return new XmlElement(doc, namespaces);
50
- }
51
- var XmlElement = class _XmlElement {
52
- node;
53
- namespaces;
54
- select;
55
- constructor(node, namespaces = {}) {
56
- this.node = node;
57
- this.namespaces = namespaces;
58
- this.select = xpath__default.default.useNamespaces(namespaces);
59
- }
60
- toString() {
61
- return new xmldom.XMLSerializer().serializeToString(this.node);
62
- }
63
- /**
64
- * Get the underlying Element node. Returns undefined if the underlying
65
- * node is the root Document node.
66
- */
67
- get element() {
68
- return isElementNode(this.node) ? this.node : void 0;
69
- }
70
- /**
71
- * Get namespaces used in XPath queries.
72
- */
73
- getNamespaces() {
74
- return this.namespaces;
75
- }
76
- /**
77
- * Find anything (nodes, text, etc.) by xpath.
78
- * @param {string} query - XPath query.
79
- */
80
- query(query) {
81
- return this.select(query, this.node);
82
- }
83
- /**
84
- * Find element nodes by XPath query.
85
- * @param {string} query - XPath query.
86
- */
87
- elements(query) {
88
- return this.query(query).filter(isElementNode).map((elementNode) => new _XmlElement(elementNode, this.namespaces));
89
- }
90
- /**
91
- * Get text content of the current node or the *first* result matched by an XPath query.
92
- * @param {string} query - XPath query.
93
- */
94
- text(query) {
95
- if (!query) {
96
- return this.node.textContent ? this.node.textContent : void 0;
97
- }
98
- return this.elements(query)[0]?.text();
99
- }
100
- /**
101
- * Get atribute value of the current node.
102
- * @param {string} name - Attribute name
103
- */
104
- attr(name) {
105
- return isElementNode(this.node) ? this.node.getAttribute(name) : void 0;
106
- }
107
- };
5
+ // src/marc-record/parseMarcXml.ts
108
6
 
109
7
  // src/marc-record/MarcField.ts
110
8
  var Subfield = class {
@@ -279,6 +177,7 @@ function createMarcSchema({
279
177
  }
280
178
 
281
179
  // src/marc-record/MarcRecord.ts
180
+ var validator = new Ajv().compile(createMarcSchema());
282
181
  var ValidationFailed = class extends Error {
283
182
  constructor(errors) {
284
183
  super("MarcRecord validation failed");
@@ -311,9 +210,6 @@ var MarcRecord = class _MarcRecord {
311
210
  });
312
211
  }
313
212
  static validateJSON(data) {
314
- const schema = createMarcSchema();
315
- const ajv = new Ajv__default.default();
316
- const validator = ajv.compile(schema);
317
213
  if (validator(data)) {
318
214
  return data;
319
215
  }
@@ -350,80 +246,155 @@ var MarcRecord = class _MarcRecord {
350
246
  fields: this.fields.map((field) => field.toJSON())
351
247
  };
352
248
  }
353
- toString() {
354
- return this.fields.map((field) => field.toString()).join("\n");
355
- }
356
249
  };
357
250
 
358
- // src/marc-record/namespace.ts
359
- var marcXmlNamespaces = [
360
- // The original MARC 21 XML schema (2002)
361
- "http://www.loc.gov/MARC21/slim",
362
- // Version 1 of the ISO 25577 MarcXchange schema (2007) is for all practical purposes
363
- // the same schema as the original MARC 21 XML schema, but weakens restrictions to support
364
- // other dialects than MARC 21 (not excluding the most esoteric ones).
365
- "info:lc/xmlns/marcxchange-v1",
366
- // Version 2 of MarcXchange adds support of embedded data, one of the many
367
- // advanced XML features that a poor developer hopes not to encounter in the wild.
368
- // Also weakens restrictions even further so that even a completely empty record is valid.
369
- "info:lc/xmlns/marcxchange-v2"
370
- ];
371
- function detectNamespace(input) {
372
- for (const possibleNamespace of marcXmlNamespaces) {
373
- if (input.indexOf(possibleNamespace) !== -1) {
374
- return possibleNamespace;
375
- }
251
+ // src/marc-record/MarcParseError.ts
252
+ var MarcParseError = class extends Error {
253
+ constructor(message, record) {
254
+ super(message);
255
+ this.record = record;
376
256
  }
377
- return "";
378
- }
257
+ };
379
258
 
380
259
  // src/marc-record/parseMarcXml.ts
381
- function parseMarcXml(input, options = {}) {
382
- const namespace = options.namespace ?? detectNamespace(input);
383
- const xmlRecord = parseXml(input, {
384
- namespaces: {
385
- marc: namespace
386
- }
387
- });
260
+ async function parseMarcXml(input, options = {}) {
261
+ const xmlRecord = typeof input === "string" ? await parseXml(input) : input;
388
262
  const processControlField = options.processControlField ?? ((field) => field);
389
263
  const processDataField = options.processDataField ?? ((field) => field);
390
- return xmlRecord.elements("//marc:record").map((marcRecord) => {
391
- const leader = marcRecord.text("marc:leader") ?? "";
392
- const fields = marcRecord.elements("marc:controlfield | marc:datafield").reduce((fields2, field) => {
264
+ const { requireFields = true } = options;
265
+ const records = getRecords(xmlRecord);
266
+ return records.map((marcRecord) => {
267
+ const parseError = (message) => new MarcParseError(message, serializeXml(marcRecord));
268
+ const leader = marcRecord.text("leader");
269
+ if (!leader) {
270
+ throw parseError("MARC record is missing leader");
271
+ }
272
+ const fields = marcRecord.children(/controlfield|datafield/).reduce((fields2, field) => {
393
273
  const tag = field.attr("tag");
394
- if (!tag)
395
- return fields2;
396
- if (field.element?.localName === "controlfield") {
397
- const newField = processControlField(
398
- new ControlField(tag, field.text() ?? "")
399
- );
274
+ if (!tag) return fields2;
275
+ if (field.name === "controlfield") {
276
+ const value = field.text();
277
+ if (!value && options.strict) {
278
+ throw parseError("MARC record control fields cannot be empty");
279
+ }
280
+ const newField = value ? processControlField(new ControlField(tag, value)) : void 0;
400
281
  return newField ? [...fields2, newField] : fields2;
401
282
  } else {
402
- const subfields = field.elements("marc:subfield").reduce((subfields2, subfield) => {
283
+ const subfields = field.children("subfield").reduce((subfields2, subfield) => {
403
284
  const code = subfield.attr("code");
404
- return code ? [...subfields2, new Subfield(code, subfield.text() ?? "")] : subfields2;
285
+ const value = subfield.text();
286
+ return code && value ? [...subfields2, new Subfield(code, value)] : subfields2;
405
287
  }, []);
406
- const newField = processDataField(
407
- new DataField(
408
- tag,
409
- field.attr("ind1") ?? void 0,
410
- field.attr("ind2") ?? void 0,
411
- subfields
412
- )
413
- );
288
+ const ind1 = field.attr("ind1");
289
+ const ind2 = field.attr("ind2");
290
+ if (options.strict && (ind1 === void 0 || ind2 === void 0)) {
291
+ throw parseError("MARC record data fields must have indicators");
292
+ }
293
+ if (subfields.length === 0 && options.strict) {
294
+ throw parseError(
295
+ "MARC record data fields must have at least one subfield"
296
+ );
297
+ }
298
+ const newField = subfields.length ? processDataField(
299
+ new DataField(tag, ind1 ?? " ", ind2 ?? " ", subfields)
300
+ ) : void 0;
414
301
  return newField ? [...fields2, newField] : fields2;
415
302
  }
416
303
  }, []);
304
+ if (fields.length === 0 && requireFields) {
305
+ throw parseError("MARC record must have at least one field");
306
+ }
417
307
  return new MarcRecord({ leader, fields, format: options.format });
418
308
  });
419
309
  }
310
+ function getRecords(node) {
311
+ switch (node.name) {
312
+ case "record":
313
+ return [node];
314
+ case "collection":
315
+ // MarcXchange
316
+ case "metadata":
317
+ return node.children("record");
318
+ default:
319
+ return [];
320
+ }
321
+ }
322
+ var controlFieldSchema = z.object({
323
+ tag: z.string().length(3, "MARC tag must be three characters long"),
324
+ value: z.string()
325
+ });
326
+ var dataFieldSchema = z.object({
327
+ tag: z.string().length(3, "MARC tag must be three characters long"),
328
+ ind1: z.string().length(1, "indicator 1 must be one character long").optional(),
329
+ ind2: z.string().length(1, "indicator 2 must be one character long").optional(),
330
+ subfields: z.array(z.object({ code: z.string(), value: z.string() }))
331
+ });
332
+ var marcRecordZodSchema = z.object({
333
+ format: z.string().optional(),
334
+ leader: z.string(),
335
+ fields: z.array(z.union([controlFieldSchema, dataFieldSchema]))
336
+ });
337
+
338
+ // src/marc-record/serializeLineMarc.ts
339
+ function serializeLineMarc(input) {
340
+ const leader = serializer.leader(input.leader);
341
+ const control = input.getControlFields().map(serializer.controlfield).join("");
342
+ const data = input.getDataFields().map(serializer.datafield).join("");
343
+ return `${leader}${control}${data}^
344
+ `;
345
+ }
346
+ var serializer = {
347
+ leader: (leader) => `*LDR ${leader}
348
+ `,
349
+ controlfield: (field) => `*${field.tag}${field.value}
350
+ `,
351
+ datafield: (field) => {
352
+ const ind1 = field.ind1 ?? " ";
353
+ const ind2 = field.ind2 ?? " ";
354
+ const subfields = field.subfields.map(
355
+ (subfield) => `$${subfield.code}${escapeSubfieldValue(subfield.value)}`
356
+ ).join("");
357
+ return `*${field.tag}${ind1}${ind2}${subfields}
358
+ `;
359
+ }
360
+ };
361
+ var escapeSubfieldValue = (value) => {
362
+ return value.replace(/\$/g, "$$");
363
+ };
364
+ function serializeMarcXml(input, pretty = false) {
365
+ const fields = [
366
+ createXmlElement("leader", { text: input.leader }),
367
+ ...input.getControlFields().map(
368
+ (field) => createXmlElement("controlfield", {
369
+ attributes: { tag: field.tag },
370
+ text: field.value
371
+ })
372
+ ),
373
+ ...input.getDataFields().map(
374
+ (field) => createXmlElement("datafield", {
375
+ attributes: withoutEmptyValues({
376
+ tag: field.tag,
377
+ ind1: field.ind1,
378
+ ind2: field.ind2
379
+ }),
380
+ children: field.subfields.map(
381
+ (subfield) => createXmlElement("subfield", {
382
+ attributes: { code: subfield.code },
383
+ text: subfield.value
384
+ })
385
+ )
386
+ })
387
+ )
388
+ ];
389
+ const recordNode = createXmlElement("record", {
390
+ attributes: { xmlns: "http://www.loc.gov/MARC21/slim" },
391
+ children: fields
392
+ });
393
+ return serializeXml(recordNode, pretty);
394
+ }
395
+ var withoutEmptyValues = (obj) => Object.keys(obj).reduce(
396
+ (acc, key) => obj[key] === void 0 ? { ...acc } : { ...acc, [key]: obj[key] },
397
+ {}
398
+ );
420
399
 
421
- exports.ControlField = ControlField;
422
- exports.DataField = DataField;
423
- exports.MarcRecord = MarcRecord;
424
- exports.Subfield = Subfield;
425
- exports.XmlElement = XmlElement;
426
- exports.createControlField = createControlField;
427
- exports.createDataField = createDataField;
428
- exports.parseMarcXml = parseMarcXml;
429
- exports.parseXml = parseXml;
400
+ export { ControlField, DataField, MarcParseError, MarcRecord, Subfield, createControlField, createDataField, marcRecordZodSchema, parseMarcXml, serializeLineMarc, serializeMarcXml };
package/package.json CHANGED
@@ -1,38 +1,43 @@
1
1
  {
2
2
  "name": "@biblioteksentralen/marc",
3
- "version": "0.0.1",
3
+ "version": "0.0.3",
4
4
  "private": false,
5
+ "type": "module",
5
6
  "description": "MARC record parser and serializer",
6
7
  "author": "Biblioteksentralen",
7
8
  "license": "MIT",
8
- "main": "dist/index.js",
9
+ "main": "./dist/index.cjs",
10
+ "module": "./dist/index.js",
9
11
  "types": "./dist/index.d.ts",
10
12
  "exports": {
11
- ".": {
12
- "import": "./dist/index.mjs",
13
- "require": "./dist/index.js",
14
- "types": "./dist/index.d.ts"
13
+ "import": {
14
+ "types": "./dist/index.d.ts",
15
+ "import": "./dist/index.js"
16
+ },
17
+ "require": {
18
+ "types": "./dist/index.d.cts",
19
+ "require": "./dist/index.cjs"
15
20
  }
16
21
  },
17
22
  "files": [
18
23
  "README.md",
19
24
  "LICENSE",
20
- "dist/**/*.{js,mjs,d.ts}"
25
+ "dist/**/*.{js,cjs,mjs,ts,d.ts,d.cts}"
21
26
  ],
22
27
  "dependencies": {
23
- "@xmldom/xmldom": "^0.8.10",
24
- "ajv": "^8.12.0",
28
+ "ajv": "^8.17.1",
25
29
  "ts-log": "^2.2.5",
26
- "xpath": "^0.0.32",
27
- "zod": "^3.22.4"
30
+ "zod": "^3.23.8",
31
+ "@biblioteksentralen/xml-utils": "^0.0.1"
28
32
  },
29
33
  "devDependencies": {
34
+ "@arethetypeswrong/cli": "^0.15.4",
30
35
  "@types/json-schema": "^7.0.15",
31
- "@types/node": "^18.19.31",
36
+ "@types/node": "^20.14.14",
32
37
  "@vitest/coverage-v8": "^1.5.0",
33
38
  "rimraf": "^5.0.5",
34
39
  "tsup": "^8.0.2",
35
- "typescript": "^5.3.3",
40
+ "typescript": "^5.6.2",
36
41
  "vitest": "^1.5.0",
37
42
  "@dataplattform/eslint-config": "1.0.0"
38
43
  },
@@ -42,6 +47,7 @@
42
47
  "test": "vitest run --poolOptions.threads.singleThread --reporter=verbose --coverage",
43
48
  "test:watch": "vitest",
44
49
  "clean": "rimraf dist",
45
- "lint": "eslint ."
50
+ "lint": "eslint .",
51
+ "lint:package": "attw --pack"
46
52
  }
47
53
  }