marc-ts 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +172 -475
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +4 -4
- package/dist/index.js +198 -180
- package/dist/index.js.map +1 -1
- package/dist/marcjson.cjs +1 -1
- package/dist/marcjson.cjs.map +1 -1
- package/dist/marcjson.d.ts +11 -6
- package/dist/marcjson.js +53 -44
- package/dist/marcjson.js.map +1 -1
- package/dist/marctxt.cjs +4 -4
- package/dist/marctxt.cjs.map +1 -1
- package/dist/marctxt.d.ts +0 -10
- package/dist/marctxt.js +43 -50
- package/dist/marctxt.js.map +1 -1
- package/dist/marcxml.cjs +5 -5
- package/dist/marcxml.cjs.map +1 -1
- package/dist/marcxml.d.ts +0 -9
- package/dist/marcxml.js +67 -74
- package/dist/marcxml.js.map +1 -1
- package/dist/parser.d.ts +9 -38
- package/dist/serializer.d.ts +8 -39
- package/dist/warnings.d.ts +1 -2
- package/package.json +1 -1
package/dist/marcxml.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { t as $ } from "./types-c4Mo9m9u.js";
|
|
2
|
-
var
|
|
2
|
+
var v = /* @__PURE__ */ new Map([
|
|
3
3
|
["amp", "&"],
|
|
4
4
|
["lt", "<"],
|
|
5
5
|
["gt", ">"],
|
|
@@ -7,16 +7,16 @@ var M = /* @__PURE__ */ new Map([
|
|
|
7
7
|
["apos", "'"]
|
|
8
8
|
]);
|
|
9
9
|
function h(e) {
|
|
10
|
-
return e.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (i,
|
|
11
|
-
if (
|
|
12
|
-
const n = parseInt(
|
|
10
|
+
return e.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (i, s, c, t) => {
|
|
11
|
+
if (s !== void 0) {
|
|
12
|
+
const n = parseInt(s, 16);
|
|
13
13
|
return n >= 0 && n <= 1114111 ? String.fromCodePoint(n) : "�";
|
|
14
14
|
}
|
|
15
|
-
if (
|
|
16
|
-
const n = parseInt(
|
|
15
|
+
if (c !== void 0) {
|
|
16
|
+
const n = parseInt(c, 10);
|
|
17
17
|
return n >= 0 && n <= 1114111 ? String.fromCodePoint(n) : "�";
|
|
18
18
|
}
|
|
19
|
-
return
|
|
19
|
+
return v.get(t) ?? i;
|
|
20
20
|
});
|
|
21
21
|
}
|
|
22
22
|
function u(e) {
|
|
@@ -27,38 +27,38 @@ function f(e) {
|
|
|
27
27
|
return i === -1 ? e : e.slice(i + 1);
|
|
28
28
|
}
|
|
29
29
|
function m(e) {
|
|
30
|
-
const i = {},
|
|
31
|
-
let
|
|
32
|
-
for (; (
|
|
33
|
-
const t = f(
|
|
34
|
-
i[t] = h(
|
|
30
|
+
const i = {}, s = /([a-zA-Z_:][^\s=]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
|
|
31
|
+
let c;
|
|
32
|
+
for (; (c = s.exec(e)) !== null; ) {
|
|
33
|
+
const t = f(c[1]);
|
|
34
|
+
i[t] = h(c[2] ?? c[3] ?? "");
|
|
35
35
|
}
|
|
36
36
|
return i;
|
|
37
37
|
}
|
|
38
|
-
function
|
|
38
|
+
function b(e) {
|
|
39
39
|
const i = [];
|
|
40
|
-
let
|
|
41
|
-
for (;
|
|
42
|
-
const
|
|
43
|
-
if (
|
|
44
|
-
e.slice(
|
|
40
|
+
let s = 0;
|
|
41
|
+
for (; s < e.length; ) {
|
|
42
|
+
const c = e.indexOf("<", s);
|
|
43
|
+
if (c === -1) {
|
|
44
|
+
e.slice(s).trim() && i.push({
|
|
45
45
|
type: "text",
|
|
46
|
-
text: h(e.slice(
|
|
46
|
+
text: h(e.slice(s))
|
|
47
47
|
});
|
|
48
48
|
break;
|
|
49
49
|
}
|
|
50
|
-
if (
|
|
51
|
-
const
|
|
52
|
-
|
|
50
|
+
if (c > s) {
|
|
51
|
+
const o = e.slice(s, c);
|
|
52
|
+
o.trim() && i.push({
|
|
53
53
|
type: "text",
|
|
54
|
-
text: h(
|
|
54
|
+
text: h(o)
|
|
55
55
|
});
|
|
56
56
|
}
|
|
57
|
-
const t = e.indexOf(">",
|
|
57
|
+
const t = e.indexOf(">", c);
|
|
58
58
|
if (t === -1) break;
|
|
59
|
-
const n = e.slice(
|
|
59
|
+
const n = e.slice(c + 1, t);
|
|
60
60
|
if (n.startsWith("!") || n.startsWith("?")) {
|
|
61
|
-
|
|
61
|
+
s = t + 1;
|
|
62
62
|
continue;
|
|
63
63
|
}
|
|
64
64
|
if (n.startsWith("/")) i.push({
|
|
@@ -66,60 +66,60 @@ function v(e) {
|
|
|
66
66
|
name: f(n.slice(1).trim())
|
|
67
67
|
});
|
|
68
68
|
else if (n.endsWith("/")) {
|
|
69
|
-
const
|
|
69
|
+
const o = n.slice(0, -1).trim(), r = o.search(/\s/), l = r === -1 ? o : o.slice(0, r), p = r === -1 ? "" : o.slice(r);
|
|
70
70
|
i.push({
|
|
71
71
|
type: "self-close",
|
|
72
72
|
name: f(l),
|
|
73
73
|
attrs: m(p)
|
|
74
74
|
});
|
|
75
75
|
} else {
|
|
76
|
-
const
|
|
76
|
+
const o = n.search(/\s/), r = o === -1 ? n : n.slice(0, o), l = o === -1 ? "" : n.slice(o);
|
|
77
77
|
i.push({
|
|
78
78
|
type: "open",
|
|
79
|
-
name: f(
|
|
79
|
+
name: f(r),
|
|
80
80
|
attrs: m(l)
|
|
81
81
|
});
|
|
82
82
|
}
|
|
83
|
-
|
|
83
|
+
s = t + 1;
|
|
84
84
|
}
|
|
85
85
|
return i;
|
|
86
86
|
}
|
|
87
87
|
function w(e, i) {
|
|
88
|
-
let
|
|
89
|
-
const
|
|
88
|
+
let s = "";
|
|
89
|
+
const c = [];
|
|
90
90
|
let t = i;
|
|
91
91
|
for (; t < e.length; ) {
|
|
92
92
|
const n = e[t];
|
|
93
93
|
if (n.type === "close" && n.name === "record") return {
|
|
94
94
|
record: {
|
|
95
|
-
leader:
|
|
96
|
-
fields:
|
|
95
|
+
leader: s,
|
|
96
|
+
fields: c
|
|
97
97
|
},
|
|
98
98
|
end: t + 1
|
|
99
99
|
};
|
|
100
100
|
if (n.type === "open" && n.name === "leader") {
|
|
101
|
-
t++, t < e.length && e[t].type === "text" && (
|
|
101
|
+
t++, t < e.length && e[t].type === "text" && (s = e[t].text.trim(), t++), t < e.length && e[t].type === "close" && t++;
|
|
102
102
|
continue;
|
|
103
103
|
}
|
|
104
104
|
if (n.type === "self-close" && n.name === "controlfield") {
|
|
105
|
-
|
|
105
|
+
c.push({
|
|
106
106
|
tag: n.attrs?.tag ?? "",
|
|
107
107
|
data: ""
|
|
108
108
|
}), t++;
|
|
109
109
|
continue;
|
|
110
110
|
}
|
|
111
111
|
if (n.type === "open" && n.name === "controlfield") {
|
|
112
|
-
const
|
|
112
|
+
const o = n.attrs?.tag ?? "";
|
|
113
113
|
t++;
|
|
114
|
-
let
|
|
115
|
-
t < e.length && e[t].type === "text" && (
|
|
116
|
-
tag:
|
|
117
|
-
data:
|
|
114
|
+
let r = "";
|
|
115
|
+
t < e.length && e[t].type === "text" && (r = e[t].text ?? "", t++), t < e.length && e[t].type === "close" && t++, c.push({
|
|
116
|
+
tag: o,
|
|
117
|
+
data: r
|
|
118
118
|
});
|
|
119
119
|
continue;
|
|
120
120
|
}
|
|
121
121
|
if (n.type === "self-close" && n.name === "datafield") {
|
|
122
|
-
|
|
122
|
+
c.push({
|
|
123
123
|
tag: n.attrs?.tag ?? "",
|
|
124
124
|
indicator1: n.attrs?.ind1 ?? " ",
|
|
125
125
|
indicator2: n.attrs?.ind2 ?? " ",
|
|
@@ -128,7 +128,7 @@ function w(e, i) {
|
|
|
128
128
|
continue;
|
|
129
129
|
}
|
|
130
130
|
if (n.type === "open" && n.name === "datafield") {
|
|
131
|
-
const
|
|
131
|
+
const o = n.attrs?.tag ?? "", r = n.attrs?.ind1 ?? " ", l = n.attrs?.ind2 ?? " ", p = [];
|
|
132
132
|
for (t++; t < e.length; ) {
|
|
133
133
|
const d = e[t];
|
|
134
134
|
if (d.type === "close" && d.name === "datafield") {
|
|
@@ -147,9 +147,9 @@ function w(e, i) {
|
|
|
147
147
|
}
|
|
148
148
|
t++;
|
|
149
149
|
}
|
|
150
|
-
|
|
151
|
-
tag:
|
|
152
|
-
indicator1:
|
|
150
|
+
c.push({
|
|
151
|
+
tag: o,
|
|
152
|
+
indicator1: r,
|
|
153
153
|
indicator2: l,
|
|
154
154
|
subfields: p
|
|
155
155
|
});
|
|
@@ -159,62 +159,55 @@ function w(e, i) {
|
|
|
159
159
|
}
|
|
160
160
|
return {
|
|
161
161
|
record: {
|
|
162
|
-
leader:
|
|
163
|
-
fields:
|
|
162
|
+
leader: s,
|
|
163
|
+
fields: c
|
|
164
164
|
},
|
|
165
165
|
end: t
|
|
166
166
|
};
|
|
167
167
|
}
|
|
168
|
-
function
|
|
169
|
-
const i =
|
|
170
|
-
let
|
|
171
|
-
for (;
|
|
172
|
-
const t = i[
|
|
168
|
+
function M(e) {
|
|
169
|
+
const i = b(e), s = [];
|
|
170
|
+
let c = 0;
|
|
171
|
+
for (; c < i.length; ) {
|
|
172
|
+
const t = i[c];
|
|
173
173
|
if (t.type === "open" && t.name === "record") {
|
|
174
|
-
const { record: n, end:
|
|
175
|
-
|
|
174
|
+
const { record: n, end: o } = w(i, c + 1);
|
|
175
|
+
s.push(n), c = o;
|
|
176
176
|
continue;
|
|
177
177
|
}
|
|
178
|
-
|
|
178
|
+
c++;
|
|
179
179
|
}
|
|
180
|
-
return
|
|
181
|
-
}
|
|
182
|
-
function E(e) {
|
|
183
|
-
const i = b(e);
|
|
184
|
-
if (i.length === 0) throw new Error("No MARC record found in MARCXML input");
|
|
185
|
-
return i[0];
|
|
180
|
+
return s;
|
|
186
181
|
}
|
|
187
182
|
var A = `<?xml version="1.0" encoding="UTF-8"?>
|
|
188
183
|
`, y = 'xmlns="http://www.loc.gov/MARC21/slim"', a = " ";
|
|
189
184
|
function C(e) {
|
|
190
185
|
const i = [`<record ${y}>`];
|
|
191
186
|
i.push(`${a}<leader>${u(e.leader)}</leader>`);
|
|
192
|
-
for (const
|
|
187
|
+
for (const s of e.fields) if ($(s)) i.push(`${a}<controlfield tag="${s.tag}">${u(s.data)}</controlfield>`);
|
|
193
188
|
else {
|
|
194
|
-
const
|
|
195
|
-
i.push(`${a}<datafield tag="${
|
|
196
|
-
for (const n of
|
|
189
|
+
const c = s.indicator1 === " " ? " " : s.indicator1, t = s.indicator2 === " " ? " " : s.indicator2;
|
|
190
|
+
i.push(`${a}<datafield tag="${s.tag}" ind1="${c}" ind2="${t}">`);
|
|
191
|
+
for (const n of s.subfields) i.push(`${a}${a}<subfield code="${n.code}">${u(n.value)}</subfield>`);
|
|
197
192
|
i.push(`${a}</datafield>`);
|
|
198
193
|
}
|
|
199
194
|
return i.push("</record>"), i.join(`
|
|
200
195
|
`);
|
|
201
196
|
}
|
|
202
|
-
function
|
|
197
|
+
function X(e) {
|
|
203
198
|
const i = [A, `<collection ${y}>`];
|
|
204
|
-
for (const
|
|
205
|
-
const
|
|
199
|
+
for (const s of e) {
|
|
200
|
+
const c = C(s).split(`
|
|
206
201
|
`).map((t) => a + t).join(`
|
|
207
202
|
`);
|
|
208
|
-
i.push(
|
|
203
|
+
i.push(c);
|
|
209
204
|
}
|
|
210
205
|
return i.push("</collection>"), i.join(`
|
|
211
206
|
`);
|
|
212
207
|
}
|
|
213
208
|
export {
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
I as serializeMarcXml,
|
|
217
|
-
C as serializeMarcXmlRecord
|
|
209
|
+
M as parseMarcXml,
|
|
210
|
+
X as serializeMarcXml
|
|
218
211
|
};
|
|
219
212
|
|
|
220
213
|
//# sourceMappingURL=marcxml.js.map
|
package/dist/marcxml.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"marcxml.js","names":[],"sources":["../src/marcxml.ts"],"sourcesContent":["/**\n * MARCXML parser and serializer.\n *\n * Supports the Library of Congress MARCXML schema:\n * http://www.loc.gov/MARC21/slim\n *\n * Parsing is done with a hand-rolled state machine — no XML library needed.\n * The MARCXML format is sufficiently regular (fixed element names, no arbitrary\n * nesting) that a full DOM parser is unnecessary.\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── XML entity handling ─────────────────────────────────────────────────────\n\nconst ENTITY_MAP: ReadonlyMap<string, string> = new Map([\n ['amp', '&'],\n ['lt', '<'],\n ['gt', '>'],\n ['quot', '\"'],\n ['apos', \"'\"],\n]);\n\nfunction unescapeXml(text: string): string {\n return text.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (_, hex, dec, name) => {\n if (hex !== undefined) {\n const cp = parseInt(hex, 16);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n if (dec !== undefined) {\n const cp = parseInt(dec, 10);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n return ENTITY_MAP.get(name) ?? _;\n });\n}\n\nfunction escapeXml(text: string): string {\n return text\n // XML 1.0 forbids most C0 control characters in document text. There is no\n // valid XML 1.0 representation for them, so substitute the Unicode\n // replacement character to keep the output well-formed.\n .replace(/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/g, '�')\n .replace(/&/g, '&')\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n // Preserve literal CR through the XML round-trip: XML parsers normalize\n // bare \\r and \\r\\n to \\n, so we must encode CR as a numeric reference.\n .replace(/\\r/g, ' ');\n}\n\n// ─── Minimal tokeniser ────────────────────────────────────────────────────────\n\ninterface Token {\n type: 'open' | 'close' | 'self-close' | 'text';\n /** Local name (no namespace prefix) */\n name?: string;\n attrs?: Record<string, string>;\n text?: string;\n}\n\n/**\n * Strip namespace prefix from a tag name, e.g. \"marc:record\" → \"record\".\n */\nfunction localName(raw: string): string {\n const colon = raw.indexOf(':');\n return colon === -1 ? raw : raw.slice(colon + 1);\n}\n\n/**\n * Parse `key=\"value\"` pairs out of an attribute string.\n */\nfunction parseAttrs(attrStr: string): Record<string, string> {\n const attrs: Record<string, string> = {};\n const re = /([a-zA-Z_:][^\\s=]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')/g;\n let m: RegExpExecArray | null;\n while ((m = re.exec(attrStr)) !== null) {\n const key = localName(m[1]!);\n attrs[key] = unescapeXml(m[2] ?? m[3] ?? '');\n }\n return attrs;\n}\n\n/**\n * Tokenise an XML string into a flat stream of open/close/text tokens.\n * Skips processing instructions, comments, and DOCTYPE declarations.\n * Sufficient for the well-constrained MARCXML format.\n */\nfunction tokenise(xml: string): Token[] {\n const tokens: Token[] = [];\n let i = 0;\n\n while (i < xml.length) {\n const ltPos = xml.indexOf('<', i);\n\n // Text node before next tag\n if (ltPos === -1) {\n const text = xml.slice(i).trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(xml.slice(i)) });\n break;\n }\n\n if (ltPos > i) {\n const raw = xml.slice(i, ltPos);\n const text = raw.trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(raw) });\n }\n\n const gtPos = xml.indexOf('>', ltPos);\n if (gtPos === -1) break;\n\n const tag = xml.slice(ltPos + 1, gtPos);\n\n // Skip comments, PIs, DOCTYPE\n if (tag.startsWith('!') || tag.startsWith('?')) {\n i = gtPos + 1;\n continue;\n }\n\n if (tag.startsWith('/')) {\n tokens.push({ type: 'close', name: localName(tag.slice(1).trim()) });\n } else if (tag.endsWith('/')) {\n const inner = tag.slice(0, -1).trim();\n const spaceIdx = inner.search(/\\s/);\n const name = spaceIdx === -1 ? inner : inner.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : inner.slice(spaceIdx);\n tokens.push({ type: 'self-close', name: localName(name), attrs: parseAttrs(attrStr) });\n } else {\n const spaceIdx = tag.search(/\\s/);\n const name = spaceIdx === -1 ? tag : tag.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : tag.slice(spaceIdx);\n tokens.push({ type: 'open', name: localName(name), attrs: parseAttrs(attrStr) });\n }\n\n i = gtPos + 1;\n }\n\n return tokens;\n}\n\n// ─── MARCXML parser ───────────────────────────────────────────────────────────\n\n/**\n * Parse one `<record>` element's worth of tokens into a MarcRecord.\n * Mutates `pos` via the returned index.\n */\nfunction parseRecordTokens(tokens: Token[], start: number): { record: MarcRecord; end: number } {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n let i = start;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n\n if (tok.type === 'close' && tok.name === 'record') {\n return { record: { leader, fields }, end: i + 1 };\n }\n\n if (tok.type === 'open' && tok.name === 'leader') {\n i++;\n if (i < tokens.length && tokens[i]!.type === 'text') {\n leader = tokens[i]!.text!.trim();\n i++;\n }\n // consume </leader>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'controlfield') {\n fields.push({ tag: tok.attrs?.['tag'] ?? '', data: '' });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'] ?? '';\n i++;\n let data = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n data = tokens[i]!.text ?? '';\n i++;\n }\n // consume </controlfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n fields.push({ tag, data });\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'datafield') {\n fields.push({\n tag: tok.attrs?.['tag'] ?? '',\n indicator1: tok.attrs?.['ind1'] ?? ' ',\n indicator2: tok.attrs?.['ind2'] ?? ' ',\n subfields: [],\n });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'] ?? '';\n const indicator1 = tok.attrs?.['ind1'] ?? ' ';\n const indicator2 = tok.attrs?.['ind2'] ?? ' ';\n const subfields: Subfield[] = [];\n i++;\n\n while (i < tokens.length) {\n const stok = tokens[i]!;\n if (stok.type === 'close' && stok.name === 'datafield') {\n i++;\n break;\n }\n if (stok.type === 'open' && stok.name === 'subfield') {\n const code = stok.attrs?.['code'] ?? '';\n i++;\n let value = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n value = tokens[i]!.text ?? '';\n i++;\n }\n // consume </subfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n subfields.push({ code, value });\n continue;\n }\n i++;\n }\n\n fields.push({ tag, indicator1, indicator2, subfields });\n continue;\n }\n\n i++;\n }\n\n return { record: { leader, fields }, end: i };\n}\n\n/**\n * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.\n * Returns all records found.\n */\nexport function parseMarcXml(xml: string): MarcRecord[] {\n const tokens = tokenise(xml);\n const records: MarcRecord[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n if (tok.type === 'open' && tok.name === 'record') {\n const { record, end } = parseRecordTokens(tokens, i + 1);\n records.push(record);\n i = end;\n continue;\n }\n i++;\n }\n\n return records;\n}\n\n/**\n * Parse a MARCXML string expected to contain exactly one `<record>`.\n * Throws if no record is found.\n */\nexport function parseMarcXmlRecord(xml: string): MarcRecord {\n const records = parseMarcXml(xml);\n if (records.length === 0) throw new Error('No MARC record found in MARCXML input');\n return records[0]!;\n}\n\n// ─── MARCXML serializer ───────────────────────────────────────────────────────\n\nconst XML_HEADER = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n';\nconst COLLECTION_NS = 'xmlns=\"http://www.loc.gov/MARC21/slim\"';\nconst INDENT = ' ';\n\n/**\n * Serialize a single MarcRecord to a `<record>` XML element string (no collection wrapper).\n */\nexport function serializeMarcXmlRecord(record: MarcRecord): string {\n const lines: string[] = [`<record ${COLLECTION_NS}>`];\n lines.push(`${INDENT}<leader>${escapeXml(record.leader)}</leader>`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`${INDENT}<controlfield tag=\"${field.tag}\">${escapeXml(field.data)}</controlfield>`);\n } else {\n const ind1 = field.indicator1 === ' ' ? ' ' : field.indicator1;\n const ind2 = field.indicator2 === ' ' ? ' ' : field.indicator2;\n lines.push(`${INDENT}<datafield tag=\"${field.tag}\" ind1=\"${ind1}\" ind2=\"${ind2}\">`);\n for (const sf of field.subfields) {\n lines.push(\n `${INDENT}${INDENT}<subfield code=\"${sf.code}\">${escapeXml(sf.value)}</subfield>`\n );\n }\n lines.push(`${INDENT}</datafield>`);\n }\n }\n\n lines.push('</record>');\n return lines.join('\\n');\n}\n\n/**\n * Serialize one or more MarcRecords into a MARCXML `<collection>` document.\n */\nexport function serializeMarcXml(records: MarcRecord[]): string {\n const parts: string[] = [\n XML_HEADER,\n `<collection ${COLLECTION_NS}>`,\n ];\n\n for (const record of records) {\n // Indent each record element by one level inside <collection>\n const recordXml = serializeMarcXmlRecord(record)\n .split('\\n')\n .map((line) => INDENT + line)\n .join('\\n');\n parts.push(recordXml);\n }\n\n parts.push('</collection>');\n return parts.join('\\n');\n}\n"],"mappings":";AAgBA,IAAM,IAA0C,oBAAI,IAAI;AAAA,EACtD,CAAC,OAAO,GAAG;AAAA,EACX,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,QAAQ,GAAG;AAAA,EACZ,CAAC,QAAQ,GAAG;AACd,CAAC;AAED,SAAS,EAAY,GAAsB;AACzC,SAAO,EAAK,QAAQ,iDAAA,CAAkD,GAAG,GAAK,GAAK,MAAS;AAC1F,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,WAAO,EAAW,IAAI,CAAI,KAAK;AAAA,EACjC,CAAC;AACH;AAEA,SAAS,EAAU,GAAsB;AACvC,SAAO,EAIJ,QAAQ,iCAAiC,GAAG,EAC5C,QAAQ,MAAM,OAAO,EACrB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,QAAQ,EAGtB,QAAQ,OAAO,OAAO;AAC3B;AAeA,SAAS,EAAU,GAAqB;AACtC,QAAM,IAAQ,EAAI,QAAQ,GAAG;AAC7B,SAAO,MAAU,KAAK,IAAM,EAAI,MAAM,IAAQ,CAAC;AACjD;AAKA,SAAS,EAAW,GAAyC;AAC3D,QAAM,IAAgC,CAAC,GACjC,IAAK;AACX,MAAI;AACJ,UAAQ,IAAI,EAAG,KAAK,CAAO,OAAO,QAAM;AACtC,UAAM,IAAM,EAAU,EAAE,CAAA,CAAG;AAC3B,IAAA,EAAM,CAAA,IAAO,EAAY,EAAE,CAAA,KAAM,EAAE,CAAA,KAAM,EAAE;AAAA,EAC7C;AACA,SAAO;AACT;AAOA,SAAS,EAAS,GAAsB;AACtC,QAAM,IAAkB,CAAC;AACzB,MAAI,IAAI;AAER,SAAO,IAAI,EAAI,UAAQ;AACrB,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAC;AAGhC,QAAI,MAAU,IAAI;AAEhB,MADa,EAAI,MAAM,CAAC,EAAE,KACtB,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,EAAI,MAAM,CAAC,CAAC;AAAA,MAAE,CAAC;AACvE;AAAA,IACF;AAEA,QAAI,IAAQ,GAAG;AACb,YAAM,IAAM,EAAI,MAAM,GAAG,CAAK;AAE9B,MADa,EAAI,KACb,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,CAAG;AAAA,MAAE,CAAC;AAAA,IAChE;AAEA,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAK;AACpC,QAAI,MAAU,GAAI;AAElB,UAAM,IAAM,EAAI,MAAM,IAAQ,GAAG,CAAK;AAGtC,QAAI,EAAI,WAAW,GAAG,KAAK,EAAI,WAAW,GAAG,GAAG;AAC9C,MAAA,IAAI,IAAQ;AACZ;AAAA,IACF;AAEA,QAAI,EAAI,WAAW,GAAG,EACpB,CAAA,EAAO,KAAK;AAAA,MAAE,MAAM;AAAA,MAAS,MAAM,EAAU,EAAI,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,IAAE,CAAC;AAAA,aAC1D,EAAI,SAAS,GAAG,GAAG;AAC5B,YAAM,IAAQ,EAAI,MAAM,GAAG,EAAE,EAAE,KAAK,GAC9B,IAAW,EAAM,OAAO,IAAI,GAC5B,IAAO,MAAa,KAAK,IAAQ,EAAM,MAAM,GAAG,CAAQ,GACxD,IAAU,MAAa,KAAK,KAAK,EAAM,MAAM,CAAQ;AAC3D,MAAA,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAc,MAAM,EAAU,CAAI;AAAA,QAAG,OAAO,EAAW,CAAO;AAAA,MAAE,CAAC;AAAA,IACvF,OAAO;AACL,YAAM,IAAW,EAAI,OAAO,IAAI,GAC1B,IAAO,MAAa,KAAK,IAAM,EAAI,MAAM,GAAG,CAAQ,GACpD,IAAU,MAAa,KAAK,KAAK,EAAI,MAAM,CAAQ;AACzD,MAAA,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAU,CAAI;AAAA,QAAG,OAAO,EAAW,CAAO;AAAA,MAAE,CAAC;AAAA,IACjF;AAEA,IAAA,IAAI,IAAQ;AAAA,EACd;AAEA,SAAO;AACT;AAQA,SAAS,EAAkB,GAAiB,GAAoD;AAC9F,MAAI,IAAS;AACb,QAAM,IAAuC,CAAC;AAC9C,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AAEnB,QAAI,EAAI,SAAS,WAAW,EAAI,SAAS,SACvC,QAAO;AAAA,MAAE,QAAQ;AAAA,QAAE,QAAA;AAAA,QAAQ,QAAA;AAAA,MAAO;AAAA,MAAG,KAAK,IAAI;AAAA,IAAE;AAGlD,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,MAAA,KACI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAS,EAAO,CAAA,EAAI,KAAM,KAAK,GAC/B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS;AACtD;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,gBAAgB;AAC5D,MAAA,EAAO,KAAK;AAAA,QAAE,KAAK,EAAI,OAAQ,OAAU;AAAA,QAAI,MAAM;AAAA,MAAG,CAAC,GACvD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,gBAAgB;AACtD,YAAM,IAAM,EAAI,OAAQ,OAAU;AAClC,MAAA;AACA,UAAI,IAAO;AACX,MAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAO,EAAO,CAAA,EAAI,QAAQ,IAC1B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,MAAA;AAAA,MAAK,CAAC;AACzB;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,aAAa;AACzD,MAAA,EAAO,KAAK;AAAA,QACV,KAAK,EAAI,OAAQ,OAAU;AAAA,QAC3B,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,WAAW,CAAC;AAAA,MACd,CAAC,GACD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,aAAa;AACnD,YAAM,IAAM,EAAI,OAAQ,OAAU,IAC5B,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAwB,CAAC;AAG/B,WAFA,KAEO,IAAI,EAAO,UAAQ;AACxB,cAAM,IAAO,EAAO,CAAA;AACpB,YAAI,EAAK,SAAS,WAAW,EAAK,SAAS,aAAa;AACtD,UAAA;AACA;AAAA,QACF;AACA,YAAI,EAAK,SAAS,UAAU,EAAK,SAAS,YAAY;AACpD,gBAAM,IAAO,EAAK,OAAQ,QAAW;AACrC,UAAA;AACA,cAAI,IAAQ;AACZ,UAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAQ,EAAO,CAAA,EAAI,QAAQ,IAC3B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAU,KAAK;AAAA,YAAE,MAAA;AAAA,YAAM,OAAA;AAAA,UAAM,CAAC;AAC9B;AAAA,QACF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,YAAA;AAAA,QAAY,YAAA;AAAA,QAAY,WAAA;AAAA,MAAU,CAAC;AACtD;AAAA,IACF;AAEA,IAAA;AAAA,EACF;AAEA,SAAO;AAAA,IAAE,QAAQ;AAAA,MAAE,QAAA;AAAA,MAAQ,QAAA;AAAA,IAAO;AAAA,IAAG,KAAK;AAAA,EAAE;AAC9C;AAMA,SAAgB,EAAa,GAA2B;AACtD,QAAM,IAAS,EAAS,CAAG,GACrB,IAAwB,CAAC;AAC/B,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AACnB,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,YAAM,EAAE,QAAA,GAAQ,KAAA,EAAA,IAAQ,EAAkB,GAAQ,IAAI,CAAC;AACvD,MAAA,EAAQ,KAAK,CAAM,GACnB,IAAI;AACJ;AAAA,IACF;AACA,IAAA;AAAA,EACF;AAEA,SAAO;AACT;AAMA,SAAgB,EAAmB,GAAyB;AAC1D,QAAM,IAAU,EAAa,CAAG;AAChC,MAAI,EAAQ,WAAW,EAAG,OAAM,IAAI,MAAM,uCAAuC;AACjF,SAAO,EAAQ,CAAA;AACjB;AAIA,IAAM,IAAa;AAAA,GACb,IAAgB,0CAChB,IAAS;AAKf,SAAgB,EAAuB,GAA4B;AACjE,QAAM,IAAkB,CAAC,WAAW,CAAA,GAAgB;AACpD,EAAA,EAAM,KAAK,GAAG,CAAA,WAAiB,EAAU,EAAO,MAAM,CAAA,WAAY;AAElE,aAAW,KAAS,EAAO,OACzB,KAAI,EAAe,CAAK,EACtB,CAAA,EAAM,KAAK,GAAG,CAAA,sBAA4B,EAAM,GAAA,KAAQ,EAAU,EAAM,IAAI,CAAA,iBAAkB;AAAA,OACzF;AACL,UAAM,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM,YAC9C,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM;AACpD,IAAA,EAAM,KAAK,GAAG,CAAA,mBAAyB,EAAM,GAAA,WAAc,CAAA,WAAe,CAAA,IAAQ;AAClF,eAAW,KAAM,EAAM,UACrB,CAAA,EAAM,KACJ,GAAG,CAAA,GAAS,CAAA,mBAAyB,EAAG,IAAA,KAAS,EAAU,EAAG,KAAK,CAAA,aACrE;AAEF,IAAA,EAAM,KAAK,GAAG,CAAA,cAAoB;AAAA,EACpC;AAGF,SAAA,EAAM,KAAK,WAAW,GACf,EAAM,KAAK;AAAA,CAAI;AACxB;AAKA,SAAgB,EAAiB,GAA+B;AAC9D,QAAM,IAAkB,CACtB,GACA,eAAe,CAAA,GACjB;AAEA,aAAW,KAAU,GAAS;AAE5B,UAAM,IAAY,EAAuB,CAAM,EAC5C,MAAM;AAAA,CAAI,EACV,IAAA,CAAK,MAAS,IAAS,CAAI,EAC3B,KAAK;AAAA,CAAI;AACZ,IAAA,EAAM,KAAK,CAAS;AAAA,EACtB;AAEA,SAAA,EAAM,KAAK,eAAe,GACnB,EAAM,KAAK;AAAA,CAAI;AACxB"}
|
|
1
|
+
{"version":3,"file":"marcxml.js","names":[],"sources":["../src/marcxml.ts"],"sourcesContent":["/**\n * MARCXML parser and serializer.\n *\n * Supports the Library of Congress MARCXML schema:\n * http://www.loc.gov/MARC21/slim\n *\n * Parsing is done with a hand-rolled state machine — no XML library needed.\n * The MARCXML format is sufficiently regular (fixed element names, no arbitrary\n * nesting) that a full DOM parser is unnecessary.\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── XML entity handling ─────────────────────────────────────────────────────\n\nconst ENTITY_MAP: ReadonlyMap<string, string> = new Map([\n ['amp', '&'],\n ['lt', '<'],\n ['gt', '>'],\n ['quot', '\"'],\n ['apos', \"'\"],\n]);\n\nfunction unescapeXml(text: string): string {\n return text.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (_, hex, dec, name) => {\n if (hex !== undefined) {\n const cp = parseInt(hex, 16);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n if (dec !== undefined) {\n const cp = parseInt(dec, 10);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n return ENTITY_MAP.get(name) ?? _;\n });\n}\n\nfunction escapeXml(text: string): string {\n return text\n // XML 1.0 forbids most C0 control characters in document text. There is no\n // valid XML 1.0 representation for them, so substitute the Unicode\n // replacement character to keep the output well-formed.\n .replace(/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/g, '�')\n .replace(/&/g, '&')\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n // Preserve literal CR through the XML round-trip: XML parsers normalize\n // bare \\r and \\r\\n to \\n, so we must encode CR as a numeric reference.\n .replace(/\\r/g, ' ');\n}\n\n// ─── Minimal tokeniser ────────────────────────────────────────────────────────\n\ninterface Token {\n type: 'open' | 'close' | 'self-close' | 'text';\n /** Local name (no namespace prefix) */\n name?: string;\n attrs?: Record<string, string>;\n text?: string;\n}\n\n/**\n * Strip namespace prefix from a tag name, e.g. \"marc:record\" → \"record\".\n */\nfunction localName(raw: string): string {\n const colon = raw.indexOf(':');\n return colon === -1 ? raw : raw.slice(colon + 1);\n}\n\n/**\n * Parse `key=\"value\"` pairs out of an attribute string.\n */\nfunction parseAttrs(attrStr: string): Record<string, string> {\n const attrs: Record<string, string> = {};\n const re = /([a-zA-Z_:][^\\s=]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')/g;\n let m: RegExpExecArray | null;\n while ((m = re.exec(attrStr)) !== null) {\n const key = localName(m[1]!);\n attrs[key] = unescapeXml(m[2] ?? m[3] ?? '');\n }\n return attrs;\n}\n\n/**\n * Tokenise an XML string into a flat stream of open/close/text tokens.\n * Skips processing instructions, comments, and DOCTYPE declarations.\n * Sufficient for the well-constrained MARCXML format.\n */\nfunction tokenise(xml: string): Token[] {\n const tokens: Token[] = [];\n let i = 0;\n\n while (i < xml.length) {\n const ltPos = xml.indexOf('<', i);\n\n // Text node before next tag\n if (ltPos === -1) {\n const text = xml.slice(i).trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(xml.slice(i)) });\n break;\n }\n\n if (ltPos > i) {\n const raw = xml.slice(i, ltPos);\n const text = raw.trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(raw) });\n }\n\n const gtPos = xml.indexOf('>', ltPos);\n if (gtPos === -1) break;\n\n const tag = xml.slice(ltPos + 1, gtPos);\n\n // Skip comments, PIs, DOCTYPE\n if (tag.startsWith('!') || tag.startsWith('?')) {\n i = gtPos + 1;\n continue;\n }\n\n if (tag.startsWith('/')) {\n tokens.push({ type: 'close', name: localName(tag.slice(1).trim()) });\n } else if (tag.endsWith('/')) {\n const inner = tag.slice(0, -1).trim();\n const spaceIdx = inner.search(/\\s/);\n const name = spaceIdx === -1 ? inner : inner.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : inner.slice(spaceIdx);\n tokens.push({ type: 'self-close', name: localName(name), attrs: parseAttrs(attrStr) });\n } else {\n const spaceIdx = tag.search(/\\s/);\n const name = spaceIdx === -1 ? tag : tag.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : tag.slice(spaceIdx);\n tokens.push({ type: 'open', name: localName(name), attrs: parseAttrs(attrStr) });\n }\n\n i = gtPos + 1;\n }\n\n return tokens;\n}\n\n// ─── MARCXML parser ───────────────────────────────────────────────────────────\n\n/**\n * Parse one `<record>` element's worth of tokens into a MarcRecord.\n * Mutates `pos` via the returned index.\n */\nfunction parseRecordTokens(tokens: Token[], start: number): { record: MarcRecord; end: number } {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n let i = start;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n\n if (tok.type === 'close' && tok.name === 'record') {\n return { record: { leader, fields }, end: i + 1 };\n }\n\n if (tok.type === 'open' && tok.name === 'leader') {\n i++;\n if (i < tokens.length && tokens[i]!.type === 'text') {\n leader = tokens[i]!.text!.trim();\n i++;\n }\n // consume </leader>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'controlfield') {\n fields.push({ tag: tok.attrs?.['tag'] ?? '', data: '' });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'] ?? '';\n i++;\n let data = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n data = tokens[i]!.text ?? '';\n i++;\n }\n // consume </controlfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n fields.push({ tag, data });\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'datafield') {\n fields.push({\n tag: tok.attrs?.['tag'] ?? '',\n indicator1: tok.attrs?.['ind1'] ?? ' ',\n indicator2: tok.attrs?.['ind2'] ?? ' ',\n subfields: [],\n });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'] ?? '';\n const indicator1 = tok.attrs?.['ind1'] ?? ' ';\n const indicator2 = tok.attrs?.['ind2'] ?? ' ';\n const subfields: Subfield[] = [];\n i++;\n\n while (i < tokens.length) {\n const stok = tokens[i]!;\n if (stok.type === 'close' && stok.name === 'datafield') {\n i++;\n break;\n }\n if (stok.type === 'open' && stok.name === 'subfield') {\n const code = stok.attrs?.['code'] ?? '';\n i++;\n let value = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n value = tokens[i]!.text ?? '';\n i++;\n }\n // consume </subfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n subfields.push({ code, value });\n continue;\n }\n i++;\n }\n\n fields.push({ tag, indicator1, indicator2, subfields });\n continue;\n }\n\n i++;\n }\n\n return { record: { leader, fields }, end: i };\n}\n\n/**\n * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.\n * Returns all records found.\n */\nexport function parseMarcXml(xml: string): MarcRecord[] {\n const tokens = tokenise(xml);\n const records: MarcRecord[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n if (tok.type === 'open' && tok.name === 'record') {\n const { record, end } = parseRecordTokens(tokens, i + 1);\n records.push(record);\n i = end;\n continue;\n }\n i++;\n }\n\n return records;\n}\n\n// ─── MARCXML serializer ───────────────────────────────────────────────────────\n\nconst XML_HEADER = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n';\nconst COLLECTION_NS = 'xmlns=\"http://www.loc.gov/MARC21/slim\"';\nconst INDENT = ' ';\n\nfunction serializeMarcXmlRecord(record: MarcRecord): string {\n const lines: string[] = [`<record ${COLLECTION_NS}>`];\n lines.push(`${INDENT}<leader>${escapeXml(record.leader)}</leader>`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`${INDENT}<controlfield tag=\"${field.tag}\">${escapeXml(field.data)}</controlfield>`);\n } else {\n const ind1 = field.indicator1 === ' ' ? ' ' : field.indicator1;\n const ind2 = field.indicator2 === ' ' ? ' ' : field.indicator2;\n lines.push(`${INDENT}<datafield tag=\"${field.tag}\" ind1=\"${ind1}\" ind2=\"${ind2}\">`);\n for (const sf of field.subfields) {\n lines.push(\n `${INDENT}${INDENT}<subfield code=\"${sf.code}\">${escapeXml(sf.value)}</subfield>`\n );\n }\n lines.push(`${INDENT}</datafield>`);\n }\n }\n\n lines.push('</record>');\n return lines.join('\\n');\n}\n\n/**\n * Serialize one or more MarcRecords into a MARCXML `<collection>` document.\n */\nexport function serializeMarcXml(records: MarcRecord[]): string {\n const parts: string[] = [\n XML_HEADER,\n `<collection ${COLLECTION_NS}>`,\n ];\n\n for (const record of records) {\n // Indent each record element by one level inside <collection>\n const recordXml = serializeMarcXmlRecord(record)\n .split('\\n')\n .map((line) => INDENT + line)\n .join('\\n');\n parts.push(recordXml);\n }\n\n parts.push('</collection>');\n return parts.join('\\n');\n}\n"],"mappings":";AAgBA,IAAM,IAA0C,oBAAI,IAAI;AAAA,EACtD,CAAC,OAAO,GAAG;AAAA,EACX,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,QAAQ,GAAG;AAAA,EACZ,CAAC,QAAQ,GAAG;AACd,CAAC;AAED,SAAS,EAAY,GAAsB;AACzC,SAAO,EAAK,QAAQ,iDAAA,CAAkD,GAAG,GAAK,GAAK,MAAS;AAC1F,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,WAAO,EAAW,IAAI,CAAI,KAAK;AAAA,EACjC,CAAC;AACH;AAEA,SAAS,EAAU,GAAsB;AACvC,SAAO,EAIJ,QAAQ,iCAAiC,GAAG,EAC5C,QAAQ,MAAM,OAAO,EACrB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,QAAQ,EAGtB,QAAQ,OAAO,OAAO;AAC3B;AAeA,SAAS,EAAU,GAAqB;AACtC,QAAM,IAAQ,EAAI,QAAQ,GAAG;AAC7B,SAAO,MAAU,KAAK,IAAM,EAAI,MAAM,IAAQ,CAAC;AACjD;AAKA,SAAS,EAAW,GAAyC;AAC3D,QAAM,IAAgC,CAAC,GACjC,IAAK;AACX,MAAI;AACJ,UAAQ,IAAI,EAAG,KAAK,CAAO,OAAO,QAAM;AACtC,UAAM,IAAM,EAAU,EAAE,CAAA,CAAG;AAC3B,IAAA,EAAM,CAAA,IAAO,EAAY,EAAE,CAAA,KAAM,EAAE,CAAA,KAAM,EAAE;AAAA,EAC7C;AACA,SAAO;AACT;AAOA,SAAS,EAAS,GAAsB;AACtC,QAAM,IAAkB,CAAC;AACzB,MAAI,IAAI;AAER,SAAO,IAAI,EAAI,UAAQ;AACrB,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAC;AAGhC,QAAI,MAAU,IAAI;AAEhB,MADa,EAAI,MAAM,CAAC,EAAE,KACtB,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,EAAI,MAAM,CAAC,CAAC;AAAA,MAAE,CAAC;AACvE;AAAA,IACF;AAEA,QAAI,IAAQ,GAAG;AACb,YAAM,IAAM,EAAI,MAAM,GAAG,CAAK;AAE9B,MADa,EAAI,KACb,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,CAAG;AAAA,MAAE,CAAC;AAAA,IAChE;AAEA,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAK;AACpC,QAAI,MAAU,GAAI;AAElB,UAAM,IAAM,EAAI,MAAM,IAAQ,GAAG,CAAK;AAGtC,QAAI,EAAI,WAAW,GAAG,KAAK,EAAI,WAAW,GAAG,GAAG;AAC9C,MAAA,IAAI,IAAQ;AACZ;AAAA,IACF;AAEA,QAAI,EAAI,WAAW,GAAG,EACpB,CAAA,EAAO,KAAK;AAAA,MAAE,MAAM;AAAA,MAAS,MAAM,EAAU,EAAI,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,IAAE,CAAC;AAAA,aAC1D,EAAI,SAAS,GAAG,GAAG;AAC5B,YAAM,IAAQ,EAAI,MAAM,GAAG,EAAE,EAAE,KAAK,GAC9B,IAAW,EAAM,OAAO,IAAI,GAC5B,IAAO,MAAa,KAAK,IAAQ,EAAM,MAAM,GAAG,CAAQ,GACxD,IAAU,MAAa,KAAK,KAAK,EAAM,MAAM,CAAQ;AAC3D,MAAA,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAc,MAAM,EAAU,CAAI;AAAA,QAAG,OAAO,EAAW,CAAO;AAAA,MAAE,CAAC;AAAA,IACvF,OAAO;AACL,YAAM,IAAW,EAAI,OAAO,IAAI,GAC1B,IAAO,MAAa,KAAK,IAAM,EAAI,MAAM,GAAG,CAAQ,GACpD,IAAU,MAAa,KAAK,KAAK,EAAI,MAAM,CAAQ;AACzD,MAAA,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAU,CAAI;AAAA,QAAG,OAAO,EAAW,CAAO;AAAA,MAAE,CAAC;AAAA,IACjF;AAEA,IAAA,IAAI,IAAQ;AAAA,EACd;AAEA,SAAO;AACT;AAQA,SAAS,EAAkB,GAAiB,GAAoD;AAC9F,MAAI,IAAS;AACb,QAAM,IAAuC,CAAC;AAC9C,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AAEnB,QAAI,EAAI,SAAS,WAAW,EAAI,SAAS,SACvC,QAAO;AAAA,MAAE,QAAQ;AAAA,QAAE,QAAA;AAAA,QAAQ,QAAA;AAAA,MAAO;AAAA,MAAG,KAAK,IAAI;AAAA,IAAE;AAGlD,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,MAAA,KACI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAS,EAAO,CAAA,EAAI,KAAM,KAAK,GAC/B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS;AACtD;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,gBAAgB;AAC5D,MAAA,EAAO,KAAK;AAAA,QAAE,KAAK,EAAI,OAAQ,OAAU;AAAA,QAAI,MAAM;AAAA,MAAG,CAAC,GACvD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,gBAAgB;AACtD,YAAM,IAAM,EAAI,OAAQ,OAAU;AAClC,MAAA;AACA,UAAI,IAAO;AACX,MAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAO,EAAO,CAAA,EAAI,QAAQ,IAC1B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,MAAA;AAAA,MAAK,CAAC;AACzB;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,aAAa;AACzD,MAAA,EAAO,KAAK;AAAA,QACV,KAAK,EAAI,OAAQ,OAAU;AAAA,QAC3B,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,WAAW,CAAC;AAAA,MACd,CAAC,GACD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,aAAa;AACnD,YAAM,IAAM,EAAI,OAAQ,OAAU,IAC5B,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAwB,CAAC;AAG/B,WAFA,KAEO,IAAI,EAAO,UAAQ;AACxB,cAAM,IAAO,EAAO,CAAA;AACpB,YAAI,EAAK,SAAS,WAAW,EAAK,SAAS,aAAa;AACtD,UAAA;AACA;AAAA,QACF;AACA,YAAI,EAAK,SAAS,UAAU,EAAK,SAAS,YAAY;AACpD,gBAAM,IAAO,EAAK,OAAQ,QAAW;AACrC,UAAA;AACA,cAAI,IAAQ;AACZ,UAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAQ,EAAO,CAAA,EAAI,QAAQ,IAC3B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAU,KAAK;AAAA,YAAE,MAAA;AAAA,YAAM,OAAA;AAAA,UAAM,CAAC;AAC9B;AAAA,QACF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,YAAA;AAAA,QAAY,YAAA;AAAA,QAAY,WAAA;AAAA,MAAU,CAAC;AACtD;AAAA,IACF;AAEA,IAAA;AAAA,EACF;AAEA,SAAO;AAAA,IAAE,QAAQ;AAAA,MAAE,QAAA;AAAA,MAAQ,QAAA;AAAA,IAAO;AAAA,IAAG,KAAK;AAAA,EAAE;AAC9C;AAMA,SAAgB,EAAa,GAA2B;AACtD,QAAM,IAAS,EAAS,CAAG,GACrB,IAAwB,CAAC;AAC/B,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AACnB,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,YAAM,EAAE,QAAA,GAAQ,KAAA,EAAA,IAAQ,EAAkB,GAAQ,IAAI,CAAC;AACvD,MAAA,EAAQ,KAAK,CAAM,GACnB,IAAI;AACJ;AAAA,IACF;AACA,IAAA;AAAA,EACF;AAEA,SAAO;AACT;AAIA,IAAM,IAAa;AAAA,GACb,IAAgB,0CAChB,IAAS;AAEf,SAAS,EAAuB,GAA4B;AAC1D,QAAM,IAAkB,CAAC,WAAW,CAAA,GAAgB;AACpD,EAAA,EAAM,KAAK,GAAG,CAAA,WAAiB,EAAU,EAAO,MAAM,CAAA,WAAY;AAElE,aAAW,KAAS,EAAO,OACzB,KAAI,EAAe,CAAK,EACtB,CAAA,EAAM,KAAK,GAAG,CAAA,sBAA4B,EAAM,GAAA,KAAQ,EAAU,EAAM,IAAI,CAAA,iBAAkB;AAAA,OACzF;AACL,UAAM,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM,YAC9C,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM;AACpD,IAAA,EAAM,KAAK,GAAG,CAAA,mBAAyB,EAAM,GAAA,WAAc,CAAA,WAAe,CAAA,IAAQ;AAClF,eAAW,KAAM,EAAM,UACrB,CAAA,EAAM,KACJ,GAAG,CAAA,GAAS,CAAA,mBAAyB,EAAG,IAAA,KAAS,EAAU,EAAG,KAAK,CAAA,aACrE;AAEF,IAAA,EAAM,KAAK,GAAG,CAAA,cAAoB;AAAA,EACpC;AAGF,SAAA,EAAM,KAAK,WAAW,GACf,EAAM,KAAK;AAAA,CAAI;AACxB;AAKA,SAAgB,EAAiB,GAA+B;AAC9D,QAAM,IAAkB,CACtB,GACA,eAAe,CAAA,GACjB;AAEA,aAAW,KAAU,GAAS;AAE5B,UAAM,IAAY,EAAuB,CAAM,EAC5C,MAAM;AAAA,CAAI,EACV,IAAA,CAAK,MAAS,IAAS,CAAI,EAC3B,KAAK;AAAA,CAAI;AACZ,IAAA,EAAM,KAAK,CAAS;AAAA,EACtB;AAEA,SAAA,EAAM,KAAK,eAAe,GACnB,EAAM,KAAK;AAAA,CAAI;AACxB"}
|
package/dist/parser.d.ts
CHANGED
|
@@ -1,43 +1,14 @@
|
|
|
1
|
-
import { MarcRecord, ParseOptions
|
|
1
|
+
import { MarcRecord, ParseOptions } from './types';
|
|
2
2
|
/**
|
|
3
|
-
* Parse a
|
|
3
|
+
* Parse a concatenated ISO2709 binary stream into an array of MARC records.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
* @
|
|
7
|
-
*
|
|
5
|
+
* Records in the stream are separated by 0x1D (RECORD_TERMINATOR). Each slice
|
|
6
|
+
* is parsed with {@link parseMarcRecord}; slices that produce a null record
|
|
7
|
+
* (e.g. due to encoding errors in lenient mode) are silently skipped.
|
|
8
8
|
*
|
|
9
|
-
* @
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
* const result = parseMarcRecord(buffer);
|
|
13
|
-
*
|
|
14
|
-
* if (result.record) {
|
|
15
|
-
* console.log('Parsed successfully');
|
|
16
|
-
* }
|
|
17
|
-
*
|
|
18
|
-
* if (result.warnings.length > 0) {
|
|
19
|
-
* console.warn('Warnings:', result.warnings);
|
|
20
|
-
* }
|
|
21
|
-
* ```
|
|
22
|
-
*/
|
|
23
|
-
export declare function parseMarcRecord(buffer: Uint8Array, options?: ParseOptions): ParseResult;
|
|
24
|
-
/**
|
|
25
|
-
* Convenience wrapper for strict parsing.
|
|
26
|
-
* Throws an error if parsing fails.
|
|
27
|
-
*
|
|
28
|
-
* @param buffer - The binary data to parse
|
|
29
|
-
* @returns The parsed MARC record
|
|
30
|
-
* @throws Error if parsing fails
|
|
31
|
-
*
|
|
32
|
-
* @example
|
|
33
|
-
* ```typescript
|
|
34
|
-
* try {
|
|
35
|
-
* const record = parseMarcRecordStrict(buffer);
|
|
36
|
-
* console.log('Title:', title(record));
|
|
37
|
-
* } catch (error) {
|
|
38
|
-
* console.error('Parsing failed:', error);
|
|
39
|
-
* }
|
|
40
|
-
* ```
|
|
9
|
+
* @param buffer - Binary data containing one or more concatenated MARC records
|
|
10
|
+
* @param options - Parsing options forwarded to the per-record parser
|
|
11
|
+
* @returns Array of successfully parsed MARC records
|
|
41
12
|
*/
|
|
42
|
-
export declare function
|
|
13
|
+
export declare function parseMarcBinary(buffer: Uint8Array, options?: ParseOptions): MarcRecord[];
|
|
43
14
|
//# sourceMappingURL=parser.d.ts.map
|
package/dist/serializer.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MarcRecord
|
|
1
|
+
import { MarcRecord } from './types';
|
|
2
2
|
/**
|
|
3
3
|
* Options for serializing MARC records.
|
|
4
4
|
*/
|
|
@@ -13,45 +13,14 @@ export interface SerializeOptions {
|
|
|
13
13
|
readonly encoding?: 'utf8' | 'marc8';
|
|
14
14
|
}
|
|
15
15
|
/**
|
|
16
|
-
* Serialize
|
|
16
|
+
* Serialize an array of MARC records to a concatenated ISO2709 binary stream.
|
|
17
17
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
18
|
+
* Each record is individually serialized (with its own 0x1D terminator) and
|
|
19
|
+
* the results are concatenated into a single Uint8Array.
|
|
20
20
|
*
|
|
21
|
-
* @
|
|
22
|
-
*
|
|
23
|
-
*
|
|
24
|
-
* leader: '00000nam 2200000 4500',
|
|
25
|
-
* fields: [
|
|
26
|
-
* { tag: '001', data: 'ocm12345678' },
|
|
27
|
-
* {
|
|
28
|
-
* tag: '245',
|
|
29
|
-
* indicator1: '1',
|
|
30
|
-
* indicator2: '0',
|
|
31
|
-
* subfields: [{ code: 'a', value: 'Title' }],
|
|
32
|
-
* },
|
|
33
|
-
* ],
|
|
34
|
-
* };
|
|
35
|
-
*
|
|
36
|
-
* const buffer = serializeMarcRecord(record);
|
|
37
|
-
* // Can now be written to file or transmitted
|
|
38
|
-
* ```
|
|
39
|
-
*/
|
|
40
|
-
export declare function serializeMarcRecord(record: MarcRecord, options?: SerializeOptions): Uint8Array;
|
|
41
|
-
/**
|
|
42
|
-
* Result of {@link serializeMarcRecordWithWarnings}: the serialized bytes
|
|
43
|
-
* along with any warnings generated by the encoder (e.g. lossy MARC-8
|
|
44
|
-
* substitutions).
|
|
45
|
-
*/
|
|
46
|
-
export interface SerializeResult {
|
|
47
|
-
readonly bytes: Uint8Array;
|
|
48
|
-
readonly warnings: readonly MarcWarning[];
|
|
49
|
-
}
|
|
50
|
-
/**
|
|
51
|
-
* Serialize a MARC record and surface any warnings generated by the encoder.
|
|
52
|
-
* Use this when you need programmatic visibility into lossy encodings — for
|
|
53
|
-
* example, MARC-8 output of records containing characters with no MARC-8
|
|
54
|
-
* equivalent.
|
|
21
|
+
* @param records - MARC records to serialize
|
|
22
|
+
* @param options - Encoding options forwarded to the per-record serializer
|
|
23
|
+
* @returns Concatenated binary representation of all records
|
|
55
24
|
*/
|
|
56
|
-
export declare function
|
|
25
|
+
export declare function serializeMarcBinary(records: MarcRecord[], options?: SerializeOptions): Uint8Array;
|
|
57
26
|
//# sourceMappingURL=serializer.d.ts.map
|
package/dist/warnings.d.ts
CHANGED