marc-ts 0.2.0 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -130
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js +410 -387
- package/dist/index.js.map +1 -1
- package/dist/marcjson.cjs +1 -1
- package/dist/marcjson.cjs.map +1 -1
- package/dist/marcjson.js +1 -1
- package/dist/marcjson.js.map +1 -1
- package/dist/marctxt.cjs +1 -1
- package/dist/marctxt.cjs.map +1 -1
- package/dist/marctxt.js +1 -1
- package/dist/marctxt.js.map +1 -1
- package/dist/marcxml.cjs +6 -6
- package/dist/marcxml.cjs.map +1 -1
- package/dist/marcxml.d.ts +8 -3
- package/dist/marcxml.js +188 -127
- package/dist/marcxml.js.map +1 -1
- package/dist/parser.d.ts +10 -1
- package/dist/serializer.d.ts +11 -1
- package/dist/{types-c4Mo9m9u.js → types-BMKDHD1l.js} +1 -1
- package/dist/types-BMKDHD1l.js.map +1 -0
- package/dist/{types-CJcxHJff.cjs → types-CsOhH4OF.cjs} +1 -1
- package/dist/types-CsOhH4OF.cjs.map +1 -0
- package/dist/types.d.ts +23 -1
- package/dist/warnings-6yoB06xI.cjs +3 -0
- package/dist/warnings-6yoB06xI.cjs.map +1 -0
- package/dist/warnings-Bt6wvWFe.js +13 -0
- package/dist/warnings-Bt6wvWFe.js.map +1 -0
- package/package.json +2 -2
- package/dist/types-CJcxHJff.cjs.map +0 -1
- package/dist/types-c4Mo9m9u.js.map +0 -1
package/dist/marcxml.js
CHANGED
|
@@ -1,213 +1,274 @@
|
|
|
1
|
-
import { t as
|
|
2
|
-
|
|
1
|
+
import { t as E } from "./types-BMKDHD1l.js";
|
|
2
|
+
import { t as d } from "./warnings-Bt6wvWFe.js";
|
|
3
|
+
var A = /* @__PURE__ */ new Map([
|
|
3
4
|
["amp", "&"],
|
|
4
5
|
["lt", "<"],
|
|
5
6
|
["gt", ">"],
|
|
6
7
|
["quot", '"'],
|
|
7
8
|
["apos", "'"]
|
|
8
9
|
]);
|
|
9
|
-
function
|
|
10
|
-
return e.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (
|
|
11
|
-
if (
|
|
12
|
-
const
|
|
13
|
-
return
|
|
10
|
+
function y(e) {
|
|
11
|
+
return e.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (r, n, i, s) => {
|
|
12
|
+
if (n !== void 0) {
|
|
13
|
+
const a = parseInt(n, 16);
|
|
14
|
+
return a >= 0 && a <= 1114111 ? String.fromCodePoint(a) : "�";
|
|
14
15
|
}
|
|
15
|
-
if (
|
|
16
|
-
const
|
|
17
|
-
return
|
|
16
|
+
if (i !== void 0) {
|
|
17
|
+
const a = parseInt(i, 10);
|
|
18
|
+
return a >= 0 && a <= 1114111 ? String.fromCodePoint(a) : "�";
|
|
18
19
|
}
|
|
19
|
-
return
|
|
20
|
+
return A.get(s) ?? r;
|
|
20
21
|
});
|
|
21
22
|
}
|
|
22
23
|
function u(e) {
|
|
23
24
|
return e.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, "�").replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/\r/g, "&#13;");
|
|
24
25
|
}
|
|
25
|
-
function f(e) {
|
|
26
|
-
const i = e.indexOf(":");
|
|
27
|
-
return i === -1 ? e : e.slice(i + 1);
|
|
28
|
-
}
|
|
29
26
|
function m(e) {
|
|
30
|
-
const
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
27
|
+
const r = e.indexOf(":");
|
|
28
|
+
return r === -1 ? e : e.slice(r + 1);
|
|
29
|
+
}
|
|
30
|
+
function $(e) {
|
|
31
|
+
const r = {}, n = /([a-zA-Z_:][^\s=]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
|
|
32
|
+
let i;
|
|
33
|
+
for (; (i = n.exec(e)) !== null; ) {
|
|
34
|
+
const s = m(i[1]);
|
|
35
|
+
r[s] = y(i[2] ?? i[3] ?? "");
|
|
36
|
+
}
|
|
37
|
+
return r;
|
|
38
|
+
}
|
|
39
|
+
function T(e, r) {
|
|
40
|
+
let n = !1, i = !1;
|
|
41
|
+
for (let s = r; s < e.length; s++) {
|
|
42
|
+
const a = e[s];
|
|
43
|
+
if (a === '"' && !n) i = !i;
|
|
44
|
+
else if (a === "'" && !i) n = !n;
|
|
45
|
+
else if (a === ">" && !n && !i) return s;
|
|
35
46
|
}
|
|
36
|
-
return
|
|
47
|
+
return -1;
|
|
37
48
|
}
|
|
38
|
-
function
|
|
39
|
-
const
|
|
40
|
-
let
|
|
41
|
-
for (;
|
|
42
|
-
const
|
|
43
|
-
if (
|
|
44
|
-
e.slice(
|
|
49
|
+
function C(e, r) {
|
|
50
|
+
const n = [];
|
|
51
|
+
let i = 0;
|
|
52
|
+
for (; i < e.length; ) {
|
|
53
|
+
const s = e.indexOf("<", i);
|
|
54
|
+
if (s === -1) {
|
|
55
|
+
e.slice(i).trim() && n.push({
|
|
45
56
|
type: "text",
|
|
46
|
-
text:
|
|
57
|
+
text: y(e.slice(i))
|
|
47
58
|
});
|
|
48
59
|
break;
|
|
49
60
|
}
|
|
50
|
-
if (
|
|
51
|
-
const
|
|
52
|
-
|
|
61
|
+
if (s > i) {
|
|
62
|
+
const t = e.slice(i, s);
|
|
63
|
+
t.trim() && n.push({
|
|
53
64
|
type: "text",
|
|
54
|
-
text:
|
|
65
|
+
text: y(t)
|
|
55
66
|
});
|
|
56
67
|
}
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
68
|
+
if (e.startsWith("<!--", s)) {
|
|
69
|
+
const t = e.indexOf("-->", s + 4);
|
|
70
|
+
t === -1 ? (r?.push(d("malformed_xml", "Unterminated comment")), i = e.length) : i = t + 3;
|
|
71
|
+
continue;
|
|
72
|
+
}
|
|
73
|
+
if (e.startsWith("<![CDATA[", s)) {
|
|
74
|
+
const t = e.indexOf("]]>", s + 9);
|
|
75
|
+
if (t === -1)
|
|
76
|
+
r?.push(d("malformed_xml", "Unterminated CDATA section")), i = e.length;
|
|
77
|
+
else {
|
|
78
|
+
const c = e.slice(s + 9, t);
|
|
79
|
+
c && n.push({
|
|
80
|
+
type: "text",
|
|
81
|
+
text: c
|
|
82
|
+
}), i = t + 3;
|
|
83
|
+
}
|
|
84
|
+
continue;
|
|
85
|
+
}
|
|
86
|
+
if (e.startsWith("<?", s)) {
|
|
87
|
+
const t = e.indexOf("?>", s + 2);
|
|
88
|
+
i = t === -1 ? e.length : t + 2;
|
|
89
|
+
continue;
|
|
90
|
+
}
|
|
91
|
+
if (e.startsWith("<!", s)) {
|
|
92
|
+
const t = e.indexOf(">", s + 2);
|
|
93
|
+
i = t === -1 ? e.length : t + 1;
|
|
62
94
|
continue;
|
|
63
95
|
}
|
|
64
|
-
|
|
96
|
+
const a = T(e, s + 1);
|
|
97
|
+
if (a === -1) {
|
|
98
|
+
r?.push(d("malformed_xml", "Unclosed tag at end of input"));
|
|
99
|
+
break;
|
|
100
|
+
}
|
|
101
|
+
const o = e.slice(s + 1, a);
|
|
102
|
+
if (o.startsWith("/")) n.push({
|
|
65
103
|
type: "close",
|
|
66
|
-
name:
|
|
104
|
+
name: m(o.slice(1).trim())
|
|
67
105
|
});
|
|
68
|
-
else if (
|
|
69
|
-
const
|
|
70
|
-
|
|
106
|
+
else if (o.endsWith("/")) {
|
|
107
|
+
const t = o.slice(0, -1).trim(), c = t.search(/\s/), l = c === -1 ? t : t.slice(0, c), f = c === -1 ? "" : t.slice(c);
|
|
108
|
+
n.push({
|
|
71
109
|
type: "self-close",
|
|
72
|
-
name:
|
|
73
|
-
attrs:
|
|
110
|
+
name: m(l),
|
|
111
|
+
attrs: $(f)
|
|
74
112
|
});
|
|
75
113
|
} else {
|
|
76
|
-
const
|
|
77
|
-
|
|
114
|
+
const t = o.search(/\s/), c = t === -1 ? o : o.slice(0, t), l = t === -1 ? "" : o.slice(t);
|
|
115
|
+
n.push({
|
|
78
116
|
type: "open",
|
|
79
|
-
name:
|
|
80
|
-
attrs:
|
|
117
|
+
name: m(c),
|
|
118
|
+
attrs: $(l)
|
|
81
119
|
});
|
|
82
120
|
}
|
|
83
|
-
|
|
121
|
+
i = a + 1;
|
|
84
122
|
}
|
|
85
|
-
return
|
|
123
|
+
return n;
|
|
124
|
+
}
|
|
125
|
+
function p(e, r, n) {
|
|
126
|
+
if (n.strict) throw new Error(r.message);
|
|
127
|
+
const i = n.maxWarnings ?? 100;
|
|
128
|
+
e.length < i && e.push(r);
|
|
86
129
|
}
|
|
87
|
-
function
|
|
88
|
-
let s = "";
|
|
89
|
-
const
|
|
90
|
-
let t =
|
|
130
|
+
function M(e, r, n, i) {
|
|
131
|
+
let s = "", a = !1;
|
|
132
|
+
const o = [];
|
|
133
|
+
let t = r;
|
|
91
134
|
for (; t < e.length; ) {
|
|
92
|
-
const
|
|
93
|
-
if (
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
135
|
+
const c = e[t];
|
|
136
|
+
if (c.type === "close" && c.name === "record")
|
|
137
|
+
return a || p(n, d("missing_element", "Record has no <leader> element"), i), {
|
|
138
|
+
record: {
|
|
139
|
+
leader: s,
|
|
140
|
+
fields: o
|
|
141
|
+
},
|
|
142
|
+
end: t + 1
|
|
143
|
+
};
|
|
144
|
+
if (c.type === "open" && c.name === "leader") {
|
|
145
|
+
a = !0, t++, t < e.length && e[t].type === "text" && (s = e[t].text.trim(), t++), s.length !== 24 && p(n, d("invalid_leader", `Leader is ${s.length} characters, expected 24`), i), t < e.length && e[t].type === "close" && t++;
|
|
102
146
|
continue;
|
|
103
147
|
}
|
|
104
|
-
if (
|
|
105
|
-
c.
|
|
106
|
-
|
|
148
|
+
if (c.type === "self-close" && c.name === "controlfield") {
|
|
149
|
+
const l = c.attrs?.tag;
|
|
150
|
+
l === void 0 && p(n, d("missing_element", "controlfield missing tag attribute"), i), o.push({
|
|
151
|
+
tag: l ?? "",
|
|
107
152
|
data: ""
|
|
108
153
|
}), t++;
|
|
109
154
|
continue;
|
|
110
155
|
}
|
|
111
|
-
if (
|
|
112
|
-
const
|
|
113
|
-
t++;
|
|
114
|
-
let
|
|
115
|
-
t < e.length && e[t].type === "text" && (
|
|
116
|
-
tag:
|
|
117
|
-
data:
|
|
156
|
+
if (c.type === "open" && c.name === "controlfield") {
|
|
157
|
+
const l = c.attrs?.tag;
|
|
158
|
+
l === void 0 && p(n, d("missing_element", "controlfield missing tag attribute"), i), t++;
|
|
159
|
+
let f = "";
|
|
160
|
+
t < e.length && e[t].type === "text" && (f = e[t].text ?? "", t++), t < e.length && e[t].type === "close" && t++, o.push({
|
|
161
|
+
tag: l ?? "",
|
|
162
|
+
data: f
|
|
118
163
|
});
|
|
119
164
|
continue;
|
|
120
165
|
}
|
|
121
|
-
if (
|
|
122
|
-
c.
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
166
|
+
if (c.type === "self-close" && c.name === "datafield") {
|
|
167
|
+
const l = c.attrs?.tag;
|
|
168
|
+
l === void 0 && p(n, d("missing_element", "datafield missing tag attribute"), i), o.push({
|
|
169
|
+
tag: l ?? "",
|
|
170
|
+
indicator1: c.attrs?.ind1 ?? " ",
|
|
171
|
+
indicator2: c.attrs?.ind2 ?? " ",
|
|
126
172
|
subfields: []
|
|
127
173
|
}), t++;
|
|
128
174
|
continue;
|
|
129
175
|
}
|
|
130
|
-
if (
|
|
131
|
-
const
|
|
176
|
+
if (c.type === "open" && c.name === "datafield") {
|
|
177
|
+
const l = c.attrs?.tag;
|
|
178
|
+
l === void 0 && p(n, d("missing_element", "datafield missing tag attribute"), i);
|
|
179
|
+
const f = c.attrs?.ind1 ?? " ", W = c.attrs?.ind2 ?? " ", b = [];
|
|
132
180
|
for (t++; t < e.length; ) {
|
|
133
|
-
const
|
|
134
|
-
if (
|
|
181
|
+
const h = e[t];
|
|
182
|
+
if (h.type === "close" && h.name === "datafield") {
|
|
135
183
|
t++;
|
|
136
184
|
break;
|
|
137
185
|
}
|
|
138
|
-
if (
|
|
139
|
-
const
|
|
140
|
-
t++;
|
|
141
|
-
let
|
|
142
|
-
t < e.length && e[t].type === "text" && (
|
|
143
|
-
code:
|
|
144
|
-
value:
|
|
186
|
+
if (h.type === "open" && h.name === "subfield") {
|
|
187
|
+
const v = h.attrs?.code;
|
|
188
|
+
v === void 0 && p(n, d("missing_element", "subfield missing code attribute", void 0, l), i), t++;
|
|
189
|
+
let x = "";
|
|
190
|
+
t < e.length && e[t].type === "text" && (x = e[t].text ?? "", t++), t < e.length && e[t].type === "close" && t++, b.push({
|
|
191
|
+
code: v ?? "",
|
|
192
|
+
value: x
|
|
145
193
|
});
|
|
146
194
|
continue;
|
|
147
195
|
}
|
|
148
196
|
t++;
|
|
149
197
|
}
|
|
150
|
-
|
|
151
|
-
tag:
|
|
152
|
-
indicator1:
|
|
153
|
-
indicator2:
|
|
154
|
-
subfields:
|
|
198
|
+
o.push({
|
|
199
|
+
tag: l ?? "",
|
|
200
|
+
indicator1: f,
|
|
201
|
+
indicator2: W,
|
|
202
|
+
subfields: b
|
|
155
203
|
});
|
|
156
204
|
continue;
|
|
157
205
|
}
|
|
158
206
|
t++;
|
|
159
207
|
}
|
|
160
|
-
return {
|
|
208
|
+
return a || p(n, d("missing_element", "Record has no <leader> element"), i), {
|
|
161
209
|
record: {
|
|
162
210
|
leader: s,
|
|
163
|
-
fields:
|
|
211
|
+
fields: o
|
|
164
212
|
},
|
|
165
213
|
end: t
|
|
166
214
|
};
|
|
167
215
|
}
|
|
168
|
-
function
|
|
169
|
-
const i =
|
|
170
|
-
let
|
|
171
|
-
for (;
|
|
172
|
-
const t =
|
|
216
|
+
function O(e, r) {
|
|
217
|
+
const n = r ?? {}, i = [], s = C(e, i), a = [];
|
|
218
|
+
let o = 0;
|
|
219
|
+
for (; o < s.length; ) {
|
|
220
|
+
const t = s[o];
|
|
173
221
|
if (t.type === "open" && t.name === "record") {
|
|
174
|
-
const { record:
|
|
175
|
-
|
|
222
|
+
const c = [], { record: l, end: f } = M(s, o + 1, c, n);
|
|
223
|
+
a.push({
|
|
224
|
+
record: l,
|
|
225
|
+
warnings: c
|
|
226
|
+
}), o = f;
|
|
176
227
|
continue;
|
|
177
228
|
}
|
|
178
|
-
|
|
229
|
+
o++;
|
|
179
230
|
}
|
|
180
|
-
return
|
|
231
|
+
return a.length === 0 && i.length > 0 ? a.push({
|
|
232
|
+
record: null,
|
|
233
|
+
warnings: i
|
|
234
|
+
}) : i.length > 0 && a.length > 0 && (a[0] = {
|
|
235
|
+
record: a[0].record,
|
|
236
|
+
warnings: [...i, ...a[0].warnings]
|
|
237
|
+
}), { results: a };
|
|
238
|
+
}
|
|
239
|
+
function S(e, r) {
|
|
240
|
+
return O(e, r).results.map((n) => n.record).filter((n) => n !== null);
|
|
181
241
|
}
|
|
182
|
-
var
|
|
183
|
-
`,
|
|
184
|
-
function
|
|
185
|
-
const
|
|
186
|
-
|
|
187
|
-
for (const
|
|
242
|
+
var X = `<?xml version="1.0" encoding="UTF-8"?>
|
|
243
|
+
`, _ = 'xmlns="http://www.loc.gov/MARC21/slim"', g = " ";
|
|
244
|
+
function I(e) {
|
|
245
|
+
const r = [`<record ${_}>`];
|
|
246
|
+
r.push(`${g}<leader>${u(e.leader)}</leader>`);
|
|
247
|
+
for (const n of e.fields) if (E(n)) r.push(`${g}<controlfield tag="${u(n.tag)}">${u(n.data)}</controlfield>`);
|
|
188
248
|
else {
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
for (const
|
|
192
|
-
|
|
249
|
+
const i = n.indicator1 === " " ? " " : n.indicator1, s = n.indicator2 === " " ? " " : n.indicator2;
|
|
250
|
+
r.push(`${g}<datafield tag="${u(n.tag)}" ind1="${u(i)}" ind2="${u(s)}">`);
|
|
251
|
+
for (const a of n.subfields) r.push(`${g}${g}<subfield code="${u(a.code)}">${u(a.value)}</subfield>`);
|
|
252
|
+
r.push(`${g}</datafield>`);
|
|
193
253
|
}
|
|
194
|
-
return
|
|
254
|
+
return r.push("</record>"), r.join(`
|
|
195
255
|
`);
|
|
196
256
|
}
|
|
197
|
-
function
|
|
198
|
-
const
|
|
199
|
-
for (const
|
|
200
|
-
const
|
|
201
|
-
`).map((
|
|
257
|
+
function D(e) {
|
|
258
|
+
const r = [X, `<collection ${_}>`];
|
|
259
|
+
for (const n of e) {
|
|
260
|
+
const i = I(n).split(`
|
|
261
|
+
`).map((s) => g + s).join(`
|
|
202
262
|
`);
|
|
203
|
-
|
|
263
|
+
r.push(i);
|
|
204
264
|
}
|
|
205
|
-
return
|
|
265
|
+
return r.push("</collection>"), r.join(`
|
|
206
266
|
`);
|
|
207
267
|
}
|
|
208
268
|
export {
|
|
209
|
-
|
|
210
|
-
|
|
269
|
+
S as parseMarcXml,
|
|
270
|
+
O as parseMarcXmlWithWarnings,
|
|
271
|
+
D as serializeMarcXml
|
|
211
272
|
};
|
|
212
273
|
|
|
213
274
|
//# sourceMappingURL=marcxml.js.map
|
package/dist/marcxml.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"marcxml.js","names":[],"sources":["../src/marcxml.ts"],"sourcesContent":["/**\n * MARCXML parser and serializer.\n *\n * Supports the Library of Congress MARCXML schema:\n * http://www.loc.gov/MARC21/slim\n *\n * Parsing is done with a hand-rolled state machine — no XML library needed.\n * The MARCXML format is sufficiently regular (fixed element names, no arbitrary\n * nesting) that a full DOM parser is unnecessary.\n */\n\nimport type { MarcRecord, ControlField, DataField, Subfield } from './types';\nimport { isControlField } from './types';\n\n// ─── XML entity handling ─────────────────────────────────────────────────────\n\nconst ENTITY_MAP: ReadonlyMap<string, string> = new Map([\n ['amp', '&'],\n ['lt', '<'],\n ['gt', '>'],\n ['quot', '\"'],\n ['apos', \"'\"],\n]);\n\nfunction unescapeXml(text: string): string {\n return text.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (_, hex, dec, name) => {\n if (hex !== undefined) {\n const cp = parseInt(hex, 16);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n if (dec !== undefined) {\n const cp = parseInt(dec, 10);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n return ENTITY_MAP.get(name) ?? _;\n });\n}\n\nfunction escapeXml(text: string): string {\n return text\n // XML 1.0 forbids most C0 control characters in document text. 
There is no\n // valid XML 1.0 representation for them, so substitute the Unicode\n // replacement character to keep the output well-formed.\n .replace(/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/g, '�')\n .replace(/&/g, '&')\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n // Preserve literal CR through the XML round-trip: XML parsers normalize\n // bare \\r and \\r\\n to \\n, so we must encode CR as a numeric reference.\n .replace(/\\r/g, ' ');\n}\n\n// ─── Minimal tokeniser ────────────────────────────────────────────────────────\n\ninterface Token {\n type: 'open' | 'close' | 'self-close' | 'text';\n /** Local name (no namespace prefix) */\n name?: string;\n attrs?: Record<string, string>;\n text?: string;\n}\n\n/**\n * Strip namespace prefix from a tag name, e.g. \"marc:record\" → \"record\".\n */\nfunction localName(raw: string): string {\n const colon = raw.indexOf(':');\n return colon === -1 ? raw : raw.slice(colon + 1);\n}\n\n/**\n * Parse `key=\"value\"` pairs out of an attribute string.\n */\nfunction parseAttrs(attrStr: string): Record<string, string> {\n const attrs: Record<string, string> = {};\n const re = /([a-zA-Z_:][^\\s=]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')/g;\n let m: RegExpExecArray | null;\n while ((m = re.exec(attrStr)) !== null) {\n const key = localName(m[1]!);\n attrs[key] = unescapeXml(m[2] ?? m[3] ?? 
'');\n }\n return attrs;\n}\n\n/**\n * Tokenise an XML string into a flat stream of open/close/text tokens.\n * Skips processing instructions, comments, and DOCTYPE declarations.\n * Sufficient for the well-constrained MARCXML format.\n */\nfunction tokenise(xml: string): Token[] {\n const tokens: Token[] = [];\n let i = 0;\n\n while (i < xml.length) {\n const ltPos = xml.indexOf('<', i);\n\n // Text node before next tag\n if (ltPos === -1) {\n const text = xml.slice(i).trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(xml.slice(i)) });\n break;\n }\n\n if (ltPos > i) {\n const raw = xml.slice(i, ltPos);\n const text = raw.trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(raw) });\n }\n\n const gtPos = xml.indexOf('>', ltPos);\n if (gtPos === -1) break;\n\n const tag = xml.slice(ltPos + 1, gtPos);\n\n // Skip comments, PIs, DOCTYPE\n if (tag.startsWith('!') || tag.startsWith('?')) {\n i = gtPos + 1;\n continue;\n }\n\n if (tag.startsWith('/')) {\n tokens.push({ type: 'close', name: localName(tag.slice(1).trim()) });\n } else if (tag.endsWith('/')) {\n const inner = tag.slice(0, -1).trim();\n const spaceIdx = inner.search(/\\s/);\n const name = spaceIdx === -1 ? inner : inner.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : inner.slice(spaceIdx);\n tokens.push({ type: 'self-close', name: localName(name), attrs: parseAttrs(attrStr) });\n } else {\n const spaceIdx = tag.search(/\\s/);\n const name = spaceIdx === -1 ? tag : tag.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? 
'' : tag.slice(spaceIdx);\n tokens.push({ type: 'open', name: localName(name), attrs: parseAttrs(attrStr) });\n }\n\n i = gtPos + 1;\n }\n\n return tokens;\n}\n\n// ─── MARCXML parser ───────────────────────────────────────────────────────────\n\n/**\n * Parse one `<record>` element's worth of tokens into a MarcRecord.\n * Mutates `pos` via the returned index.\n */\nfunction parseRecordTokens(tokens: Token[], start: number): { record: MarcRecord; end: number } {\n let leader = '';\n const fields: (ControlField | DataField)[] = [];\n let i = start;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n\n if (tok.type === 'close' && tok.name === 'record') {\n return { record: { leader, fields }, end: i + 1 };\n }\n\n if (tok.type === 'open' && tok.name === 'leader') {\n i++;\n if (i < tokens.length && tokens[i]!.type === 'text') {\n leader = tokens[i]!.text!.trim();\n i++;\n }\n // consume </leader>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'controlfield') {\n fields.push({ tag: tok.attrs?.['tag'] ?? '', data: '' });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'] ?? '';\n i++;\n let data = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n data = tokens[i]!.text ?? '';\n i++;\n }\n // consume </controlfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n fields.push({ tag, data });\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'datafield') {\n fields.push({\n tag: tok.attrs?.['tag'] ?? '',\n indicator1: tok.attrs?.['ind1'] ?? ' ',\n indicator2: tok.attrs?.['ind2'] ?? ' ',\n subfields: [],\n });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'] ?? '';\n const indicator1 = tok.attrs?.['ind1'] ?? ' ';\n const indicator2 = tok.attrs?.['ind2'] ?? 
' ';\n const subfields: Subfield[] = [];\n i++;\n\n while (i < tokens.length) {\n const stok = tokens[i]!;\n if (stok.type === 'close' && stok.name === 'datafield') {\n i++;\n break;\n }\n if (stok.type === 'open' && stok.name === 'subfield') {\n const code = stok.attrs?.['code'] ?? '';\n i++;\n let value = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n value = tokens[i]!.text ?? '';\n i++;\n }\n // consume </subfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n subfields.push({ code, value });\n continue;\n }\n i++;\n }\n\n fields.push({ tag, indicator1, indicator2, subfields });\n continue;\n }\n\n i++;\n }\n\n return { record: { leader, fields }, end: i };\n}\n\n/**\n * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.\n * Returns all records found.\n */\nexport function parseMarcXml(xml: string): MarcRecord[] {\n const tokens = tokenise(xml);\n const records: MarcRecord[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n if (tok.type === 'open' && tok.name === 'record') {\n const { record, end } = parseRecordTokens(tokens, i + 1);\n records.push(record);\n i = end;\n continue;\n }\n i++;\n }\n\n return records;\n}\n\n// ─── MARCXML serializer ───────────────────────────────────────────────────────\n\nconst XML_HEADER = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n';\nconst COLLECTION_NS = 'xmlns=\"http://www.loc.gov/MARC21/slim\"';\nconst INDENT = ' ';\n\nfunction serializeMarcXmlRecord(record: MarcRecord): string {\n const lines: string[] = [`<record ${COLLECTION_NS}>`];\n lines.push(`${INDENT}<leader>${escapeXml(record.leader)}</leader>`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(`${INDENT}<controlfield tag=\"${field.tag}\">${escapeXml(field.data)}</controlfield>`);\n } else {\n const ind1 = field.indicator1 === ' ' ? ' ' : field.indicator1;\n const ind2 = field.indicator2 === ' ' ? 
' ' : field.indicator2;\n lines.push(`${INDENT}<datafield tag=\"${field.tag}\" ind1=\"${ind1}\" ind2=\"${ind2}\">`);\n for (const sf of field.subfields) {\n lines.push(\n `${INDENT}${INDENT}<subfield code=\"${sf.code}\">${escapeXml(sf.value)}</subfield>`\n );\n }\n lines.push(`${INDENT}</datafield>`);\n }\n }\n\n lines.push('</record>');\n return lines.join('\\n');\n}\n\n/**\n * Serialize one or more MarcRecords into a MARCXML `<collection>` document.\n */\nexport function serializeMarcXml(records: MarcRecord[]): string {\n const parts: string[] = [\n XML_HEADER,\n `<collection ${COLLECTION_NS}>`,\n ];\n\n for (const record of records) {\n // Indent each record element by one level inside <collection>\n const recordXml = serializeMarcXmlRecord(record)\n .split('\\n')\n .map((line) => INDENT + line)\n .join('\\n');\n parts.push(recordXml);\n }\n\n parts.push('</collection>');\n return parts.join('\\n');\n}\n"],"mappings":";AAgBA,IAAM,IAA0C,oBAAI,IAAI;AAAA,EACtD,CAAC,OAAO,GAAG;AAAA,EACX,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,QAAQ,GAAG;AAAA,EACZ,CAAC,QAAQ,GAAG;AACd,CAAC;AAED,SAAS,EAAY,GAAsB;AACzC,SAAO,EAAK,QAAQ,iDAAA,CAAkD,GAAG,GAAK,GAAK,MAAS;AAC1F,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,WAAO,EAAW,IAAI,CAAI,KAAK;AAAA,EACjC,CAAC;AACH;AAEA,SAAS,EAAU,GAAsB;AACvC,SAAO,EAIJ,QAAQ,iCAAiC,GAAG,EAC5C,QAAQ,MAAM,OAAO,EACrB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,QAAQ,EAGtB,QAAQ,OAAO,OAAO;AAC3B;AAeA,SAAS,EAAU,GAAqB;AACtC,QAAM,IAAQ,EAAI,QAAQ,GAAG;AAC7B,SAAO,MAAU,KAAK,IAAM,EAAI,MAAM,IAAQ,CAAC;AACjD;AAKA,SAAS,EAAW,GAAyC;AAC3D,QAAM,IAAgC,CAAC,GACjC,IAAK;AACX,MAAI;AACJ,UAAQ,IAAI,EAAG,KAAK,CAAO,OAAO,QAAM;AACtC,UAAM,IAAM,EAAU,EAAE,CAAA,CAAG;AAC3B,IAAA,EAAM,CAAA,IAAO,EAAY,EAAE,CAAA,KAAM,EAAE,CAAA,KAAM,EAAE;AAAA,EAC7C;AACA,SAAO;AACT;AAOA,SAAS,EAAS,GAAsB;AACtC,QAAM,IAAkB,CAAC;AACz
B,MAAI,IAAI;AAER,SAAO,IAAI,EAAI,UAAQ;AACrB,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAC;AAGhC,QAAI,MAAU,IAAI;AAEhB,MADa,EAAI,MAAM,CAAC,EAAE,KACtB,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,EAAI,MAAM,CAAC,CAAC;AAAA,MAAE,CAAC;AACvE;AAAA,IACF;AAEA,QAAI,IAAQ,GAAG;AACb,YAAM,IAAM,EAAI,MAAM,GAAG,CAAK;AAE9B,MADa,EAAI,KACb,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,CAAG;AAAA,MAAE,CAAC;AAAA,IAChE;AAEA,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAK;AACpC,QAAI,MAAU,GAAI;AAElB,UAAM,IAAM,EAAI,MAAM,IAAQ,GAAG,CAAK;AAGtC,QAAI,EAAI,WAAW,GAAG,KAAK,EAAI,WAAW,GAAG,GAAG;AAC9C,MAAA,IAAI,IAAQ;AACZ;AAAA,IACF;AAEA,QAAI,EAAI,WAAW,GAAG,EACpB,CAAA,EAAO,KAAK;AAAA,MAAE,MAAM;AAAA,MAAS,MAAM,EAAU,EAAI,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,IAAE,CAAC;AAAA,aAC1D,EAAI,SAAS,GAAG,GAAG;AAC5B,YAAM,IAAQ,EAAI,MAAM,GAAG,EAAE,EAAE,KAAK,GAC9B,IAAW,EAAM,OAAO,IAAI,GAC5B,IAAO,MAAa,KAAK,IAAQ,EAAM,MAAM,GAAG,CAAQ,GACxD,IAAU,MAAa,KAAK,KAAK,EAAM,MAAM,CAAQ;AAC3D,MAAA,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAc,MAAM,EAAU,CAAI;AAAA,QAAG,OAAO,EAAW,CAAO;AAAA,MAAE,CAAC;AAAA,IACvF,OAAO;AACL,YAAM,IAAW,EAAI,OAAO,IAAI,GAC1B,IAAO,MAAa,KAAK,IAAM,EAAI,MAAM,GAAG,CAAQ,GACpD,IAAU,MAAa,KAAK,KAAK,EAAI,MAAM,CAAQ;AACzD,MAAA,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAU,CAAI;AAAA,QAAG,OAAO,EAAW,CAAO;AAAA,MAAE,CAAC;AAAA,IACjF;AAEA,IAAA,IAAI,IAAQ;AAAA,EACd;AAEA,SAAO;AACT;AAQA,SAAS,EAAkB,GAAiB,GAAoD;AAC9F,MAAI,IAAS;AACb,QAAM,IAAuC,CAAC;AAC9C,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AAEnB,QAAI,EAAI,SAAS,WAAW,EAAI,SAAS,SACvC,QAAO;AAAA,MAAE,QAAQ;AAAA,QAAE,QAAA;AAAA,QAAQ,QAAA;AAAA,MAAO;AAAA,MAAG,KAAK,IAAI;AAAA,IAAE;AAGlD,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,MAAA,KACI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAS,EAAO,CAAA,EAAI,KAAM,KAAK,GAC/B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS;AACtD;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,gBAAgB;AAC5D,MAAA,EAAO,KAAK;AAAA,QAAE,KAAK,EAAI,OAAQ,OAAU;AAAA,QAAI,MAAM;AAAA,MAAG,CAAC,GACvD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,gBAAgB;AACtD,YAAM,IAAM,EAAI,OAAQ,OAAU;AAClC,MAAA;AACA,UAAI,IAAO;AACX,MAAI,IAAI,E
AAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAO,EAAO,CAAA,EAAI,QAAQ,IAC1B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,MAAA;AAAA,MAAK,CAAC;AACzB;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,aAAa;AACzD,MAAA,EAAO,KAAK;AAAA,QACV,KAAK,EAAI,OAAQ,OAAU;AAAA,QAC3B,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,WAAW,CAAC;AAAA,MACd,CAAC,GACD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,aAAa;AACnD,YAAM,IAAM,EAAI,OAAQ,OAAU,IAC5B,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAwB,CAAC;AAG/B,WAFA,KAEO,IAAI,EAAO,UAAQ;AACxB,cAAM,IAAO,EAAO,CAAA;AACpB,YAAI,EAAK,SAAS,WAAW,EAAK,SAAS,aAAa;AACtD,UAAA;AACA;AAAA,QACF;AACA,YAAI,EAAK,SAAS,UAAU,EAAK,SAAS,YAAY;AACpD,gBAAM,IAAO,EAAK,OAAQ,QAAW;AACrC,UAAA;AACA,cAAI,IAAQ;AACZ,UAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAQ,EAAO,CAAA,EAAI,QAAQ,IAC3B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAU,KAAK;AAAA,YAAE,MAAA;AAAA,YAAM,OAAA;AAAA,UAAM,CAAC;AAC9B;AAAA,QACF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,EAAO,KAAK;AAAA,QAAE,KAAA;AAAA,QAAK,YAAA;AAAA,QAAY,YAAA;AAAA,QAAY,WAAA;AAAA,MAAU,CAAC;AACtD;AAAA,IACF;AAEA,IAAA;AAAA,EACF;AAEA,SAAO;AAAA,IAAE,QAAQ;AAAA,MAAE,QAAA;AAAA,MAAQ,QAAA;AAAA,IAAO;AAAA,IAAG,KAAK;AAAA,EAAE;AAC9C;AAMA,SAAgB,EAAa,GAA2B;AACtD,QAAM,IAAS,EAAS,CAAG,GACrB,IAAwB,CAAC;AAC/B,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AACnB,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,YAAM,EAAE,QAAA,GAAQ,KAAA,EAAA,IAAQ,EAAkB,GAAQ,IAAI,CAAC;AACvD,MAAA,EAAQ,KAAK,CAAM,GACnB,IAAI;AACJ;AAAA,IACF;AACA,IAAA;AAAA,EACF;AAEA,SAAO;AACT;AAIA,IAAM,IAAa;AAAA,GACb,IAAgB,0CAChB,IAAS;AAEf,SAAS,EAAuB,GAA4B;AAC1D,QAAM,IAAkB,CAAC,WAAW,CAAA,GAAgB;AACpD,EAAA,EAAM,KAAK,GAAG,CAAA,WAAiB,EAAU,EAAO,MAAM,CAAA,WAAY;AAElE,aAAW,KAAS,EAAO,OACzB,KAAI,EAAe,CAAK,EACtB,CAAA,EAAM,KAAK,GAAG,CAAA,sBAA4B,EAAM,GAAA,KAAQ,EAAU,EAAM,IAAI,CAAA,iBAAkB;AAAA,OACzF;AACL,UAAM,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM,YAC9C,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM;AACpD,IAAA,EAAM,KAAK,GAAG,CAAA,mBAAyB,EAAM,GAAA,WAAc,CAAA,WAAe,CAAA,IAAQ;AAClF,eAAW,KAAM,EAA
M,UACrB,CAAA,EAAM,KACJ,GAAG,CAAA,GAAS,CAAA,mBAAyB,EAAG,IAAA,KAAS,EAAU,EAAG,KAAK,CAAA,aACrE;AAEF,IAAA,EAAM,KAAK,GAAG,CAAA,cAAoB;AAAA,EACpC;AAGF,SAAA,EAAM,KAAK,WAAW,GACf,EAAM,KAAK;AAAA,CAAI;AACxB;AAKA,SAAgB,EAAiB,GAA+B;AAC9D,QAAM,IAAkB,CACtB,GACA,eAAe,CAAA,GACjB;AAEA,aAAW,KAAU,GAAS;AAE5B,UAAM,IAAY,EAAuB,CAAM,EAC5C,MAAM;AAAA,CAAI,EACV,IAAA,CAAK,MAAS,IAAS,CAAI,EAC3B,KAAK;AAAA,CAAI;AACZ,IAAA,EAAM,KAAK,CAAS;AAAA,EACtB;AAEA,SAAA,EAAM,KAAK,eAAe,GACnB,EAAM,KAAK;AAAA,CAAI;AACxB"}
|
|
1
|
+
{"version":3,"file":"marcxml.js","names":[],"sources":["../src/marcxml.ts"],"sourcesContent":["/**\n * MARCXML parser and serializer.\n *\n * Supports the Library of Congress MARCXML schema:\n * http://www.loc.gov/MARC21/slim\n *\n * Parsing is done with a hand-rolled state machine — no XML library needed.\n * The MARCXML format is sufficiently regular (fixed element names, no arbitrary\n * nesting) that a full DOM parser is unnecessary.\n */\n\nimport type {\n MarcRecord,\n ControlField,\n DataField,\n Subfield,\n ParseOptions,\n ParseResult,\n ParseBatchResult,\n MarcWarning,\n} from './types';\nimport { isControlField } from './types';\nimport { createWarning } from './warnings';\n\n// ─── XML entity handling ─────────────────────────────────────────────────────\n\nconst ENTITY_MAP: ReadonlyMap<string, string> = new Map([\n ['amp', '&'],\n ['lt', '<'],\n ['gt', '>'],\n ['quot', '\"'],\n ['apos', \"'\"],\n]);\n\nfunction unescapeXml(text: string): string {\n return text.replace(/&(?:#x([0-9a-fA-F]+)|#([0-9]+)|([a-zA-Z]+));/g, (_, hex, dec, name) => {\n if (hex !== undefined) {\n const cp = parseInt(hex, 16);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n if (dec !== undefined) {\n const cp = parseInt(dec, 10);\n return cp >= 0 && cp <= 0x10ffff ? String.fromCodePoint(cp) : '�';\n }\n return ENTITY_MAP.get(name) ?? _;\n });\n}\n\nfunction escapeXml(text: string): string {\n return (\n text\n // XML 1.0 forbids most C0 control characters in document text. 
There is no\n // valid XML 1.0 representation for them, so substitute the Unicode\n // replacement character to keep the output well-formed.\n .replace(/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/g, '�')\n .replace(/&/g, '&')\n .replace(/</g, '<')\n .replace(/>/g, '>')\n .replace(/\"/g, '"')\n // Preserve literal CR through the XML round-trip: XML parsers normalize\n // bare \\r and \\r\\n to \\n, so we must encode CR as a numeric reference.\n .replace(/\\r/g, ' ')\n );\n}\n\n// ─── Minimal tokeniser ────────────────────────────────────────────────────────\n\ninterface Token {\n type: 'open' | 'close' | 'self-close' | 'text';\n /** Local name (no namespace prefix) */\n name?: string;\n attrs?: Record<string, string>;\n text?: string;\n}\n\n/**\n * Strip namespace prefix from a tag name, e.g. \"marc:record\" → \"record\".\n */\nfunction localName(raw: string): string {\n const colon = raw.indexOf(':');\n return colon === -1 ? raw : raw.slice(colon + 1);\n}\n\n/**\n * Parse `key=\"value\"` pairs out of an attribute string.\n */\nfunction parseAttrs(attrStr: string): Record<string, string> {\n const attrs: Record<string, string> = {};\n const re = /([a-zA-Z_:][^\\s=]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')/g;\n let m: RegExpExecArray | null;\n while ((m = re.exec(attrStr)) !== null) {\n const key = localName(m[1]!);\n attrs[key] = unescapeXml(m[2] ?? m[3] ?? 
'');\n }\n return attrs;\n}\n\n/**\n * Find the closing `>` of an XML element tag, respecting quoted attribute values.\n */\nfunction findTagEnd(xml: string, start: number): number {\n let inSingle = false;\n let inDouble = false;\n for (let j = start; j < xml.length; j++) {\n const ch = xml[j];\n if (ch === '\"' && !inSingle) inDouble = !inDouble;\n else if (ch === \"'\" && !inDouble) inSingle = !inSingle;\n else if (ch === '>' && !inSingle && !inDouble) return j;\n }\n return -1;\n}\n\n/**\n * Tokenise an XML string into a flat stream of open/close/text tokens.\n * Skips processing instructions, comments, and DOCTYPE declarations.\n * Handles CDATA sections as literal text.\n */\nfunction tokenise(xml: string, warnings?: MarcWarning[]): Token[] {\n const tokens: Token[] = [];\n let i = 0;\n\n while (i < xml.length) {\n const ltPos = xml.indexOf('<', i);\n\n // Text node before next tag\n if (ltPos === -1) {\n const text = xml.slice(i).trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(xml.slice(i)) });\n break;\n }\n\n if (ltPos > i) {\n const raw = xml.slice(i, ltPos);\n const text = raw.trim();\n if (text) tokens.push({ type: 'text', text: unescapeXml(raw) });\n }\n\n // Handle special constructs before finding the regular tag end\n if (xml.startsWith('<!--', ltPos)) {\n const commentEnd = xml.indexOf('-->', ltPos + 4);\n if (commentEnd === -1) {\n warnings?.push(createWarning('malformed_xml', 'Unterminated comment'));\n i = xml.length;\n } else {\n i = commentEnd + 3;\n }\n continue;\n }\n\n if (xml.startsWith('<![CDATA[', ltPos)) {\n const cdataEnd = xml.indexOf(']]>', ltPos + 9);\n if (cdataEnd === -1) {\n warnings?.push(createWarning('malformed_xml', 'Unterminated CDATA section'));\n i = xml.length;\n } else {\n // CDATA content is literal — no entity unescaping\n const cdataText = xml.slice(ltPos + 9, cdataEnd);\n if (cdataText) tokens.push({ type: 'text', text: cdataText });\n i = cdataEnd + 3;\n }\n continue;\n }\n\n if 
(xml.startsWith('<?', ltPos)) {\n const piEnd = xml.indexOf('?>', ltPos + 2);\n i = piEnd === -1 ? xml.length : piEnd + 2;\n continue;\n }\n\n if (xml.startsWith('<!', ltPos)) {\n const bangEnd = xml.indexOf('>', ltPos + 2);\n i = bangEnd === -1 ? xml.length : bangEnd + 1;\n continue;\n }\n\n // Regular element tag — use quote-aware scanning\n const gtPos = findTagEnd(xml, ltPos + 1);\n if (gtPos === -1) {\n warnings?.push(createWarning('malformed_xml', 'Unclosed tag at end of input'));\n break;\n }\n\n const tag = xml.slice(ltPos + 1, gtPos);\n\n if (tag.startsWith('/')) {\n tokens.push({ type: 'close', name: localName(tag.slice(1).trim()) });\n } else if (tag.endsWith('/')) {\n const inner = tag.slice(0, -1).trim();\n const spaceIdx = inner.search(/\\s/);\n const name = spaceIdx === -1 ? inner : inner.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : inner.slice(spaceIdx);\n tokens.push({\n type: 'self-close',\n name: localName(name),\n attrs: parseAttrs(attrStr),\n });\n } else {\n const spaceIdx = tag.search(/\\s/);\n const name = spaceIdx === -1 ? tag : tag.slice(0, spaceIdx);\n const attrStr = spaceIdx === -1 ? '' : tag.slice(spaceIdx);\n tokens.push({\n type: 'open',\n name: localName(name),\n attrs: parseAttrs(attrStr),\n });\n }\n\n i = gtPos + 1;\n }\n\n return tokens;\n}\n\n// ─── MARCXML parser ───────────────────────────────────────────────────────────\n\nfunction emitWarning(warnings: MarcWarning[], warning: MarcWarning, options: ParseOptions): void {\n if (options.strict) {\n throw new Error(warning.message);\n }\n const max = options.maxWarnings ?? 
100;\n if (warnings.length < max) {\n warnings.push(warning);\n }\n}\n\n/**\n * Parse one `<record>` element's worth of tokens into a MarcRecord.\n */\nfunction parseRecordTokens(\n tokens: Token[],\n start: number,\n warnings: MarcWarning[],\n options: ParseOptions\n): { record: MarcRecord; end: number } {\n let leader = '';\n let hasLeader = false;\n const fields: (ControlField | DataField)[] = [];\n let i = start;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n\n if (tok.type === 'close' && tok.name === 'record') {\n if (!hasLeader) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'Record has no <leader> element'),\n options\n );\n }\n return { record: { leader, fields }, end: i + 1 };\n }\n\n if (tok.type === 'open' && tok.name === 'leader') {\n hasLeader = true;\n i++;\n if (i < tokens.length && tokens[i]!.type === 'text') {\n leader = tokens[i]!.text!.trim();\n i++;\n }\n if (leader.length !== 24) {\n emitWarning(\n warnings,\n createWarning('invalid_leader', `Leader is ${leader.length} characters, expected 24`),\n options\n );\n }\n // consume </leader>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'controlfield missing tag attribute'),\n options\n );\n }\n fields.push({ tag: tag ?? '', data: '' });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'controlfield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'controlfield missing tag attribute'),\n options\n );\n }\n i++;\n let data = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n data = tokens[i]!.text ?? '';\n i++;\n }\n // consume </controlfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n fields.push({ tag: tag ?? 
'', data });\n continue;\n }\n\n if (tok.type === 'self-close' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'datafield missing tag attribute'),\n options\n );\n }\n fields.push({\n tag: tag ?? '',\n indicator1: tok.attrs?.['ind1'] ?? ' ',\n indicator2: tok.attrs?.['ind2'] ?? ' ',\n subfields: [],\n });\n i++;\n continue;\n }\n\n if (tok.type === 'open' && tok.name === 'datafield') {\n const tag = tok.attrs?.['tag'];\n if (tag === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'datafield missing tag attribute'),\n options\n );\n }\n const indicator1 = tok.attrs?.['ind1'] ?? ' ';\n const indicator2 = tok.attrs?.['ind2'] ?? ' ';\n const subfields: Subfield[] = [];\n i++;\n\n while (i < tokens.length) {\n const stok = tokens[i]!;\n if (stok.type === 'close' && stok.name === 'datafield') {\n i++;\n break;\n }\n if (stok.type === 'open' && stok.name === 'subfield') {\n const code = stok.attrs?.['code'];\n if (code === undefined) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'subfield missing code attribute', undefined, tag),\n options\n );\n }\n i++;\n let value = '';\n if (i < tokens.length && tokens[i]!.type === 'text') {\n value = tokens[i]!.text ?? '';\n i++;\n }\n // consume </subfield>\n if (i < tokens.length && tokens[i]!.type === 'close') i++;\n subfields.push({ code: code ?? '', value });\n continue;\n }\n i++;\n }\n\n fields.push({ tag: tag ?? 
'', indicator1, indicator2, subfields });\n continue;\n }\n\n i++;\n }\n\n if (!hasLeader) {\n emitWarning(\n warnings,\n createWarning('missing_element', 'Record has no <leader> element'),\n options\n );\n }\n return { record: { leader, fields }, end: i };\n}\n\n/**\n * Parse a MARCXML string, returning per-record parse results including warnings.\n * Unlike parseMarcXml, every record attempt is included even if it produced warnings.\n */\nexport function parseMarcXmlWithWarnings(xml: string, options?: ParseOptions): ParseBatchResult {\n const opts = options ?? {};\n const tokWarnings: MarcWarning[] = [];\n const tokens = tokenise(xml, tokWarnings);\n const results: ParseResult[] = [];\n let i = 0;\n\n while (i < tokens.length) {\n const tok = tokens[i]!;\n if (tok.type === 'open' && tok.name === 'record') {\n const recordWarnings: MarcWarning[] = [];\n const { record, end } = parseRecordTokens(tokens, i + 1, recordWarnings, opts);\n results.push({ record, warnings: recordWarnings });\n i = end;\n continue;\n }\n i++;\n }\n\n if (results.length === 0 && tokWarnings.length > 0) {\n results.push({ record: null, warnings: tokWarnings });\n } else if (tokWarnings.length > 0 && results.length > 0) {\n results[0] = {\n record: results[0]!.record,\n warnings: [...tokWarnings, ...results[0]!.warnings],\n };\n }\n\n return { results };\n}\n\n/**\n * Parse a MARCXML string containing one `<collection>` or one bare `<record>`.\n * Returns all successfully parsed records.\n */\nexport function parseMarcXml(xml: string, options?: ParseOptions): MarcRecord[] {\n const batch = parseMarcXmlWithWarnings(xml, options);\n return batch.results.map((r) => r.record).filter((r): r is MarcRecord => r !== null);\n}\n\n// ─── MARCXML serializer ───────────────────────────────────────────────────────\n\nconst XML_HEADER = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>\\n';\nconst COLLECTION_NS = 'xmlns=\"http://www.loc.gov/MARC21/slim\"';\nconst INDENT = ' ';\n\nfunction 
serializeMarcXmlRecord(record: MarcRecord): string {\n const lines: string[] = [`<record ${COLLECTION_NS}>`];\n lines.push(`${INDENT}<leader>${escapeXml(record.leader)}</leader>`);\n\n for (const field of record.fields) {\n if (isControlField(field)) {\n lines.push(\n `${INDENT}<controlfield tag=\"${escapeXml(field.tag)}\">${escapeXml(field.data)}</controlfield>`\n );\n } else {\n const ind1 = field.indicator1 === ' ' ? ' ' : field.indicator1;\n const ind2 = field.indicator2 === ' ' ? ' ' : field.indicator2;\n lines.push(\n `${INDENT}<datafield tag=\"${escapeXml(field.tag)}\" ind1=\"${escapeXml(ind1)}\" ind2=\"${escapeXml(ind2)}\">`\n );\n for (const sf of field.subfields) {\n lines.push(\n `${INDENT}${INDENT}<subfield code=\"${escapeXml(sf.code)}\">${escapeXml(sf.value)}</subfield>`\n );\n }\n lines.push(`${INDENT}</datafield>`);\n }\n }\n\n lines.push('</record>');\n return lines.join('\\n');\n}\n\n/**\n * Serialize one or more MarcRecords into a MARCXML `<collection>` document.\n */\nexport function serializeMarcXml(records: MarcRecord[]): string {\n const parts: string[] = [XML_HEADER, `<collection ${COLLECTION_NS}>`];\n\n for (const record of records) {\n // Indent each record element by one level inside <collection>\n const recordXml = serializeMarcXmlRecord(record)\n .split('\\n')\n .map((line) => INDENT + line)\n .join('\\n');\n parts.push(recordXml);\n }\n\n parts.push('</collection>');\n return 
parts.join('\\n');\n}\n"],"mappings":";;AA0BA,IAAM,IAA0C,oBAAI,IAAI;AAAA,EACtD,CAAC,OAAO,GAAG;AAAA,EACX,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,MAAM,GAAG;AAAA,EACV,CAAC,QAAQ,GAAG;AAAA,EACZ,CAAC,QAAQ,GAAG;AACd,CAAC;AAED,SAAS,EAAY,GAAsB;AACzC,SAAO,EAAK,QAAQ,iDAAA,CAAkD,GAAG,GAAK,GAAK,MAAS;AAC1F,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,QAAI,MAAQ,QAAW;AACrB,YAAM,IAAK,SAAS,GAAK,EAAE;AAC3B,aAAO,KAAM,KAAK,KAAM,UAAW,OAAO,cAAc,CAAE,IAAI;AAAA,IAChE;AACA,WAAO,EAAW,IAAI,CAAI,KAAK;AAAA,EACjC,CAAC;AACH;AAEA,SAAS,EAAU,GAAsB;AACvC,SACE,EAIG,QAAQ,iCAAiC,GAAG,EAC5C,QAAQ,MAAM,OAAO,EACrB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,MAAM,EACpB,QAAQ,MAAM,QAAQ,EAGtB,QAAQ,OAAO,OAAO;AAE7B;AAeA,SAAS,EAAU,GAAqB;AACtC,QAAM,IAAQ,EAAI,QAAQ,GAAG;AAC7B,SAAO,MAAU,KAAK,IAAM,EAAI,MAAM,IAAQ,CAAC;AACjD;AAKA,SAAS,EAAW,GAAyC;AAC3D,QAAM,IAAgC,CAAC,GACjC,IAAK;AACX,MAAI;AACJ,UAAQ,IAAI,EAAG,KAAK,CAAO,OAAO,QAAM;AACtC,UAAM,IAAM,EAAU,EAAE,CAAA,CAAG;AAC3B,IAAA,EAAM,CAAA,IAAO,EAAY,EAAE,CAAA,KAAM,EAAE,CAAA,KAAM,EAAE;AAAA,EAC7C;AACA,SAAO;AACT;AAKA,SAAS,EAAW,GAAa,GAAuB;AACtD,MAAI,IAAW,IACX,IAAW;AACf,WAAS,IAAI,GAAO,IAAI,EAAI,QAAQ,KAAK;AACvC,UAAM,IAAK,EAAI,CAAA;AACf,QAAI,MAAO,OAAO,CAAC,EAAU,CAAA,IAAW,CAAC;AAAA,aAChC,MAAO,OAAO,CAAC,EAAU,CAAA,IAAW,CAAC;AAAA,aACrC,MAAO,OAAO,CAAC,KAAY,CAAC,EAAU,QAAO;AAAA,EACxD;AACA,SAAO;AACT;AAOA,SAAS,EAAS,GAAa,GAAmC;AAChE,QAAM,IAAkB,CAAC;AACzB,MAAI,IAAI;AAER,SAAO,IAAI,EAAI,UAAQ;AACrB,UAAM,IAAQ,EAAI,QAAQ,KAAK,CAAC;AAGhC,QAAI,MAAU,IAAI;AAEhB,MADa,EAAI,MAAM,CAAC,EAAE,KACtB,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,EAAI,MAAM,CAAC,CAAC;AAAA,MAAE,CAAC;AACvE;AAAA,IACF;AAEA,QAAI,IAAQ,GAAG;AACb,YAAM,IAAM,EAAI,MAAM,GAAG,CAAK;AAE9B,MADa,EAAI,KACb,KAAM,EAAO,KAAK;AAAA,QAAE,MAAM;AAAA,QAAQ,MAAM,EAAY,CAAG;AAAA,MAAE,CAAC;AAAA,IAChE;AAGA,QAAI,EAAI,WAAW,QAAQ,CAAK,GAAG;AACjC,YAAM,IAAa,EAAI,QAAQ,OAAO,IAAQ,CAAC;AAC/C,MAAI,MAAe,MACjB,GAAU,KAAK,EAAc,iBAAiB,sBAAsB,CAAC,GACrE,IAAI,EAAI,UAER,IAAI,IAAa;AAEnB;AAAA,IACF;AAEA,QAAI,EAAI,WAAW,aAAa,CAAK,GAAG;AACtC,Y
AAM,IAAW,EAAI,QAAQ,OAAO,IAAQ,CAAC;AAC7C,UAAI,MAAa;AACf,QAAA,GAAU,KAAK,EAAc,iBAAiB,4BAA4B,CAAC,GAC3E,IAAI,EAAI;AAAA,WACH;AAEL,cAAM,IAAY,EAAI,MAAM,IAAQ,GAAG,CAAQ;AAC/C,QAAI,KAAW,EAAO,KAAK;AAAA,UAAE,MAAM;AAAA,UAAQ,MAAM;AAAA,QAAU,CAAC,GAC5D,IAAI,IAAW;AAAA,MACjB;AACA;AAAA,IACF;AAEA,QAAI,EAAI,WAAW,MAAM,CAAK,GAAG;AAC/B,YAAM,IAAQ,EAAI,QAAQ,MAAM,IAAQ,CAAC;AACzC,UAAI,MAAU,KAAK,EAAI,SAAS,IAAQ;AACxC;AAAA,IACF;AAEA,QAAI,EAAI,WAAW,MAAM,CAAK,GAAG;AAC/B,YAAM,IAAU,EAAI,QAAQ,KAAK,IAAQ,CAAC;AAC1C,UAAI,MAAY,KAAK,EAAI,SAAS,IAAU;AAC5C;AAAA,IACF;AAGA,UAAM,IAAQ,EAAW,GAAK,IAAQ,CAAC;AACvC,QAAI,MAAU,IAAI;AAChB,MAAA,GAAU,KAAK,EAAc,iBAAiB,8BAA8B,CAAC;AAC7E;AAAA,IACF;AAEA,UAAM,IAAM,EAAI,MAAM,IAAQ,GAAG,CAAK;AAEtC,QAAI,EAAI,WAAW,GAAG,EACpB,CAAA,EAAO,KAAK;AAAA,MAAE,MAAM;AAAA,MAAS,MAAM,EAAU,EAAI,MAAM,CAAC,EAAE,KAAK,CAAC;AAAA,IAAE,CAAC;AAAA,aAC1D,EAAI,SAAS,GAAG,GAAG;AAC5B,YAAM,IAAQ,EAAI,MAAM,GAAG,EAAE,EAAE,KAAK,GAC9B,IAAW,EAAM,OAAO,IAAI,GAC5B,IAAO,MAAa,KAAK,IAAQ,EAAM,MAAM,GAAG,CAAQ,GACxD,IAAU,MAAa,KAAK,KAAK,EAAM,MAAM,CAAQ;AAC3D,MAAA,EAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,EAAU,CAAI;AAAA,QACpB,OAAO,EAAW,CAAO;AAAA,MAC3B,CAAC;AAAA,IACH,OAAO;AACL,YAAM,IAAW,EAAI,OAAO,IAAI,GAC1B,IAAO,MAAa,KAAK,IAAM,EAAI,MAAM,GAAG,CAAQ,GACpD,IAAU,MAAa,KAAK,KAAK,EAAI,MAAM,CAAQ;AACzD,MAAA,EAAO,KAAK;AAAA,QACV,MAAM;AAAA,QACN,MAAM,EAAU,CAAI;AAAA,QACpB,OAAO,EAAW,CAAO;AAAA,MAC3B,CAAC;AAAA,IACH;AAEA,QAAI,IAAQ;AAAA,EACd;AAEA,SAAO;AACT;AAIA,SAAS,EAAY,GAAyB,GAAsB,GAA6B;AAC/F,MAAI,EAAQ,OACV,OAAM,IAAI,MAAM,EAAQ,OAAO;AAEjC,QAAM,IAAM,EAAQ,eAAe;AACnC,EAAI,EAAS,SAAS,KACpB,EAAS,KAAK,CAAO;AAEzB;AAKA,SAAS,EACP,GACA,GACA,GACA,GACqC;AACrC,MAAI,IAAS,IACT,IAAY;AAChB,QAAM,IAAuC,CAAC;AAC9C,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AAEnB,QAAI,EAAI,SAAS,WAAW,EAAI,SAAS;AACvC,aAAK,KACH,EACE,GACA,EAAc,mBAAmB,gCAAgC,GACjE,CACF,GAEK;AAAA,QAAE,QAAQ;AAAA,UAAE,QAAA;AAAA,UAAQ,QAAA;AAAA,QAAO;AAAA,QAAG,KAAK,IAAI;AAAA,MAAE;AAGlD,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,MAAA,IAAY,IACZ,KACI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAS,EAAO,CAAA,
EAAI,KAAM,KAAK,GAC/B,MAEE,EAAO,WAAW,MACpB,EACE,GACA,EAAc,kBAAkB,aAAa,EAAO,MAAA,0BAAgC,GACpF,CACF,GAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS;AACtD;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,gBAAgB;AAC5D,YAAM,IAAM,EAAI,OAAQ;AACxB,MAAI,MAAQ,UACV,EACE,GACA,EAAc,mBAAmB,oCAAoC,GACrE,CACF,GAEF,EAAO,KAAK;AAAA,QAAE,KAAK,KAAO;AAAA,QAAI,MAAM;AAAA,MAAG,CAAC,GACxC;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,gBAAgB;AACtD,YAAM,IAAM,EAAI,OAAQ;AACxB,MAAI,MAAQ,UACV,EACE,GACA,EAAc,mBAAmB,oCAAoC,GACrE,CACF,GAEF;AACA,UAAI,IAAO;AACX,MAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAO,EAAO,CAAA,EAAI,QAAQ,IAC1B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAO,KAAK;AAAA,QAAE,KAAK,KAAO;AAAA,QAAI,MAAA;AAAA,MAAK,CAAC;AACpC;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,gBAAgB,EAAI,SAAS,aAAa;AACzD,YAAM,IAAM,EAAI,OAAQ;AACxB,MAAI,MAAQ,UACV,EACE,GACA,EAAc,mBAAmB,iCAAiC,GAClE,CACF,GAEF,EAAO,KAAK;AAAA,QACV,KAAK,KAAO;AAAA,QACZ,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,YAAY,EAAI,OAAQ,QAAW;AAAA,QACnC,WAAW,CAAC;AAAA,MACd,CAAC,GACD;AACA;AAAA,IACF;AAEA,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,aAAa;AACnD,YAAM,IAAM,EAAI,OAAQ;AACxB,MAAI,MAAQ,UACV,EACE,GACA,EAAc,mBAAmB,iCAAiC,GAClE,CACF;AAEF,YAAM,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAa,EAAI,OAAQ,QAAW,KACpC,IAAwB,CAAC;AAG/B,WAFA,KAEO,IAAI,EAAO,UAAQ;AACxB,cAAM,IAAO,EAAO,CAAA;AACpB,YAAI,EAAK,SAAS,WAAW,EAAK,SAAS,aAAa;AACtD,UAAA;AACA;AAAA,QACF;AACA,YAAI,EAAK,SAAS,UAAU,EAAK,SAAS,YAAY;AACpD,gBAAM,IAAO,EAAK,OAAQ;AAC1B,UAAI,MAAS,UACX,EACE,GACA,EAAc,mBAAmB,mCAAmC,QAAW,CAAG,GAClF,CACF,GAEF;AACA,cAAI,IAAQ;AACZ,UAAI,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAC3C,IAAQ,EAAO,CAAA,EAAI,QAAQ,IAC3B,MAGE,IAAI,EAAO,UAAU,EAAO,CAAA,EAAI,SAAS,WAAS,KACtD,EAAU,KAAK;AAAA,YAAE,MAAM,KAAQ;AAAA,YAAI,OAAA;AAAA,UAAM,CAAC;AAC1C;AAAA,QACF;AACA,QAAA;AAAA,MACF;AAEA,MAAA,EAAO,KAAK;AAAA,QAAE,KAAK,KAAO;AAAA,QAAI,YAAA;AAAA,QAAY,YAAA;AAAA,QAAY,WAAA;AAAA,MAAU,CAAC;AACjE;AAAA,IACF;AAEA,IAAA;AAAA,EACF;AAEA,SAAK,KACH,EACE,GACA,EAAc,mBAAmB,gCAAgC,GACjE,CACF,GAEK;AAAA,IAAE,QAAQ;AAAA,MAAE,QAAA;AAAA,MAAQ,QAAA;AAAA,IAAO;AAAA,IAAG,KAAK;AAAA,EAAE
;AAC9C;AAMA,SAAgB,EAAyB,GAAa,GAA0C;AAC9F,QAAM,IAAO,KAAW,CAAC,GACnB,IAA6B,CAAC,GAC9B,IAAS,EAAS,GAAK,CAAW,GAClC,IAAyB,CAAC;AAChC,MAAI,IAAI;AAER,SAAO,IAAI,EAAO,UAAQ;AACxB,UAAM,IAAM,EAAO,CAAA;AACnB,QAAI,EAAI,SAAS,UAAU,EAAI,SAAS,UAAU;AAChD,YAAM,IAAgC,CAAC,GACjC,EAAE,QAAA,GAAQ,KAAA,EAAA,IAAQ,EAAkB,GAAQ,IAAI,GAAG,GAAgB,CAAI;AAC7E,MAAA,EAAQ,KAAK;AAAA,QAAE,QAAA;AAAA,QAAQ,UAAU;AAAA,MAAe,CAAC,GACjD,IAAI;AACJ;AAAA,IACF;AACA,IAAA;AAAA,EACF;AAEA,SAAI,EAAQ,WAAW,KAAK,EAAY,SAAS,IAC/C,EAAQ,KAAK;AAAA,IAAE,QAAQ;AAAA,IAAM,UAAU;AAAA,EAAY,CAAC,IAC3C,EAAY,SAAS,KAAK,EAAQ,SAAS,MACpD,EAAQ,CAAA,IAAK;AAAA,IACX,QAAQ,EAAQ,CAAA,EAAI;AAAA,IACpB,UAAU,CAAC,GAAG,GAAa,GAAG,EAAQ,CAAA,EAAI,QAAQ;AAAA,EACpD,IAGK,EAAE,SAAA,EAAQ;AACnB;AAMA,SAAgB,EAAa,GAAa,GAAsC;AAE9E,SADc,EAAyB,GAAK,CACrC,EAAM,QAAQ,IAAA,CAAK,MAAM,EAAE,MAAM,EAAE,OAAA,CAAQ,MAAuB,MAAM,IAAI;AACrF;AAIA,IAAM,IAAa;AAAA,GACb,IAAgB,0CAChB,IAAS;AAEf,SAAS,EAAuB,GAA4B;AAC1D,QAAM,IAAkB,CAAC,WAAW,CAAA,GAAgB;AACpD,EAAA,EAAM,KAAK,GAAG,CAAA,WAAiB,EAAU,EAAO,MAAM,CAAA,WAAY;AAElE,aAAW,KAAS,EAAO,OACzB,KAAI,EAAe,CAAK,EACtB,CAAA,EAAM,KACJ,GAAG,CAAA,sBAA4B,EAAU,EAAM,GAAG,CAAA,KAAM,EAAU,EAAM,IAAI,CAAA,iBAC9E;AAAA,OACK;AACL,UAAM,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM,YAC9C,IAAO,EAAM,eAAe,MAAM,MAAM,EAAM;AACpD,IAAA,EAAM,KACJ,GAAG,CAAA,mBAAyB,EAAU,EAAM,GAAG,CAAA,WAAY,EAAU,CAAI,CAAA,WAAY,EAAU,CAAI,CAAA,IACrG;AACA,eAAW,KAAM,EAAM,UACrB,CAAA,EAAM,KACJ,GAAG,CAAA,GAAS,CAAA,mBAAyB,EAAU,EAAG,IAAI,CAAA,KAAM,EAAU,EAAG,KAAK,CAAA,aAChF;AAEF,IAAA,EAAM,KAAK,GAAG,CAAA,cAAoB;AAAA,EACpC;AAGF,SAAA,EAAM,KAAK,WAAW,GACf,EAAM,KAAK;AAAA,CAAI;AACxB;AAKA,SAAgB,EAAiB,GAA+B;AAC9D,QAAM,IAAkB,CAAC,GAAY,eAAe,CAAA,GAAgB;AAEpE,aAAW,KAAU,GAAS;AAE5B,UAAM,IAAY,EAAuB,CAAM,EAC5C,MAAM;AAAA,CAAI,EACV,IAAA,CAAK,MAAS,IAAS,CAAI,EAC3B,KAAK;AAAA,CAAI;AACZ,IAAA,EAAM,KAAK,CAAS;AAAA,EACtB;AAEA,SAAA,EAAM,KAAK,eAAe,GACnB,EAAM,KAAK;AAAA,CAAI;AACxB"}
|
package/dist/parser.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MarcRecord, ParseOptions } from './types';
|
|
1
|
+
import { MarcRecord, ParseOptions, ParseBatchResult } from './types';
|
|
2
2
|
/**
|
|
3
3
|
* Parse a concatenated ISO2709 binary stream into an array of MARC records.
|
|
4
4
|
*
|
|
@@ -11,4 +11,13 @@ import { MarcRecord, ParseOptions } from './types';
|
|
|
11
11
|
* @returns Array of successfully parsed MARC records
|
|
12
12
|
*/
|
|
13
13
|
export declare function parseMarcBinary(buffer: Uint8Array, options?: ParseOptions): MarcRecord[];
|
|
14
|
+
/**
|
|
15
|
+
* Parse a concatenated ISO2709 binary stream, returning per-record
|
|
16
|
+
* parse results including any warnings.
|
|
17
|
+
*
|
|
18
|
+
* Unlike {@link parseMarcBinary}, records that fail to parse are
|
|
19
|
+
* included in the results array (with `record: null`) so callers
|
|
20
|
+
* can inspect their warnings.
|
|
21
|
+
*/
|
|
22
|
+
export declare function parseMarcBinaryWithWarnings(buffer: Uint8Array, options?: ParseOptions): ParseBatchResult;
|
|
14
23
|
//# sourceMappingURL=parser.d.ts.map
|
package/dist/serializer.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { MarcRecord } from './types';
|
|
1
|
+
import { MarcRecord, SerializeBatchResult } from './types';
|
|
2
2
|
/**
|
|
3
3
|
* Options for serializing MARC records.
|
|
4
4
|
*/
|
|
@@ -11,6 +11,11 @@ export interface SerializeOptions {
|
|
|
11
11
|
* to ASCII plus ANSEL Latin/combining characters.
|
|
12
12
|
*/
|
|
13
13
|
readonly encoding?: 'utf8' | 'marc8';
|
|
14
|
+
/**
|
|
15
|
+
* Maximum number of warnings to collect per record before stopping.
|
|
16
|
+
* Default: 100
|
|
17
|
+
*/
|
|
18
|
+
readonly maxWarnings?: number;
|
|
14
19
|
}
|
|
15
20
|
/**
|
|
16
21
|
* Serialize an array of MARC records to a concatenated ISO2709 binary stream.
|
|
@@ -23,4 +28,9 @@ export interface SerializeOptions {
|
|
|
23
28
|
* @returns Concatenated binary representation of all records
|
|
24
29
|
*/
|
|
25
30
|
export declare function serializeMarcBinary(records: MarcRecord[], options?: SerializeOptions): Uint8Array;
|
|
31
|
+
/**
|
|
32
|
+
* Serialize an array of MARC records to a concatenated ISO2709 binary
|
|
33
|
+
* stream, returning per-record serialization warnings.
|
|
34
|
+
*/
|
|
35
|
+
export declare function serializeMarcBinaryWithWarnings(records: MarcRecord[], options?: SerializeOptions): SerializeBatchResult;
|
|
26
36
|
//# sourceMappingURL=serializer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types-BMKDHD1l.js","names":[],"sources":["../src/types.ts"],"sourcesContent":["/**\n * Core type definitions for MARC21 records.\n * All types use readonly modifiers to enforce immutability.\n */\n\n/**\n * A subfield within a MARC data field.\n * Contains a single-character code and its associated value.\n *\n * @example\n * ```typescript\n * const subfield: Subfield = { code: 'a', value: 'The Catcher in the Rye' };\n * ```\n */\nexport interface Subfield {\n readonly code: string;\n readonly value: string;\n}\n\n/**\n * A MARC control field (tag 00X).\n * Control fields have no indicators or subfields, only a tag and data.\n *\n * @example\n * ```typescript\n * const controlField: ControlField = { tag: '001', data: 'ocm12345678' };\n * ```\n */\nexport interface ControlField {\n readonly tag: string;\n readonly data: string;\n}\n\n/**\n * A MARC data field (tag 01X-9XX).\n * Data fields have a tag, two indicators, and one or more subfields.\n *\n * @example\n * ```typescript\n * const dataField: DataField = {\n * tag: '245',\n * indicator1: '1',\n * indicator2: '0',\n * subfields: [\n * { code: 'a', value: 'The Catcher in the Rye /' },\n * { code: 'c', value: 'J.D. Salinger.' 
},\n * ],\n * };\n * ```\n */\nexport interface DataField {\n readonly tag: string;\n readonly indicator1: string; // Use ' ' for blank indicator\n readonly indicator2: string; // Use ' ' for blank indicator\n readonly subfields: readonly Subfield[];\n}\n\n/**\n * A complete MARC21 record.\n * Contains a 24-character leader and an array of fields (control or data fields).\n *\n * @example\n * ```typescript\n * const record: MarcRecord = {\n * leader: '00000nam 2200000 4500',\n * fields: [\n * { tag: '001', data: 'ocm12345678' },\n * {\n * tag: '245',\n * indicator1: '1',\n * indicator2: '0',\n * subfields: [{ code: 'a', value: 'Title' }],\n * },\n * ],\n * };\n * ```\n */\nexport interface MarcRecord {\n readonly leader: string; // Always 24 characters\n readonly fields: readonly (ControlField | DataField)[];\n}\n\n/**\n * Warning type categories for MARC parsing errors.\n */\nexport type MarcWarningType =\n | 'invalid_leader'\n | 'invalid_directory'\n | 'invalid_field'\n | 'truncated_record'\n | 'encoding_error'\n | 'malformed_xml'\n | 'missing_element'\n | 'invalid_attribute';\n\n/**\n * A warning generated during MARC record parsing.\n * Warnings indicate non-fatal issues that were encountered and recovered from.\n */\nexport interface MarcWarning {\n readonly type: MarcWarningType;\n readonly message: string;\n readonly position?: number; // Byte position in the record\n readonly tag?: string; // Field tag associated with the warning\n}\n\n/**\n * Options for parsing MARC records.\n */\nexport interface ParseOptions {\n /**\n * If true, throw errors instead of collecting warnings.\n * Default: false\n */\n readonly strict?: boolean;\n\n /**\n * Maximum number of warnings to collect before stopping.\n * Prevents memory issues with severely malformed records.\n * Default: 100\n */\n readonly maxWarnings?: number;\n}\n\n/**\n * Result of parsing a MARC record.\n * Contains the parsed record (if successful) and any warnings encountered.\n */\nexport interface 
ParseResult {\n readonly record: MarcRecord | null;\n readonly warnings: readonly MarcWarning[];\n}\n\n/**\n * Result of parsing multiple MARC records with warning capture.\n * Each entry pairs a parsed record (or null on failure) with its warnings.\n */\nexport interface ParseBatchResult {\n readonly results: readonly ParseResult[];\n}\n\n/**\n * Result of serializing a single MARC record with warning capture.\n */\nexport interface SerializeRecordResult {\n readonly bytes: Uint8Array;\n readonly warnings: readonly MarcWarning[];\n}\n\n/**\n * Result of serializing multiple MARC records with warning capture.\n * Contains the concatenated bytes and per-record results.\n */\nexport interface SerializeBatchResult {\n readonly bytes: Uint8Array;\n readonly results: readonly SerializeRecordResult[];\n}\n\n/**\n * Type guard to check if a field is a control field.\n *\n * @param field - The field to check\n * @returns True if the field is a control field\n *\n * @example\n * ```typescript\n * if (isControlField(field)) {\n * console.log(field.data); // TypeScript knows field is ControlField\n * }\n * ```\n */\nexport function isControlField(field: ControlField | DataField): field is ControlField {\n return 'data' in field;\n}\n\n/**\n * Type guard to check if a field is a data field.\n *\n * @param field - The field to check\n * @returns True if the field is a data field\n *\n * @example\n * ```typescript\n * if (isDataField(field)) {\n * console.log(field.subfields); // TypeScript knows field is DataField\n * }\n * ```\n */\nexport function isDataField(field: ControlField | DataField): field is DataField {\n return 'subfields' in field;\n}\n"],"mappings":"AA2KA,SAAgB,EAAe,GAAwD;AACrF,SAAO,UAAU;AACnB;AAeA,SAAgB,EAAY,GAAqD;AAC/E,SAAO,eAAe;AACxB"}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
function t(e){return"data"in e}function n(e){return"subfields"in e}Object.defineProperty(exports,"isControlField",{enumerable:!0,get:function(){return t}});Object.defineProperty(exports,"isDataField",{enumerable:!0,get:function(){return n}});
|
|
2
2
|
|
|
3
|
-
//# sourceMappingURL=types-
|
|
3
|
+
//# sourceMappingURL=types-CsOhH4OF.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types-CsOhH4OF.cjs","names":[],"sources":["../src/types.ts"],"sourcesContent":["/**\n * Core type definitions for MARC21 records.\n * All types use readonly modifiers to enforce immutability.\n */\n\n/**\n * A subfield within a MARC data field.\n * Contains a single-character code and its associated value.\n *\n * @example\n * ```typescript\n * const subfield: Subfield = { code: 'a', value: 'The Catcher in the Rye' };\n * ```\n */\nexport interface Subfield {\n readonly code: string;\n readonly value: string;\n}\n\n/**\n * A MARC control field (tag 00X).\n * Control fields have no indicators or subfields, only a tag and data.\n *\n * @example\n * ```typescript\n * const controlField: ControlField = { tag: '001', data: 'ocm12345678' };\n * ```\n */\nexport interface ControlField {\n readonly tag: string;\n readonly data: string;\n}\n\n/**\n * A MARC data field (tag 01X-9XX).\n * Data fields have a tag, two indicators, and one or more subfields.\n *\n * @example\n * ```typescript\n * const dataField: DataField = {\n * tag: '245',\n * indicator1: '1',\n * indicator2: '0',\n * subfields: [\n * { code: 'a', value: 'The Catcher in the Rye /' },\n * { code: 'c', value: 'J.D. Salinger.' 
},\n * ],\n * };\n * ```\n */\nexport interface DataField {\n readonly tag: string;\n readonly indicator1: string; // Use ' ' for blank indicator\n readonly indicator2: string; // Use ' ' for blank indicator\n readonly subfields: readonly Subfield[];\n}\n\n/**\n * A complete MARC21 record.\n * Contains a 24-character leader and an array of fields (control or data fields).\n *\n * @example\n * ```typescript\n * const record: MarcRecord = {\n * leader: '00000nam 2200000 4500',\n * fields: [\n * { tag: '001', data: 'ocm12345678' },\n * {\n * tag: '245',\n * indicator1: '1',\n * indicator2: '0',\n * subfields: [{ code: 'a', value: 'Title' }],\n * },\n * ],\n * };\n * ```\n */\nexport interface MarcRecord {\n readonly leader: string; // Always 24 characters\n readonly fields: readonly (ControlField | DataField)[];\n}\n\n/**\n * Warning type categories for MARC parsing errors.\n */\nexport type MarcWarningType =\n | 'invalid_leader'\n | 'invalid_directory'\n | 'invalid_field'\n | 'truncated_record'\n | 'encoding_error'\n | 'malformed_xml'\n | 'missing_element'\n | 'invalid_attribute';\n\n/**\n * A warning generated during MARC record parsing.\n * Warnings indicate non-fatal issues that were encountered and recovered from.\n */\nexport interface MarcWarning {\n readonly type: MarcWarningType;\n readonly message: string;\n readonly position?: number; // Byte position in the record\n readonly tag?: string; // Field tag associated with the warning\n}\n\n/**\n * Options for parsing MARC records.\n */\nexport interface ParseOptions {\n /**\n * If true, throw errors instead of collecting warnings.\n * Default: false\n */\n readonly strict?: boolean;\n\n /**\n * Maximum number of warnings to collect before stopping.\n * Prevents memory issues with severely malformed records.\n * Default: 100\n */\n readonly maxWarnings?: number;\n}\n\n/**\n * Result of parsing a MARC record.\n * Contains the parsed record (if successful) and any warnings encountered.\n */\nexport interface 
ParseResult {\n readonly record: MarcRecord | null;\n readonly warnings: readonly MarcWarning[];\n}\n\n/**\n * Result of parsing multiple MARC records with warning capture.\n * Each entry pairs a parsed record (or null on failure) with its warnings.\n */\nexport interface ParseBatchResult {\n readonly results: readonly ParseResult[];\n}\n\n/**\n * Result of serializing a single MARC record with warning capture.\n */\nexport interface SerializeRecordResult {\n readonly bytes: Uint8Array;\n readonly warnings: readonly MarcWarning[];\n}\n\n/**\n * Result of serializing multiple MARC records with warning capture.\n * Contains the concatenated bytes and per-record results.\n */\nexport interface SerializeBatchResult {\n readonly bytes: Uint8Array;\n readonly results: readonly SerializeRecordResult[];\n}\n\n/**\n * Type guard to check if a field is a control field.\n *\n * @param field - The field to check\n * @returns True if the field is a control field\n *\n * @example\n * ```typescript\n * if (isControlField(field)) {\n * console.log(field.data); // TypeScript knows field is ControlField\n * }\n * ```\n */\nexport function isControlField(field: ControlField | DataField): field is ControlField {\n return 'data' in field;\n}\n\n/**\n * Type guard to check if a field is a data field.\n *\n * @param field - The field to check\n * @returns True if the field is a data field\n *\n * @example\n * ```typescript\n * if (isDataField(field)) {\n * console.log(field.subfields); // TypeScript knows field is DataField\n * }\n * ```\n */\nexport function isDataField(field: ControlField | DataField): field is DataField {\n return 'subfields' in field;\n}\n"],"mappings":"AA2KA,SAAgB,EAAe,EAAwD,CACrF,MAAO,SAAU,CACnB,CAeA,SAAgB,EAAY,EAAqD,CAC/E,MAAO,cAAe,CACxB"}
|
package/dist/types.d.ts
CHANGED
|
@@ -78,7 +78,7 @@ export interface MarcRecord {
|
|
|
78
78
|
/**
|
|
79
79
|
* Warning type categories for MARC parsing errors.
|
|
80
80
|
*/
|
|
81
|
-
export type MarcWarningType = 'invalid_leader' | 'invalid_directory' | 'invalid_field' | 'truncated_record' | 'encoding_error';
|
|
81
|
+
export type MarcWarningType = 'invalid_leader' | 'invalid_directory' | 'invalid_field' | 'truncated_record' | 'encoding_error' | 'malformed_xml' | 'missing_element' | 'invalid_attribute';
|
|
82
82
|
/**
|
|
83
83
|
* A warning generated during MARC record parsing.
|
|
84
84
|
* Warnings indicate non-fatal issues that were encountered and recovered from.
|
|
@@ -113,6 +113,28 @@ export interface ParseResult {
|
|
|
113
113
|
readonly record: MarcRecord | null;
|
|
114
114
|
readonly warnings: readonly MarcWarning[];
|
|
115
115
|
}
|
|
116
|
+
/**
|
|
117
|
+
* Result of parsing multiple MARC records with warning capture.
|
|
118
|
+
* Each entry pairs a parsed record (or null on failure) with its warnings.
|
|
119
|
+
*/
|
|
120
|
+
export interface ParseBatchResult {
|
|
121
|
+
readonly results: readonly ParseResult[];
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Result of serializing a single MARC record with warning capture.
|
|
125
|
+
*/
|
|
126
|
+
export interface SerializeRecordResult {
|
|
127
|
+
readonly bytes: Uint8Array;
|
|
128
|
+
readonly warnings: readonly MarcWarning[];
|
|
129
|
+
}
|
|
130
|
+
/**
|
|
131
|
+
* Result of serializing multiple MARC records with warning capture.
|
|
132
|
+
* Contains the concatenated bytes and per-record results.
|
|
133
|
+
*/
|
|
134
|
+
export interface SerializeBatchResult {
|
|
135
|
+
readonly bytes: Uint8Array;
|
|
136
|
+
readonly results: readonly SerializeRecordResult[];
|
|
137
|
+
}
|
|
116
138
|
/**
|
|
117
139
|
* Type guard to check if a field is a control field.
|
|
118
140
|
*
|