@pipobscure/xml 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/parser.js ADDED
@@ -0,0 +1,671 @@
1
+ /**
2
+ * @pipobscure/xml — Recursive-descent XML parser
3
+ *
4
+ * Design goals
5
+ * ─────────────
6
+ * • Correctness for well-formed XML (CalDAV, CardDAV, Atom, WebDAV …)
7
+ * • Tolerant / forgiving: recovers from many real-world XML quirks instead
8
+ * of aborting with an error. The aim is to parse what servers actually send,
9
+ * not only what the spec mandates.
10
+ * • Optimised for small documents (< ~1 MB) — no streaming, no SAX.
11
+ * • Pure TypeScript, zero dependencies, plain-object output (JSON-safe).
12
+ *
13
+ * Tolerance specifics
14
+ * ────────────────────
15
+ * • Unknown named entity references (e.g. `&nbsp;`) are left verbatim
16
+ * (`&nbsp;`) rather than causing an error.
17
+ * • Undefined namespace prefixes resolve to `null` rather than throwing.
18
+ * • `--` inside comments is allowed (browsers are lenient here too).
19
+ * • Missing XML declaration is fine.
20
+ * • Attribute values may use either quote style.
21
+ * • DOCTYPE internal subsets are captured verbatim, not validated.
22
+ * • The BOM (U+FEFF) at the start of the stream is silently skipped.
23
+ */
24
+ import { isXmlWhitespace, isNameStartChar, isNameChar, isHexDigit, isDecimalDigit } from './chars.js';
25
+ // ---------------------------------------------------------------------------
26
+ // Constants
27
+ // ---------------------------------------------------------------------------
28
/** Namespace URI permanently bound to the reserved `xml` prefix. */
const XML_NS = 'http://www.w3.org/XML/1998/namespace';
/** Namespace URI permanently bound to the reserved `xmlns` prefix. */
const XMLNS_NS = 'http://www.w3.org/2000/xmlns/';
/**
 * The five predefined XML entities. Unknown entities are left verbatim.
 *
 * The table has no prototype and is frozen: it is indexed with entity names
 * taken straight from the input, so a reference such as `&constructor;` or
 * `&toString;` must not resolve to an inherited `Object.prototype` member.
 */
const PREDEFINED_ENTITIES = Object.freeze({
  __proto__: null,
  amp: '&',
  lt: '<',
  gt: '>',
  apos: "'",
  quot: '"',
});
38
+ // ---------------------------------------------------------------------------
39
+ // Public error type
40
+ // ---------------------------------------------------------------------------
41
+ /**
42
+ * Thrown when the input is so malformed that the parser cannot produce a
43
+ * meaningful tree. In practice the parser tries hard to recover, so only
44
+ * truly unrecoverable situations (e.g. no root element found) reach here.
45
+ */
46
export class ParseError extends Error {
  /** Index into the source string (UTF-16 code units) where the problem was detected. */
  position;
  /** 1-based line number of the problem. */
  line;
  /** 1-based column number of the problem. */
  column;

  /**
   * @param {string} message  Human-readable description, without location.
   * @param {number} position Index into the source string.
   * @param {number} line     1-based line number.
   * @param {number} column   1-based column number.
   */
  constructor(message, position, line, column) {
    // The location is appended to the message so it survives plain logging.
    const located = `${message} (line ${line}, col ${column})`;
    super(located);
    Object.assign(this, { name: 'XmlParseError', position, line, column });
  }
}
61
+ // ---------------------------------------------------------------------------
62
+ // Parser
63
+ // ---------------------------------------------------------------------------
64
/**
 * Tolerant recursive-descent parser over one in-memory XML string.
 *
 * An instance is single-use: construct it with the source text and call
 * `parse()` once. All state (cursor position and namespace scopes) lives on
 * the instance.
 */
class XmlParser {
  /** The complete source text being parsed. */
  src;
  /** Index (UTF-16 code unit) of the next unread character in `src`. */
  pos = 0;
  /**
   * Namespace scope stack.
   * Each layer maps prefix → URI; `''` (empty string) is the default NS.
   * The bottom layer holds the two permanently-bound prefixes.
   */
  nsStack = [
    new Map([
      ['xml', XML_NS],
      ['xmlns', XMLNS_NS],
    ]),
  ];

  /** @param {string} src XML source text. */
  constructor(src) {
    this.src = src;
  }

  // -------------------------------------------------------------------------
  // Public entry point
  // -------------------------------------------------------------------------

  /**
   * Parses the whole source and returns a `{ type: 'document', children }`
   * node.
   *
   * @throws {ParseError} When non-markup content appears where the root
   *   element is expected, or when the root tag has no valid name.
   */
  parse() {
    // Strip BOM
    if (this.src.charCodeAt(0) === 0xfeff) this.pos = 1;
    const children = [];
    // Optional XML declaration
    if (this.startsWith('<?xml') && this.isXmlDeclStart()) {
      children.push(this.parseXmlDeclaration());
    }
    // Misc* (comments, PIs, whitespace) then optional DOCTYPE then Misc*
    this.parseMisc(children);
    if (this.startsWith('<!DOCTYPE') || this.startsWith('<!doctype')) {
      try {
        children.push(this.parseDoctype());
      } catch {
        // Deliberate best-effort: a deeply malformed DOCTYPE is dropped and
        // parsing resumes at the next '<'.
        this.skipToNext('<');
      }
      this.parseMisc(children);
    }
    // Root element
    if (this.pos < this.src.length && this.src[this.pos] === '<') {
      children.push(this.parseElement());
    } else if (this.pos < this.src.length) {
      throw this.error('No root element found');
    }
    // Trailing misc
    this.parseMisc(children);
    return { type: 'document', children };
  }

  // -------------------------------------------------------------------------
  // Prolog / misc
  // -------------------------------------------------------------------------

  /**
   * Determines whether the `<?xml` we see is really the XML declaration
   * (followed by whitespace or `?>`) and not a PI named `xmlfoo`.
   */
  isXmlDeclStart() {
    const c = this.src.charCodeAt(this.pos + 5);
    return isXmlWhitespace(c) || c === 0x3f; // ? for '?>'
  }

  /** Appends any run of comments and processing instructions to `into`. */
  parseMisc(into) {
    while (this.pos < this.src.length) {
      this.skipWhitespace();
      if (this.startsWith('<!--')) {
        into.push(this.parseComment());
      } else if (this.startsWith('<?')) {
        into.push(this.parseProcessingInstruction());
      } else {
        break;
      }
    }
  }

  /**
   * Parses `<?xml version=… encoding=… standalone=… ?>`.
   * `version` defaults to `'1.0'` when absent; `standalone` is `true`/`false`
   * for `yes`/`no` and `null` otherwise.
   */
  parseXmlDeclaration() {
    this.expect('<?xml');
    // Tolerate missing whitespace
    this.skipWhitespace();
    const version = this.parsePseudoAttribute('version') ?? '1.0';
    const encoding = this.parsePseudoAttribute('encoding');
    const standaloneRaw = this.parsePseudoAttribute('standalone');
    const standalone = standaloneRaw === 'yes' ? true : standaloneRaw === 'no' ? false : null;
    // Consume ?> — tolerate just > if ?> is missing
    if (this.startsWith('?>')) {
      this.advanceBy(2);
    } else if (this.current() === '>') {
      this.advance();
    }
    return { type: 'xml-declaration', version, encoding, standalone };
  }

  /**
   * Parses one `keyword = "value"` pair of the XML declaration when the input
   * starts with `keyword`; the `=` is optional. Trailing whitespace is
   * consumed. Returns the value, or `null` if the keyword is not present.
   */
  parsePseudoAttribute(keyword) {
    if (!this.startsWith(keyword)) return null;
    this.advanceBy(keyword.length);
    this.skipWhitespace();
    if (this.current() === '=') this.advance();
    this.skipWhitespace();
    const value = this.parseQuotedValue();
    this.skipWhitespace();
    return value;
  }

  /**
   * Parses a DOCTYPE declaration. The internal subset (between `[` and `]`)
   * is captured verbatim and not interpreted.
   */
  parseDoctype() {
    // Case-insensitive match already confirmed by caller
    this.advanceBy('<!DOCTYPE'.length);
    this.skipWhitespace();
    const name = this.tryParseName() ?? 'unknown';
    this.skipWhitespace();
    let publicId = null;
    let systemId = null;
    let internalSubset = null;
    const kw = this.peekKeyword();
    if (kw === 'PUBLIC') {
      this.advanceBy(6);
      this.skipWhitespace();
      publicId = this.parseQuotedValue();
      this.skipWhitespace();
      if (this.current() === '"' || this.current() === "'") {
        systemId = this.parseQuotedValue();
        this.skipWhitespace();
      }
    } else if (kw === 'SYSTEM') {
      this.advanceBy(6);
      this.skipWhitespace();
      systemId = this.parseQuotedValue();
      this.skipWhitespace();
    }
    // Internal subset
    if (this.current() === '[') {
      this.advance();
      const start = this.pos;
      // Scan for the matching ']', respecting quoted strings
      while (this.pos < this.src.length && this.current() !== ']') {
        if (this.current() === '"' || this.current() === "'") {
          const q = this.current();
          this.advance();
          while (this.pos < this.src.length && this.current() !== q) this.advance();
          if (this.pos < this.src.length) this.advance();
        } else {
          this.advance();
        }
      }
      internalSubset = this.src.slice(start, this.pos);
      if (this.current() === ']') this.advance();
      this.skipWhitespace();
    }
    // Consume closing >
    if (this.current() === '>') this.advance();
    return { type: 'doctype', name, publicId, systemId, internalSubset };
  }

  // -------------------------------------------------------------------------
  // Element
  // -------------------------------------------------------------------------

  /**
   * Parses one element (start tag, attributes, content, end tag) starting at
   * `<` and returns an `element` node with resolved namespaces.
   *
   * @throws {ParseError} When the tag name does not start with a name character.
   */
  parseElement() {
    this.expect('<');
    const qname = this.parseQName();
    this.skipWhitespace();
    const rawAttrs = [];
    const nsDecls = new Map(); // prefix → URI, '' = default
    while (this.pos < this.src.length && this.current() !== '>' && !this.startsWith('/>')) {
      const ch = this.src.charCodeAt(this.pos);
      if (!isNameStartChar(ch)) {
        // Garbage character inside element tag — skip it tolerantly
        this.advance();
        continue;
      }
      const attrQName = this.parseQName();
      this.skipWhitespace();
      // Tolerate missing = sign
      let sawEquals = false;
      if (this.current() === '=') {
        this.advance();
        sawEquals = true;
        this.skipWhitespace();
      }
      // A value is read after '=' (quoted or bare) or, without '=', only when
      // a quote follows. A name with neither is a value-less attribute; this
      // prevents `<a b c="1">` from swallowing `c="1"` as the value of `b`.
      const quoteFollows = this.current() === '"' || this.current() === "'";
      const value = sawEquals || quoteFollows ? this.parseQuotedValueOrBare() : '';
      this.skipWhitespace();
      // Detect namespace declarations
      if (attrQName.prefix === null && attrQName.local === 'xmlns') {
        nsDecls.set('', value);
      } else if (attrQName.prefix === 'xmlns') {
        nsDecls.set(attrQName.local, value);
      }
      rawAttrs.push({ prefix: attrQName.prefix, local: attrQName.local, value });
    }
    // ── Push namespace scope ───────────────────────────────────────────────
    // Pushed only after all attributes are read so that declarations on this
    // tag apply to the element itself and to every attribute on it.
    this.nsStack.push(nsDecls);
    // ── Resolve element namespace ──────────────────────────────────────────
    const elemNS = this.resolveNS(qname.prefix, true);
    // ── Resolve attribute namespaces ───────────────────────────────────────
    const attributes = rawAttrs.map((raw) => {
      let ns;
      const isNsDeclaration = raw.prefix === 'xmlns' || (raw.prefix === null && raw.local === 'xmlns');
      if (isNsDeclaration) {
        ns = XMLNS_NS;
      } else if (raw.prefix !== null) {
        ns = this.resolveNS(raw.prefix, false);
      } else {
        ns = null; // unprefixed attributes have no namespace
      }
      return { name: raw.local, prefix: raw.prefix, namespace: ns, value: raw.value };
    });
    // ── Handle self-closing vs content ─────────────────────────────────────
    let selfClosing = false;
    if (this.startsWith('/>')) {
      this.advanceBy(2);
      selfClosing = true;
    } else if (this.current() === '>') {
      this.advance();
    } else {
      // Malformed — treat as self-closing and try to recover
      selfClosing = true;
    }
    const children = [];
    if (!selfClosing) {
      this.parseChildren(children, qname);
    }
    // ── Pop namespace scope ────────────────────────────────────────────────
    this.nsStack.pop();
    return {
      type: 'element',
      name: qname.local,
      prefix: qname.prefix,
      namespace: elemNS,
      attributes,
      children,
    };
  }

  /**
   * Parses element content into `into` until a closing tag or end of input.
   *
   * Any closing tag ends the current element, even when its name does not
   * match `parent`; the mismatched tag is consumed. `parent` is accepted for
   * interface compatibility and is not consulted.
   */
  parseChildren(into, parent) {
    while (this.pos < this.src.length) {
      if (this.startsWith('</')) {
        // Closing tag — consumed whether or not its name matches
        this.advanceBy(2);
        this.tryParseQName();
        this.skipWhitespace();
        if (this.current() === '>') this.advance();
        return;
      }
      if (this.startsWith('<![CDATA[')) {
        into.push(this.parseCData());
      } else if (this.startsWith('<!--')) {
        into.push(this.parseComment());
      } else if (this.startsWith('<?')) {
        into.push(this.parseProcessingInstruction());
      } else if (this.current() === '<') {
        // Peek ahead — could be a malformed '<' in text
        const nextCode = this.src.charCodeAt(this.pos + 1);
        if (isNameStartChar(nextCode) || nextCode === 0x3a /* : */ || nextCode === 0x5f /* _ */) {
          into.push(this.parseElement());
        } else {
          // Stray '<': it must be consumed here, because parseText() stops at
          // '<' without advancing and this loop would otherwise never end.
          this.advance();
          const rest = this.parseText();
          this.appendText(into, `<${rest.value}`);
        }
      } else {
        const text = this.parseText();
        if (text.value.length > 0) into.push(text);
      }
    }
    // End of input without closing tag — tolerated
  }

  /**
   * Adds `value` as text to `into`, extending the last node when it is already
   * a text node so a stray `<` does not split one run of text in two.
   */
  appendText(into, value) {
    const last = into[into.length - 1];
    if (last !== undefined && last.type === 'text') {
      last.value += value;
    } else {
      into.push({ type: 'text', value });
    }
  }

  // -------------------------------------------------------------------------
  // Leaf nodes
  // -------------------------------------------------------------------------

  /** Parses `<!-- … -->`; an unterminated comment runs to end of input. */
  parseComment() {
    this.expect('<!--');
    const start = this.pos;
    const end = this.src.indexOf('-->', this.pos);
    if (end === -1) {
      // Unterminated comment — consume the rest
      this.pos = this.src.length;
      return { type: 'comment', value: this.src.slice(start) };
    }
    this.pos = end + 3;
    return { type: 'comment', value: this.src.slice(start, end) };
  }

  /** Parses `<![CDATA[ … ]]>`; an unterminated section runs to end of input. */
  parseCData() {
    this.expect('<![CDATA[');
    const start = this.pos;
    const end = this.src.indexOf(']]>', this.pos);
    if (end === -1) {
      this.pos = this.src.length;
      return { type: 'cdata', value: this.src.slice(start) };
    }
    this.pos = end + 3;
    return { type: 'cdata', value: this.src.slice(start, end) };
  }

  /**
   * Parses `<?target data?>`. A missing target becomes `'_pi'`; data is only
   * read when whitespace follows the target.
   */
  parseProcessingInstruction() {
    this.expect('<?');
    const target = this.tryParseName() ?? '_pi';
    let data = '';
    if (this.pos < this.src.length && isXmlWhitespace(this.src.charCodeAt(this.pos))) {
      this.skipWhitespace();
      const end = this.src.indexOf('?>', this.pos);
      if (end === -1) {
        data = this.src.slice(this.pos);
        this.pos = this.src.length;
      } else {
        data = this.src.slice(this.pos, end).trimEnd();
        this.pos = end + 2;
      }
    } else {
      // No data, just consume '?>'
      if (this.startsWith('?>')) this.advanceBy(2);
    }
    return { type: 'processing-instruction', target, data };
  }

  /**
   * Parses character data up to (not including) the next `<`, decoding
   * entity and character references. Returns a `text` node, possibly empty.
   */
  parseText() {
    const parts = [];
    while (this.pos < this.src.length && this.current() !== '<') {
      if (this.current() === '&') {
        parts.push(this.parseEntityRef());
        continue;
      }
      // Fast-path: copy everything up to the next special character
      const next = this.nextSpecialInText();
      const end = next === -1 ? this.src.length : next;
      parts.push(this.src.slice(this.pos, end));
      this.pos = end;
    }
    return { type: 'text', value: parts.join('') };
  }

  /** Returns the position of the next `<` or `&` at or after `this.pos`, or -1. */
  nextSpecialInText() {
    return this.indexOfEither(0x3c /* < */, 0x26 /* & */, this.pos);
  }

  /**
   * Returns the index of the first code unit equal to `codeA` or `codeB` at
   * or after `from`, or -1. The scan stops at the first hit, so the cost is
   * proportional to the run being skipped rather than to the rest of the
   * document (two independent `indexOf` calls would each scan to the end
   * when their character is absent).
   */
  indexOfEither(codeA, codeB, from) {
    const { src } = this;
    for (let i = from; i < src.length; i++) {
      const c = src.charCodeAt(i);
      if (c === codeA || c === codeB) return i;
    }
    return -1;
  }

  // -------------------------------------------------------------------------
  // Entity references
  // -------------------------------------------------------------------------

  /**
   * Parses a reference starting at `&` and returns its replacement text.
   *
   * - `&#…;` / `&#x…;` are delegated to `parseCharRef`.
   * - The five predefined entities are decoded; the trailing `;` is optional.
   * - Anything else is returned exactly as written: `&nbsp;` stays `&nbsp;`,
   *   `AT&T` stays `AT&T`, and a bare `&` stays `&`.
   */
  parseEntityRef() {
    this.advance(); // skip &
    if (this.current() === '#') {
      this.advance(); // skip #
      return this.parseCharRef();
    }
    const start = this.pos;
    while (this.pos < this.src.length && isNameChar(this.src.charCodeAt(this.pos))) {
      this.pos++;
    }
    const name = this.src.slice(start, this.pos);
    // Bare & with no recognisable name (e.g. "& " in malformed content) —
    // preserve literally and leave whatever follows (even ';') as plain text.
    if (name.length === 0) return '&';
    // Tolerate missing semicolon, but remember whether one was present
    let terminated = false;
    if (this.current() === ';') {
      this.advance();
      terminated = true;
    }
    // Own-property check: names come from the input, so inherited members
    // such as `constructor` must not be treated as entities.
    if (Object.hasOwn(PREDEFINED_ENTITIES, name)) return PREDEFINED_ENTITIES[name];
    // Unknown named entity — return verbatim, without inventing a ';'
    return terminated ? `&${name};` : `&${name}`;
  }

  /**
   * Parses the remainder of a numeric character reference (after `&#`).
   * Returns U+FFFD for missing digits, zero, surrogates and out-of-range
   * code points. The trailing `;` is optional.
   */
  parseCharRef() {
    const isHex = this.current() === 'x' || this.current() === 'X';
    if (isHex) this.advance();
    const isDigit = isHex ? isHexDigit : isDecimalDigit;
    const digitsStart = this.pos;
    while (this.pos < this.src.length && isDigit(this.src.charCodeAt(this.pos))) {
      this.pos++;
    }
    const digits = this.src.slice(digitsStart, this.pos);
    const codePoint = digits.length > 0 ? Number.parseInt(digits, isHex ? 16 : 10) : 0xfffd;
    if (this.current() === ';') this.advance();
    // Guard against surrogates and invalid code points
    if (codePoint > 0x10ffff || (codePoint >= 0xd800 && codePoint <= 0xdfff) || codePoint === 0) {
      return '\ufffd';
    }
    return String.fromCodePoint(codePoint);
  }

  // -------------------------------------------------------------------------
  // Attribute value parsing
  // -------------------------------------------------------------------------

  /**
   * Parses a single- or double-quoted value with references decoded.
   * Returns `''` without consuming anything when no quote is present; an
   * unterminated value runs to end of input.
   */
  parseQuotedValue() {
    const quote = this.current();
    if (quote !== '"' && quote !== "'") {
      // No quote — tolerate and return empty string
      return '';
    }
    this.advance(); // opening quote
    const quoteCode = quote.charCodeAt(0);
    const parts = [];
    while (this.pos < this.src.length && this.current() !== quote) {
      if (this.current() === '&') {
        parts.push(this.parseEntityRef());
        continue;
      }
      const stop = this.indexOfEither(quoteCode, 0x26 /* & */, this.pos);
      const end = stop === -1 ? this.src.length : stop;
      parts.push(this.src.slice(this.pos, end));
      this.pos = end;
    }
    if (this.pos < this.src.length) this.advance(); // closing quote
    return parts.join('');
  }

  /**
   * Like `parseQuotedValue` but also handles unquoted attribute values
   * (e.g. `attr=value` — common in broken HTML-as-XML).
   */
  parseQuotedValueOrBare() {
    const ch = this.current();
    if (ch === '"' || ch === "'") return this.parseQuotedValue();
    // Bare value — read until whitespace, >, or />
    const start = this.pos;
    while (
      this.pos < this.src.length &&
      !isXmlWhitespace(this.src.charCodeAt(this.pos)) &&
      this.current() !== '>' &&
      !this.startsWith('/>')
    ) {
      this.pos++;
    }
    return this.src.slice(start, this.pos);
  }

  // -------------------------------------------------------------------------
  // Name / QName parsing
  // -------------------------------------------------------------------------

  /**
   * Parses an XML Name (may include `:` for QName tokenisation).
   *
   * @throws {ParseError} When the current character cannot start a name.
   */
  parseName() {
    const name = this.tryParseName();
    if (name === null) {
      throw this.error(`Expected XML name character, got ${JSON.stringify(this.current())}`);
    }
    return name;
  }

  /** Like `parseName` but returns `null` (consuming nothing) instead of throwing. */
  tryParseName() {
    if (!isNameStartChar(this.src.charCodeAt(this.pos))) return null;
    const start = this.pos;
    while (this.pos < this.src.length && isNameChar(this.src.charCodeAt(this.pos))) {
      this.pos++;
    }
    return this.src.slice(start, this.pos);
  }

  /** Splits a raw name on its first `:` into `{ prefix, local }`. */
  splitQName(name) {
    const colon = name.indexOf(':');
    if (colon === -1) return { prefix: null, local: name };
    return { prefix: name.slice(0, colon), local: name.slice(colon + 1) };
  }

  /** Parses a qualified name and splits it on the first `:`. */
  parseQName() {
    return this.splitQName(this.parseName());
  }

  /** Like `parseQName` but returns `null` instead of throwing. */
  tryParseQName() {
    const name = this.tryParseName();
    return name === null ? null : this.splitQName(name);
  }

  // -------------------------------------------------------------------------
  // Namespace resolution
  // -------------------------------------------------------------------------

  /**
   * Resolves `prefix` against the current namespace scope stack.
   *
   * - `prefix === 'xml'`   → always `XML_NS`
   * - `prefix === 'xmlns'` → always `XMLNS_NS`
   * - `prefix === null` and `isElement`  → default namespace (may be null)
   * - `prefix === null` and `!isElement` → `null` (attrs have no default NS)
   * - Unknown prefix → `null` (tolerant; spec says this is an error)
   */
  resolveNS(prefix, isElement) {
    if (prefix === 'xml') return XML_NS;
    if (prefix === 'xmlns') return XMLNS_NS;
    const key = prefix ?? (isElement ? '' : null);
    if (key === null) return null;
    // Innermost scope wins, so search from the top of the stack down
    for (let i = this.nsStack.length - 1; i >= 0; i--) {
      const scope = this.nsStack[i];
      if (scope.has(key)) {
        const uri = scope.get(key);
        return uri === '' ? null : uri; // empty URI = un-declare
      }
    }
    // Unknown prefix — tolerate by returning null
    return null;
  }

  // -------------------------------------------------------------------------
  // Low-level cursor helpers
  // -------------------------------------------------------------------------

  /** Returns the character at the cursor, or `''` at end of input. */
  current() {
    return this.src[this.pos] ?? '';
  }

  /** Moves the cursor forward by one code unit. */
  advance() {
    this.pos++;
  }

  /** Moves the cursor forward by `n` code units. */
  advanceBy(n) {
    this.pos += n;
  }

  /** Reports whether the input at the cursor begins with `str`. */
  startsWith(str) {
    return this.src.startsWith(str, this.pos);
  }

  /**
   * Consumes `str` at the cursor.
   *
   * @throws {ParseError} When the input at the cursor is not `str`.
   */
  expect(str) {
    if (!this.src.startsWith(str, this.pos)) {
      throw this.error(`Expected ${JSON.stringify(str)}, got ${JSON.stringify(this.src.slice(this.pos, this.pos + str.length))}`);
    }
    this.pos += str.length;
  }

  /** Advances the cursor past any run of XML whitespace. */
  skipWhitespace() {
    while (this.pos < this.src.length && isXmlWhitespace(this.src.charCodeAt(this.pos))) {
      this.pos++;
    }
  }

  /** Scans forward until the given character is found (useful for recovery). */
  skipToNext(ch) {
    const idx = this.src.indexOf(ch, this.pos);
    this.pos = idx === -1 ? this.src.length : idx;
  }

  /**
   * Reads up to 8 ASCII letters at the cursor, upper-cased, to detect DOCTYPE
   * keywords (PUBLIC / SYSTEM) without consuming them.
   */
  peekKeyword() {
    let s = '';
    for (let i = this.pos; i < this.src.length && i < this.pos + 8; i++) {
      const c = this.src.charCodeAt(i);
      if (c >= 0x41 && c <= 0x5a) s += String.fromCharCode(c);
      else if (c >= 0x61 && c <= 0x7a) s += String.fromCharCode(c - 32);
      else break;
    }
    return s;
  }

  // -------------------------------------------------------------------------
  // Error helper
  // -------------------------------------------------------------------------

  /** Builds (does not throw) a `ParseError` located at the current cursor. */
  error(message) {
    // Compute line/col lazily (only on error)
    let line = 1;
    let col = 1;
    for (let i = 0; i < this.pos && i < this.src.length; i++) {
      if (this.src.charCodeAt(i) === 0x0a) {
        line++;
        col = 1;
      } else {
        col++;
      }
    }
    return new ParseError(message, this.pos, line, col);
  }
}
653
+ // ---------------------------------------------------------------------------
654
+ // Public API
655
+ // ---------------------------------------------------------------------------
656
+ /**
657
+ * Parses an XML string into a `Document` tree of plain JS objects.
658
+ *
659
+ * The parser is deliberately forgiving:
660
+ * - Unknown entity references are preserved verbatim.
661
+ * - Undeclared namespace prefixes resolve to `null`.
662
+ * - Minor structural quirks (missing closing quotes, stray characters in
663
+ * element tags, unterminated comments) are recovered from where possible.
664
+ *
665
+ * @throws {ParseError} Only for unrecoverable structural failures such as a
666
+ * completely absent root element.
667
+ */
668
export function parse(xml) {
  // One parser instance per call: the parser carries cursor and namespace state.
  const parser = new XmlParser(xml);
  return parser.parse();
}
671
+ //# sourceMappingURL=parser.js.map