@rgrove/parse-xml 4.1.0 → 4.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/browser.js CHANGED
@@ -1,4 +1,4 @@
1
- /*! @rgrove/parse-xml v4.1.0 | ISC License | Copyright Ryan Grove */
1
+ /*! @rgrove/parse-xml v4.2.1 | ISC License | Copyright Ryan Grove */
2
2
  "use strict";
3
3
  var __defProp = Object.defineProperty;
4
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -40,24 +40,24 @@ var emptyString = "";
40
40
  var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
41
41
  var StringScanner = class {
42
42
  constructor(string) {
43
- this.k = this.q(string, true);
43
+ this.k = this.u(string, true);
44
44
  this.d = 0;
45
45
  this.length = string.length;
46
- this.n = this.k !== this.length;
47
- this.m = string;
48
- if (this.n) {
46
+ this.l = this.k !== this.length;
47
+ this.h = string;
48
+ if (this.l) {
49
49
  let charsToBytes = [];
50
50
  for (let byteIndex = 0, charIndex = 0; charIndex < this.k; ++charIndex) {
51
51
  charsToBytes[charIndex] = byteIndex;
52
52
  byteIndex += string.codePointAt(byteIndex) > 65535 ? 2 : 1;
53
53
  }
54
- this.y = charsToBytes;
54
+ this.A = charsToBytes;
55
55
  }
56
56
  }
57
57
  /**
58
58
  * Whether the current character index is at the end of the input string.
59
59
  */
60
- get z() {
60
+ get B() {
61
61
  return this.d >= this.k;
62
62
  }
63
63
  // -- Protected Methods ------------------------------------------------------
@@ -65,7 +65,7 @@ var StringScanner = class {
65
65
  * Returns the number of characters in the given string, which may differ from
66
66
  * the byte length if the string contains multibyte characters.
67
67
  */
68
- q(string, multiByteSafe = this.n) {
68
+ u(string, multiByteSafe = this.l) {
69
69
  return multiByteSafe ? string.replace(surrogatePair, "_").length : string.length;
70
70
  }
71
71
  // -- Public Methods ---------------------------------------------------------
@@ -73,16 +73,16 @@ var StringScanner = class {
73
73
  * Advances the scanner by the given number of characters, stopping if the end
74
74
  * of the string is reached.
75
75
  */
76
- g(count = 1) {
76
+ p(count = 1) {
77
77
  this.d = Math.min(this.k, this.d + count);
78
78
  }
79
79
  /**
80
80
  * Returns the byte index of the given character index in the string. The two
81
81
  * may differ in strings that contain multibyte characters.
82
82
  */
83
- i(charIndex = this.d) {
83
+ f(charIndex = this.d) {
84
84
  var _a;
85
- return this.n ? (_a = this.y[charIndex]) != null ? _a : Infinity : charIndex;
85
+ return this.l ? (_a = this.A[charIndex]) != null ? _a : Infinity : charIndex;
86
86
  }
87
87
  /**
88
88
  * Consumes and returns the given number of characters if possible, advancing
@@ -90,46 +90,53 @@ var StringScanner = class {
90
90
  *
91
91
  * If no characters could be consumed, an empty string will be returned.
92
92
  */
93
- F(count = 1) {
94
- let chars = this.h(count);
95
- this.g(count);
93
+ G(charCount = 1) {
94
+ let chars = this.m(charCount);
95
+ this.p(charCount);
96
96
  return chars;
97
97
  }
98
98
  /**
99
- * Consumes a match for the given sticky regex, advances the scanner, updates
100
- * the `lastIndex` property of the regex, and returns the matching string.
99
+ * Consumes and returns the given number of bytes if possible, advancing the
100
+ * scanner and stopping if the end of the string is reached.
101
101
  *
102
- * The regex must have a sticky flag ("y") so that its `lastIndex` prop can be
103
- * used to anchor the match at the current scanner position.
102
+ * It's up to the caller to ensure that the given byte count doesn't split a
103
+ * multibyte character.
104
104
  *
105
- * Returns the consumed string, or an empty string if nothing was consumed.
105
+ * If no bytes could be consumed, an empty string will be returned.
106
106
  */
107
- G(regex) {
108
- if (!regex.sticky) {
109
- throw new Error('`regex` must have a sticky flag ("y")');
110
- }
111
- regex.lastIndex = this.i();
112
- let result = regex.exec(this.m);
113
- if (result === null || result.length === 0) {
114
- return emptyString;
115
- }
116
- let match = result[0];
117
- this.g(this.q(match));
118
- return match;
107
+ v(byteCount) {
108
+ let byteIndex = this.f();
109
+ let result = this.h.slice(byteIndex, byteIndex + byteCount);
110
+ this.p(this.u(result));
111
+ return result;
119
112
  }
120
113
  /**
121
- * Consumes and returns all characters for which the given function returns a
122
- * truthy value, stopping on the first falsy return value or if the end of the
123
- * input is reached.
114
+ * Consumes and returns all characters for which the given function returns
115
+ * `true`, stopping when `false` is returned or the end of the input is
116
+ * reached.
124
117
  */
125
- v(fn) {
126
- let char;
127
- let match = emptyString;
128
- while ((char = this.h()) && fn(char)) {
129
- match += char;
130
- this.g();
118
+ w(fn) {
119
+ let { length, l: multiByteMode, h: string } = this;
120
+ let startByteIndex = this.f();
121
+ let endByteIndex = startByteIndex;
122
+ if (multiByteMode) {
123
+ while (endByteIndex < length) {
124
+ let char = string[endByteIndex];
125
+ let isSurrogatePair = char >= "\uD800" && char <= "\uDBFF";
126
+ if (isSurrogatePair) {
127
+ char += string[endByteIndex + 1];
128
+ }
129
+ if (!fn(char)) {
130
+ break;
131
+ }
132
+ endByteIndex += isSurrogatePair ? 2 : 1;
133
+ }
134
+ } else {
135
+ while (endByteIndex < length && fn(string[endByteIndex])) {
136
+ ++endByteIndex;
137
+ }
131
138
  }
132
- return match;
139
+ return this.v(endByteIndex - startByteIndex);
133
140
  }
134
141
  /**
135
142
  * Consumes the given string if it exists at the current character index, and
@@ -138,29 +145,11 @@ var StringScanner = class {
138
145
  * If the given string doesn't exist at the current character index, an empty
139
146
  * string will be returned and the scanner will not be advanced.
140
147
  */
141
- Q(stringToConsume) {
142
- if (this.b(stringToConsume)) {
143
- return stringToConsume;
144
- }
145
- if (this.n) {
146
- let { length } = stringToConsume;
147
- let charLengthToMatch = this.q(stringToConsume);
148
- if (charLengthToMatch !== length && stringToConsume === this.h(charLengthToMatch)) {
149
- this.g(charLengthToMatch);
150
- return stringToConsume;
151
- }
152
- }
153
- return emptyString;
154
- }
155
- /**
156
- * Does the same thing as `consumeString()`, but doesn't support consuming
157
- * multibyte characters. This can be faster if you only need to match single
158
- * byte characters.
159
- */
160
148
  b(stringToConsume) {
161
149
  let { length } = stringToConsume;
162
- if (this.h(length) === stringToConsume) {
163
- this.g(length);
150
+ let byteIndex = this.f();
151
+ if (stringToConsume === this.h.slice(byteIndex, byteIndex + length)) {
152
+ this.p(length === 1 ? 1 : this.u(stringToConsume));
164
153
  return stringToConsume;
165
154
  }
166
155
  return emptyString;
@@ -172,15 +161,9 @@ var StringScanner = class {
172
161
  *
173
162
  * Returns the consumed string, or an empty string if nothing was consumed.
174
163
  */
175
- A(regex) {
176
- let restOfString = this.m.slice(this.i());
177
- let matchByteIndex = restOfString.search(regex);
178
- if (matchByteIndex <= 0) {
179
- return emptyString;
180
- }
181
- let result = restOfString.slice(0, matchByteIndex);
182
- this.g(this.q(result));
183
- return result;
164
+ x(regex) {
165
+ let matchByteIndex = this.h.slice(this.f()).search(regex);
166
+ return matchByteIndex > 0 ? this.v(matchByteIndex) : emptyString;
184
167
  }
185
168
  /**
186
169
  * Consumes characters until the given string is found, advancing the scanner
@@ -189,34 +172,19 @@ var StringScanner = class {
189
172
  *
190
173
  * Returns the consumed string, or an empty string if nothing was consumed.
191
174
  */
192
- t(searchString) {
193
- let { m: string } = this;
194
- let byteIndex = this.i();
195
- let matchByteIndex = string.indexOf(searchString, byteIndex);
196
- if (matchByteIndex <= 0) {
197
- return emptyString;
198
- }
199
- let result = string.slice(byteIndex, matchByteIndex);
200
- this.g(this.q(result));
201
- return result;
175
+ s(searchString) {
176
+ let byteIndex = this.f();
177
+ let matchByteIndex = this.h.indexOf(searchString, byteIndex);
178
+ return matchByteIndex > 0 ? this.v(matchByteIndex - byteIndex) : emptyString;
202
179
  }
203
180
  /**
204
181
  * Returns the given number of characters starting at the current character
205
182
  * index, without advancing the scanner and without exceeding the end of the
206
183
  * input string.
207
184
  */
208
- h(count = 1) {
209
- let { d: charIndex, n: multiByteMode, m: string } = this;
210
- if (multiByteMode) {
211
- if (charIndex >= this.k) {
212
- return emptyString;
213
- }
214
- return string.slice(
215
- this.i(charIndex),
216
- this.i(charIndex + count)
217
- );
218
- }
219
- return string.slice(charIndex, charIndex + count);
185
+ m(count = 1) {
186
+ let { d: charIndex, h: string } = this;
187
+ return this.l ? string.slice(this.f(charIndex), this.f(charIndex + count)) : string.slice(charIndex, charIndex + count);
220
188
  }
221
189
  /**
222
190
  * Resets the scanner position to the given character _index_, or to the start
@@ -225,14 +193,14 @@ var StringScanner = class {
225
193
  * If _index_ is negative, the scanner position will be moved backward by that
226
194
  * many characters, stopping if the beginning of the string is reached.
227
195
  */
228
- o(index = 0) {
196
+ n(index = 0) {
229
197
  this.d = index >= 0 ? Math.min(this.k, index) : Math.max(0, this.d + index);
230
198
  }
231
199
  };
232
200
 
233
201
  // src/lib/syntax.ts
234
- var attValueCharDoubleQuote = /[^"&<]+/y;
235
- var attValueCharSingleQuote = /[^'&<]+/y;
202
+ var attValueCharDoubleQuote = /["&<]/;
203
+ var attValueCharSingleQuote = /['&<]/;
236
204
  var attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
237
205
  var endCharData = /<|&|]]>/;
238
206
  var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.create(null), {
@@ -243,28 +211,25 @@ var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.crea
243
211
  quot: '"'
244
212
  }));
245
213
  function isNameChar(char) {
246
- let cp = getCodePoint(char);
247
- return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp >= 8255 && cp <= 8256 || isNameStartChar(char, cp);
214
+ let cp = char.codePointAt(0);
215
+ return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp === 8255 || cp === 8256 || isNameStartChar(char, cp);
248
216
  }
249
- function isNameStartChar(char, cp = getCodePoint(char)) {
250
- return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp >= 8204 && cp <= 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
217
+ function isNameStartChar(char, cp = char.codePointAt(0)) {
218
+ return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp === 8204 || cp === 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
251
219
  }
252
220
  function isReferenceChar(char) {
253
221
  return char === "#" || isNameChar(char);
254
222
  }
255
223
  function isWhitespace(char) {
256
- let cp = getCodePoint(char);
224
+ let cp = char.codePointAt(0);
257
225
  return cp === 32 || cp === 9 || cp === 10 || cp === 13;
258
226
  }
259
227
  function isXmlCodePoint(cp) {
260
- return cp === 9 || cp === 10 || cp === 13 || cp >= 32 && cp <= 55295 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
261
- }
262
- function getCodePoint(char) {
263
- return char.codePointAt(0) || -1;
228
+ return cp >= 32 && cp <= 55295 || cp === 10 || cp === 9 || cp === 13 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
264
229
  }
265
230
 
266
231
  // src/lib/XmlNode.ts
267
- var _XmlNode = class {
232
+ var _XmlNode = class _XmlNode {
268
233
  constructor() {
269
234
  /**
270
235
  * Parent node of this node, or `null` if this node has no parent.
@@ -345,39 +310,39 @@ var _XmlNode = class {
345
310
  return json;
346
311
  }
347
312
  };
348
- var XmlNode = _XmlNode;
349
313
  /**
350
314
  * Type value for an `XmlCdata` node.
351
315
  */
352
- XmlNode.TYPE_CDATA = "cdata";
316
+ _XmlNode.TYPE_CDATA = "cdata";
353
317
  /**
354
318
  * Type value for an `XmlComment` node.
355
319
  */
356
- XmlNode.TYPE_COMMENT = "comment";
320
+ _XmlNode.TYPE_COMMENT = "comment";
357
321
  /**
358
322
  * Type value for an `XmlDocument` node.
359
323
  */
360
- XmlNode.TYPE_DOCUMENT = "document";
324
+ _XmlNode.TYPE_DOCUMENT = "document";
361
325
  /**
362
326
  * Type value for an `XmlDocumentType` node.
363
327
  */
364
- XmlNode.TYPE_DOCUMENT_TYPE = "doctype";
328
+ _XmlNode.TYPE_DOCUMENT_TYPE = "doctype";
365
329
  /**
366
330
  * Type value for an `XmlElement` node.
367
331
  */
368
- XmlNode.TYPE_ELEMENT = "element";
332
+ _XmlNode.TYPE_ELEMENT = "element";
369
333
  /**
370
334
  * Type value for an `XmlProcessingInstruction` node.
371
335
  */
372
- XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
336
+ _XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
373
337
  /**
374
338
  * Type value for an `XmlText` node.
375
339
  */
376
- XmlNode.TYPE_TEXT = "text";
340
+ _XmlNode.TYPE_TEXT = "text";
377
341
  /**
378
342
  * Type value for an `XmlDeclaration` node.
379
343
  */
380
- XmlNode.TYPE_XML_DECLARATION = "xmldecl";
344
+ _XmlNode.TYPE_XML_DECLARATION = "xmldecl";
345
+ var XmlNode = _XmlNode;
381
346
 
382
347
  // src/lib/XmlText.ts
383
348
  var XmlText = class extends XmlNode {
@@ -442,7 +407,7 @@ var XmlDeclaration = class extends XmlNode {
442
407
  };
443
408
 
444
409
  // src/lib/XmlElement.ts
445
- var XmlElement = class extends XmlNode {
410
+ var XmlElement = class _XmlElement extends XmlNode {
446
411
  constructor(name, attributes = /* @__PURE__ */ Object.create(null), children = []) {
447
412
  super();
448
413
  this.name = name;
@@ -457,7 +422,7 @@ var XmlElement = class extends XmlNode {
457
422
  }
458
423
  get preserveWhitespace() {
459
424
  let node = this;
460
- while (node instanceof XmlElement) {
425
+ while (node instanceof _XmlElement) {
461
426
  if ("xml:space" in node.attributes) {
462
427
  return node.attributes["xml:space"] === "preserve";
463
428
  }
@@ -611,70 +576,68 @@ var Parser = class {
611
576
  */
612
577
  constructor(xml, options = {}) {
613
578
  let doc = this.document = new XmlDocument();
614
- let scanner = this.c = new StringScanner(xml);
615
- this.l = doc;
616
- this.f = options;
617
- if (this.f.includeOffsets) {
579
+ this.j = doc;
580
+ this.g = options;
581
+ this.c = new StringScanner(xml);
582
+ if (this.g.includeOffsets) {
618
583
  doc.start = 0;
619
584
  doc.end = xml.length;
620
585
  }
621
- scanner.b("\uFEFF");
622
- this.H();
623
- if (!this.B()) {
624
- throw this.a("Root element is missing or invalid");
625
- }
626
- while (this.w()) {
627
- }
628
- if (!scanner.z) {
629
- throw this.a("Extra content at the end of the document");
630
- }
586
+ this.parse();
631
587
  }
632
588
  /**
633
589
  * Adds the given `XmlNode` as a child of `this.currentNode`.
634
590
  */
635
- j(node, charIndex) {
636
- node.parent = this.l;
637
- if (this.f.includeOffsets) {
638
- node.start = this.c.i(charIndex);
639
- node.end = this.c.i();
591
+ i(node, charIndex) {
592
+ node.parent = this.j;
593
+ if (this.g.includeOffsets) {
594
+ node.start = this.c.f(charIndex);
595
+ node.end = this.c.f();
640
596
  }
641
- this.l.children.push(node);
597
+ this.j.children.push(node);
642
598
  return true;
643
599
  }
644
600
  /**
645
601
  * Adds the given _text_ to the document, either by appending it to a
646
602
  * preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
603
+ *
604
+ * When _normalize_ is `true` (the default), line breaks in _text_ are
605
+ * normalized per section 2.11 of the XML spec. This must be `false` for text
606
+ * that comes from a character or entity reference, since references aren't
607
+ * subject to line break normalization.
647
608
  */
648
- x(text, charIndex) {
649
- let { children } = this.l;
609
+ y(text, charIndex, normalize = true) {
610
+ let { children } = this.j;
650
611
  let { length } = children;
651
- text = normalizeLineBreaks(text);
612
+ if (normalize) {
613
+ text = normalizeLineBreaks(text);
614
+ }
652
615
  if (length > 0) {
653
616
  let prevNode = children[length - 1];
654
617
  if ((prevNode == null ? void 0 : prevNode.type) === XmlNode.TYPE_TEXT) {
655
618
  let textNode = prevNode;
656
619
  textNode.text += text;
657
- if (this.f.includeOffsets) {
658
- textNode.end = this.c.i();
620
+ if (this.g.includeOffsets) {
621
+ textNode.end = this.c.f();
659
622
  }
660
623
  return true;
661
624
  }
662
625
  }
663
- return this.j(new XmlText(text), charIndex);
626
+ return this.i(new XmlText(text), charIndex);
664
627
  }
665
628
  /**
666
629
  * Consumes element attributes.
667
630
  *
668
631
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
669
632
  */
670
- I() {
633
+ H() {
671
634
  let attributes = /* @__PURE__ */ Object.create(null);
672
635
  while (this.e()) {
673
- let attrName = this.r();
636
+ let attrName = this.q();
674
637
  if (!attrName) {
675
638
  break;
676
639
  }
677
- let attrValue = this.u() && this.J();
640
+ let attrValue = this.t() && this.I();
678
641
  if (attrValue === false) {
679
642
  throw this.a("Attribute value expected");
680
643
  }
@@ -686,7 +649,7 @@ var Parser = class {
686
649
  }
687
650
  attributes[attrName] = attrValue;
688
651
  }
689
- if (this.f.sortAttributes) {
652
+ if (this.g.sortAttributes) {
690
653
  let attrNames = Object.keys(attributes).sort();
691
654
  let sortedAttributes = /* @__PURE__ */ Object.create(null);
692
655
  for (let i = 0; i < attrNames.length; ++i) {
@@ -707,41 +670,40 @@ var Parser = class {
707
670
  *
708
671
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
709
672
  */
710
- J() {
673
+ I() {
711
674
  let { c: scanner } = this;
712
- let quote = scanner.h();
675
+ let quote = scanner.m();
713
676
  if (quote !== '"' && quote !== "'") {
714
677
  return false;
715
678
  }
716
- scanner.g();
679
+ scanner.p();
717
680
  let chars;
718
681
  let isClosed = false;
719
682
  let value = emptyString2;
720
683
  let regex = quote === '"' ? attValueCharDoubleQuote : attValueCharSingleQuote;
721
- matchLoop:
722
- while (!scanner.z) {
723
- chars = scanner.G(regex);
724
- if (chars) {
725
- this.p(chars);
726
- value += chars.replace(attValueNormalizedWhitespace, " ");
727
- }
728
- switch (scanner.h()) {
729
- case quote:
730
- isClosed = true;
731
- break matchLoop;
732
- case "&":
733
- value += this.C();
734
- continue;
735
- case "<":
736
- throw this.a("Unescaped `<` is not allowed in an attribute value");
737
- case emptyString2:
738
- break matchLoop;
739
- }
684
+ matchLoop: while (!scanner.B) {
685
+ chars = scanner.x(regex);
686
+ if (chars) {
687
+ this.o(chars);
688
+ value += chars.replace(attValueNormalizedWhitespace, " ");
740
689
  }
690
+ switch (scanner.m()) {
691
+ case quote:
692
+ isClosed = true;
693
+ break matchLoop;
694
+ case "&":
695
+ value += this.C();
696
+ continue;
697
+ case "<":
698
+ throw this.a("Unescaped `<` is not allowed in an attribute value");
699
+ default:
700
+ break matchLoop;
701
+ }
702
+ }
741
703
  if (!isClosed) {
742
704
  throw this.a("Unclosed attribute");
743
705
  }
744
- scanner.g();
706
+ scanner.p();
745
707
  return value;
746
708
  }
747
709
  /**
@@ -750,18 +712,18 @@ var Parser = class {
750
712
  * @returns Whether a CDATA section was consumed.
751
713
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
752
714
  */
753
- K() {
715
+ J() {
754
716
  let { c: scanner } = this;
755
717
  let startIndex = scanner.d;
756
718
  if (!scanner.b("<![CDATA[")) {
757
719
  return false;
758
720
  }
759
- let text = scanner.t("]]>");
760
- this.p(text);
721
+ let text = scanner.s("]]>");
722
+ this.o(text);
761
723
  if (!scanner.b("]]>")) {
762
724
  throw this.a("Unclosed CDATA section");
763
725
  }
764
- return this.f.preserveCdata ? this.j(new XmlCdata(normalizeLineBreaks(text)), startIndex) : this.x(text, startIndex);
726
+ return this.g.preserveCdata ? this.i(new XmlCdata(normalizeLineBreaks(text)), startIndex) : this.y(text, startIndex);
765
727
  }
766
728
  /**
767
729
  * Consumes character data if possible.
@@ -769,18 +731,18 @@ var Parser = class {
769
731
  * @returns Whether character data was consumed.
770
732
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
771
733
  */
772
- L() {
734
+ K() {
773
735
  let { c: scanner } = this;
774
736
  let startIndex = scanner.d;
775
- let charData = scanner.A(endCharData);
737
+ let charData = scanner.x(endCharData);
776
738
  if (!charData) {
777
739
  return false;
778
740
  }
779
- this.p(charData);
780
- if (scanner.h(3) === "]]>") {
741
+ this.o(charData);
742
+ if (scanner.m(3) === "]]>") {
781
743
  throw this.a("Element content may not contain the CDATA section close delimiter `]]>`");
782
744
  }
783
- return this.x(charData, startIndex);
745
+ return this.y(charData, startIndex);
784
746
  }
785
747
  /**
786
748
  * Consumes a comment if possible.
@@ -794,15 +756,15 @@ var Parser = class {
794
756
  if (!scanner.b("<!--")) {
795
757
  return false;
796
758
  }
797
- let content = scanner.t("--");
798
- this.p(content);
759
+ let content = scanner.s("--");
760
+ this.o(content);
799
761
  if (!scanner.b("-->")) {
800
- if (scanner.h(2) === "--") {
762
+ if (scanner.m(2) === "--") {
801
763
  throw this.a("The string `--` isn't allowed inside a comment");
802
764
  }
803
765
  throw this.a("Unclosed comment");
804
766
  }
805
- return this.f.preserveComments ? this.j(new XmlComment(normalizeLineBreaks(content)), startIndex) : true;
767
+ return this.g.preserveComments ? this.i(new XmlComment(normalizeLineBreaks(content)), startIndex) : true;
806
768
  }
807
769
  /**
808
770
  * Consumes a reference in a content context if possible.
@@ -813,10 +775,10 @@ var Parser = class {
813
775
  * @returns Whether a reference was consumed.
814
776
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
815
777
  */
816
- M() {
778
+ L() {
817
779
  let startIndex = this.c.d;
818
780
  let ref = this.C();
819
- return ref ? this.x(ref, startIndex) : false;
781
+ return ref ? this.y(ref, startIndex, false) : false;
820
782
  }
821
783
  /**
822
784
  * Consumes a doctype declaration if possible.
@@ -827,13 +789,13 @@ var Parser = class {
827
789
  * @returns Whether a doctype declaration was consumed.
828
790
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
829
791
  */
830
- N() {
792
+ M() {
831
793
  let { c: scanner } = this;
832
794
  let startIndex = scanner.d;
833
795
  if (!scanner.b("<!DOCTYPE")) {
834
796
  return false;
835
797
  }
836
- let name = this.e() && this.r();
798
+ let name = this.e() && this.q();
837
799
  if (!name) {
838
800
  throw this.a("Expected a name");
839
801
  }
@@ -841,7 +803,7 @@ var Parser = class {
841
803
  let systemId;
842
804
  if (this.e()) {
843
805
  if (scanner.b("PUBLIC")) {
844
- publicId = this.e() && this.O();
806
+ publicId = this.e() && this.N();
845
807
  if (publicId === false) {
846
808
  throw this.a("Expected a public identifier");
847
809
  }
@@ -849,7 +811,7 @@ var Parser = class {
849
811
  }
850
812
  if (publicId !== void 0 || scanner.b("SYSTEM")) {
851
813
  this.e();
852
- systemId = this.s();
814
+ systemId = this.r();
853
815
  if (systemId === false) {
854
816
  throw this.a("Expected a system identifier");
855
817
  }
@@ -858,7 +820,7 @@ var Parser = class {
858
820
  }
859
821
  let internalSubset;
860
822
  if (scanner.b("[")) {
861
- internalSubset = scanner.A(/\][\x20\t\r\n]*>/);
823
+ internalSubset = scanner.x(/\][\x20\t\r\n]*>/);
862
824
  if (!scanner.b("]")) {
863
825
  throw this.a("Unclosed internal subset");
864
826
  }
@@ -867,7 +829,7 @@ var Parser = class {
867
829
  if (!scanner.b(">")) {
868
830
  throw this.a("Unclosed doctype declaration");
869
831
  }
870
- return this.f.preserveDocumentType ? this.j(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex) : true;
832
+ return this.g.preserveDocumentType ? this.i(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex) : true;
871
833
  }
872
834
  /**
873
835
  * Consumes an element if possible.
@@ -875,42 +837,42 @@ var Parser = class {
875
837
  * @returns Whether an element was consumed.
876
838
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
877
839
  */
878
- B() {
840
+ E() {
879
841
  let { c: scanner } = this;
880
842
  let startIndex = scanner.d;
881
843
  if (!scanner.b("<")) {
882
844
  return false;
883
845
  }
884
- let name = this.r();
846
+ let name = this.q();
885
847
  if (!name) {
886
- scanner.o(startIndex);
848
+ scanner.n(startIndex);
887
849
  return false;
888
850
  }
889
- let attributes = this.I();
851
+ let attributes = this.H();
890
852
  let isEmpty = !!scanner.b("/>");
891
853
  let element = new XmlElement(name, attributes);
892
- element.parent = this.l;
854
+ element.parent = this.j;
893
855
  if (!isEmpty) {
894
856
  if (!scanner.b(">")) {
895
857
  throw this.a(`Unclosed start tag for element \`${name}\``);
896
858
  }
897
- this.l = element;
859
+ this.j = element;
898
860
  do {
899
- this.L();
900
- } while (this.B() || this.M() || this.K() || this.E() || this.D());
861
+ this.K();
862
+ } while (this.E() || this.L() || this.J() || this.F() || this.D());
901
863
  let endTagMark = scanner.d;
902
864
  let endTagName;
903
- if (!scanner.b("</") || !(endTagName = this.r()) || endTagName !== name) {
904
- scanner.o(endTagMark);
865
+ if (!scanner.b("</") || !(endTagName = this.q()) || endTagName !== name) {
866
+ scanner.n(endTagMark);
905
867
  throw this.a(`Missing end tag for element ${name}`);
906
868
  }
907
869
  this.e();
908
870
  if (!scanner.b(">")) {
909
871
  throw this.a(`Unclosed end tag for element ${name}`);
910
872
  }
911
- this.l = element.parent;
873
+ this.j = element.parent;
912
874
  }
913
- return this.j(element, startIndex);
875
+ return this.i(element, startIndex);
914
876
  }
915
877
  /**
916
878
  * Consumes an `Eq` production if possible.
@@ -918,7 +880,7 @@ var Parser = class {
918
880
  * @returns Whether an `Eq` production was consumed.
919
881
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
920
882
  */
921
- u() {
883
+ t() {
922
884
  this.e();
923
885
  if (this.c.b("=")) {
924
886
  this.e();
@@ -932,8 +894,8 @@ var Parser = class {
932
894
  * @returns Whether anything was consumed.
933
895
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
934
896
  */
935
- w() {
936
- return this.D() || this.E() || this.e();
897
+ z() {
898
+ return this.D() || this.F() || this.e();
937
899
  }
938
900
  /**
939
901
  * Consumes one or more `Name` characters if possible.
@@ -941,8 +903,8 @@ var Parser = class {
941
903
  * @returns `Name` characters, or an empty string if none were consumed.
942
904
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
943
905
  */
944
- r() {
945
- return isNameStartChar(this.c.h()) ? this.c.v(isNameChar) : emptyString2;
906
+ q() {
907
+ return isNameStartChar(this.c.m()) ? this.c.w(isNameChar) : emptyString2;
946
908
  }
947
909
  /**
948
910
  * Consumes a processing instruction if possible.
@@ -950,16 +912,16 @@ var Parser = class {
950
912
  * @returns Whether a processing instruction was consumed.
951
913
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
952
914
  */
953
- E() {
915
+ F() {
954
916
  let { c: scanner } = this;
955
917
  let startIndex = scanner.d;
956
918
  if (!scanner.b("<?")) {
957
919
  return false;
958
920
  }
959
- let name = this.r();
921
+ let name = this.q();
960
922
  if (name) {
961
923
  if (name.toLowerCase() === "xml") {
962
- scanner.o(startIndex);
924
+ scanner.n(startIndex);
963
925
  throw this.a("XML declaration isn't allowed here");
964
926
  }
965
927
  } else {
@@ -967,16 +929,16 @@ var Parser = class {
967
929
  }
968
930
  if (!this.e()) {
969
931
  if (scanner.b("?>")) {
970
- return this.j(new XmlProcessingInstruction(name), startIndex);
932
+ return this.i(new XmlProcessingInstruction(name), startIndex);
971
933
  }
972
934
  throw this.a("Whitespace is required after a processing instruction name");
973
935
  }
974
- let content = scanner.t("?>");
975
- this.p(content);
936
+ let content = scanner.s("?>");
937
+ this.o(content);
976
938
  if (!scanner.b("?>")) {
977
939
  throw this.a("Unterminated processing instruction");
978
940
  }
979
- return this.j(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
941
+ return this.i(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
980
942
  }
981
943
  /**
982
944
  * Consumes a prolog if possible.
@@ -984,14 +946,14 @@ var Parser = class {
984
946
  * @returns Whether a prolog was consumed.
985
947
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
986
948
  */
987
- H() {
949
+ O() {
988
950
  let { c: scanner } = this;
989
951
  let startIndex = scanner.d;
990
952
  this.P();
991
- while (this.w()) {
953
+ while (this.z()) {
992
954
  }
993
- if (this.N()) {
994
- while (this.w()) {
955
+ if (this.M()) {
956
+ while (this.z()) {
995
957
  }
996
958
  }
997
959
  return startIndex < scanner.d;
@@ -1006,11 +968,11 @@ var Parser = class {
1006
968
  *
1007
969
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
1008
970
  */
1009
- O() {
971
+ N() {
1010
972
  let startIndex = this.c.d;
1011
- let value = this.s();
973
+ let value = this.r();
1012
974
  if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
1013
- this.c.o(startIndex);
975
+ this.c.n(startIndex);
1014
976
  throw this.a("Invalid character in public identifier");
1015
977
  }
1016
978
  return value;
@@ -1032,8 +994,8 @@ var Parser = class {
1032
994
  if (!scanner.b("&")) {
1033
995
  return false;
1034
996
  }
1035
- let ref = scanner.v(isReferenceChar);
1036
- if (scanner.F() !== ";") {
997
+ let ref = scanner.w(isReferenceChar);
998
+ if (scanner.G() !== ";") {
1037
999
  throw this.a("Unterminated reference (a reference must end with `;`)");
1038
1000
  }
1039
1001
  let parsedValue;
@@ -1052,7 +1014,7 @@ var Parser = class {
1052
1014
  let {
1053
1015
  ignoreUndefinedEntities,
1054
1016
  resolveUndefinedEntity
1055
- } = this.f;
1017
+ } = this.g;
1056
1018
  let wrappedRef = `&${ref};`;
1057
1019
  if (resolveUndefinedEntity) {
1058
1020
  let resolvedValue = resolveUndefinedEntity(wrappedRef);
@@ -1067,7 +1029,7 @@ var Parser = class {
1067
1029
  if (ignoreUndefinedEntities) {
1068
1030
  return wrappedRef;
1069
1031
  }
1070
- scanner.o(-wrappedRef.length);
1032
+ scanner.n(-wrappedRef.length);
1071
1033
  throw this.a(`Named entity isn't defined: ${wrappedRef}`);
1072
1034
  }
1073
1035
  }
@@ -1086,14 +1048,14 @@ var Parser = class {
1086
1048
  *
1087
1049
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
1088
1050
  */
1089
- s() {
1051
+ r() {
1090
1052
  let { c: scanner } = this;
1091
1053
  let quote = scanner.b('"') || scanner.b("'");
1092
1054
  if (!quote) {
1093
1055
  return false;
1094
1056
  }
1095
- let value = scanner.t(quote);
1096
- this.p(value);
1057
+ let value = scanner.s(quote);
1058
+ this.o(value);
1097
1059
  if (!scanner.b(quote)) {
1098
1060
  throw this.a("Missing end quote");
1099
1061
  }
@@ -1106,7 +1068,7 @@ var Parser = class {
1106
1068
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
1107
1069
  */
1108
1070
  e() {
1109
- return !!this.c.v(isWhitespace);
1071
+ return !!this.c.w(isWhitespace);
1110
1072
  }
1111
1073
  /**
1112
1074
  * Consumes an XML declaration if possible.
@@ -1123,7 +1085,7 @@ var Parser = class {
1123
1085
  if (!this.e()) {
1124
1086
  throw this.a("Invalid XML declaration");
1125
1087
  }
1126
- let version = !!scanner.b("version") && this.u() && this.s();
1088
+ let version = !!scanner.b("version") && this.t() && this.r();
1127
1089
  if (version === false) {
1128
1090
  throw this.a("XML version is missing or invalid");
1129
1091
  } else if (!/^1\.[0-9]+$/.test(version)) {
@@ -1132,11 +1094,14 @@ var Parser = class {
1132
1094
  let encoding;
1133
1095
  let standalone;
1134
1096
  if (this.e()) {
1135
- encoding = !!scanner.b("encoding") && this.u() && this.s();
1097
+ encoding = !!scanner.b("encoding") && this.t() && this.r();
1136
1098
  if (encoding) {
1099
+ if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
1100
+ throw this.a("Invalid character in encoding name");
1101
+ }
1137
1102
  this.e();
1138
1103
  }
1139
- standalone = !!scanner.b("standalone") && this.u() && this.s();
1104
+ standalone = !!scanner.b("standalone") && this.t() && this.r();
1140
1105
  if (standalone) {
1141
1106
  if (standalone !== "yes" && standalone !== "no") {
1142
1107
  throw this.a('Only "yes" and "no" are permitted as values of `standalone`');
@@ -1147,7 +1112,7 @@ var Parser = class {
1147
1112
  if (!scanner.b("?>")) {
1148
1113
  throw this.a("Invalid or unclosed XML declaration");
1149
1114
  }
1150
- return this.f.preserveXmlDeclaration ? this.j(new XmlDeclaration(
1115
+ return this.g.preserveXmlDeclaration ? this.i(new XmlDeclaration(
1151
1116
  version,
1152
1117
  encoding || void 0,
1153
1118
  standalone || void 0
@@ -1158,18 +1123,33 @@ var Parser = class {
1158
1123
  */
1159
1124
  a(message) {
1160
1125
  let { c: scanner } = this;
1161
- return new XmlError(message, scanner.d, scanner.m);
1126
+ return new XmlError(message, scanner.d, scanner.h);
1127
+ }
1128
+ /**
1129
+ * Parses the XML input.
1130
+ */
1131
+ parse() {
1132
+ this.c.b("\uFEFF");
1133
+ this.O();
1134
+ if (!this.E()) {
1135
+ throw this.a("Root element is missing or invalid");
1136
+ }
1137
+ while (this.z()) {
1138
+ }
1139
+ if (!this.c.B) {
1140
+ throw this.a("Extra content at the end of the document");
1141
+ }
1162
1142
  }
1163
1143
  /**
1164
1144
  * Throws an invalid character error if any character in the given _string_
1165
1145
  * isn't a valid XML character.
1166
1146
  */
1167
- p(string) {
1147
+ o(string) {
1168
1148
  let { length } = string;
1169
1149
  for (let i = 0; i < length; ++i) {
1170
1150
  let cp = string.codePointAt(i);
1171
1151
  if (!isXmlCodePoint(cp)) {
1172
- this.c.o(-([...string].length - i));
1152
+ this.c.n(-([...string].length - i));
1173
1153
  throw this.a("Invalid character");
1174
1154
  }
1175
1155
  if (cp > 65535) {