@rgrove/parse-xml 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +46 -31
  2. package/dist/browser.js +692 -300
  3. package/dist/browser.js.map +4 -4
  4. package/dist/global.min.js +9 -8
  5. package/dist/global.min.js.map +4 -4
  6. package/dist/index.d.ts +3 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +8 -2
  9. package/dist/index.js.map +1 -1
  10. package/dist/lib/Parser.d.ts +53 -6
  11. package/dist/lib/Parser.d.ts.map +1 -1
  12. package/dist/lib/Parser.js +166 -126
  13. package/dist/lib/Parser.js.map +1 -1
  14. package/dist/lib/StringScanner.d.ts +15 -21
  15. package/dist/lib/StringScanner.d.ts.map +1 -1
  16. package/dist/lib/StringScanner.js +63 -86
  17. package/dist/lib/StringScanner.js.map +1 -1
  18. package/dist/lib/XmlDeclaration.d.ts +30 -0
  19. package/dist/lib/XmlDeclaration.d.ts.map +1 -0
  20. package/dist/lib/XmlDeclaration.js +36 -0
  21. package/dist/lib/XmlDeclaration.js.map +1 -0
  22. package/dist/lib/XmlDocument.d.ts +4 -2
  23. package/dist/lib/XmlDocument.d.ts.map +1 -1
  24. package/dist/lib/XmlDocument.js.map +1 -1
  25. package/dist/lib/XmlDocumentType.d.ts +37 -0
  26. package/dist/lib/XmlDocumentType.d.ts.map +1 -0
  27. package/dist/lib/XmlDocumentType.js +39 -0
  28. package/dist/lib/XmlDocumentType.js.map +1 -0
  29. package/dist/lib/XmlElement.js.map +1 -1
  30. package/dist/lib/XmlError.d.ts +24 -0
  31. package/dist/lib/XmlError.d.ts.map +1 -0
  32. package/dist/lib/XmlError.js +52 -0
  33. package/dist/lib/XmlError.js.map +1 -0
  34. package/dist/lib/XmlNode.d.ts +20 -1
  35. package/dist/lib/XmlNode.d.ts.map +1 -1
  36. package/dist/lib/XmlNode.js +28 -3
  37. package/dist/lib/XmlNode.js.map +1 -1
  38. package/dist/lib/syntax.d.ts.map +1 -1
  39. package/dist/lib/syntax.js +18 -23
  40. package/dist/lib/syntax.js.map +1 -1
  41. package/dist/lib/types.d.ts +2 -2
  42. package/dist/lib/types.d.ts.map +1 -1
  43. package/package.json +20 -23
  44. package/src/index.ts +3 -0
  45. package/src/lib/Parser.ts +228 -141
  46. package/src/lib/StringScanner.ts +66 -103
  47. package/src/lib/XmlDeclaration.ts +58 -0
  48. package/src/lib/XmlDocument.ts +4 -2
  49. package/src/lib/XmlDocumentType.ts +67 -0
  50. package/src/lib/XmlError.ts +80 -0
  51. package/src/lib/XmlNode.ts +33 -3
  52. package/src/lib/syntax.ts +12 -18
package/dist/browser.js CHANGED
@@ -1,4 +1,4 @@
1
- /*! @rgrove/parse-xml v4.0.1 | ISC License | Copyright Ryan Grove */
1
+ /*! @rgrove/parse-xml v4.2.0 | ISC License | Copyright Ryan Grove */
2
2
  "use strict";
3
3
  var __defProp = Object.defineProperty;
4
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
@@ -23,8 +23,11 @@ var src_exports = {};
23
23
  __export(src_exports, {
24
24
  XmlCdata: () => XmlCdata,
25
25
  XmlComment: () => XmlComment,
26
+ XmlDeclaration: () => XmlDeclaration,
26
27
  XmlDocument: () => XmlDocument,
28
+ XmlDocumentType: () => XmlDocumentType,
27
29
  XmlElement: () => XmlElement,
30
+ XmlError: () => XmlError,
28
31
  XmlNode: () => XmlNode,
29
32
  XmlProcessingInstruction: () => XmlProcessingInstruction,
30
33
  XmlText: () => XmlText,
@@ -37,125 +40,168 @@ var emptyString = "";
37
40
  var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
38
41
  var StringScanner = class {
39
42
  constructor(string) {
40
- this.h = this.m(string, true);
43
+ this.k = this.u(string, true);
41
44
  this.d = 0;
42
45
  this.length = string.length;
43
- this.j = this.h !== this.length;
44
- this.string = string;
45
- if (this.j) {
46
+ this.l = this.k !== this.length;
47
+ this.h = string;
48
+ if (this.l) {
46
49
  let charsToBytes = [];
47
- for (let byteIndex = 0, charIndex = 0; charIndex < this.h; ++charIndex) {
50
+ for (let byteIndex = 0, charIndex = 0; charIndex < this.k; ++charIndex) {
48
51
  charsToBytes[charIndex] = byteIndex;
49
52
  byteIndex += string.codePointAt(byteIndex) > 65535 ? 2 : 1;
50
53
  }
51
- this.x = charsToBytes;
54
+ this.A = charsToBytes;
52
55
  }
53
56
  }
54
- get y() {
55
- return this.d >= this.h;
56
- }
57
- n(charIndex = this.d) {
58
- var _a;
59
- return this.j ? (_a = this.x[charIndex]) != null ? _a : Infinity : charIndex;
60
- }
61
- m(string, multiByteSafe = this.j) {
57
+ /**
58
+ * Whether the current character index is at the end of the input string.
59
+ */
60
+ get B() {
61
+ return this.d >= this.k;
62
+ }
63
+ // -- Protected Methods ------------------------------------------------------
64
+ /**
65
+ * Returns the number of characters in the given string, which may differ from
66
+ * the byte length if the string contains multibyte characters.
67
+ */
68
+ u(string, multiByteSafe = this.l) {
62
69
  return multiByteSafe ? string.replace(surrogatePair, "_").length : string.length;
63
70
  }
64
- f(count = 1) {
65
- this.d = Math.min(this.h, this.d + count);
66
- }
67
- D(count = 1) {
68
- let chars = this.g(count);
69
- this.f(count);
71
+ // -- Public Methods ---------------------------------------------------------
72
+ /**
73
+ * Advances the scanner by the given number of characters, stopping if the end
74
+ * of the string is reached.
75
+ */
76
+ p(count = 1) {
77
+ this.d = Math.min(this.k, this.d + count);
78
+ }
79
+ /**
80
+ * Returns the byte index of the given character index in the string. The two
81
+ * may differ in strings that contain multibyte characters.
82
+ */
83
+ f(charIndex = this.d) {
84
+ var _a;
85
+ return this.l ? (_a = this.A[charIndex]) != null ? _a : Infinity : charIndex;
86
+ }
87
+ /**
88
+ * Consumes and returns the given number of characters if possible, advancing
89
+ * the scanner and stopping if the end of the string is reached.
90
+ *
91
+ * If no characters could be consumed, an empty string will be returned.
92
+ */
93
+ G(charCount = 1) {
94
+ let chars = this.m(charCount);
95
+ this.p(charCount);
70
96
  return chars;
71
97
  }
72
- s(regex) {
73
- if (!regex.sticky) {
74
- throw new Error('`regex` must have a sticky flag ("y")');
75
- }
76
- regex.lastIndex = this.n();
77
- let result = regex.exec(this.string);
78
- if (result === null || result.length === 0) {
79
- return emptyString;
80
- }
81
- let match = result[0];
82
- this.f(this.m(match));
83
- return match;
84
- }
85
- t(fn) {
86
- let char;
87
- let match = emptyString;
88
- while ((char = this.g()) && fn(char)) {
89
- match += char;
90
- this.f();
91
- }
92
- return match;
98
+ /**
99
+ * Consumes and returns the given number of bytes if possible, advancing the
100
+ * scanner and stopping if the end of the string is reached.
101
+ *
102
+ * It's up to the caller to ensure that the given byte count doesn't split a
103
+ * multibyte character.
104
+ *
105
+ * If no bytes could be consumed, an empty string will be returned.
106
+ */
107
+ v(byteCount) {
108
+ let byteIndex = this.f();
109
+ let result = this.h.slice(byteIndex, byteIndex + byteCount);
110
+ this.p(this.u(result));
111
+ return result;
93
112
  }
94
- N(stringToConsume) {
95
- if (this.b(stringToConsume)) {
96
- return stringToConsume;
97
- }
98
- if (this.j) {
99
- let { length } = stringToConsume;
100
- let charLengthToMatch = this.m(stringToConsume);
101
- if (charLengthToMatch !== length && stringToConsume === this.g(charLengthToMatch)) {
102
- this.f(charLengthToMatch);
103
- return stringToConsume;
113
+ /**
114
+ * Consumes and returns all characters for which the given function returns
115
+ * `true`, stopping when `false` is returned or the end of the input is
116
+ * reached.
117
+ */
118
+ w(fn) {
119
+ let { length, l: multiByteMode, h: string } = this;
120
+ let startByteIndex = this.f();
121
+ let endByteIndex = startByteIndex;
122
+ if (multiByteMode) {
123
+ while (endByteIndex < length) {
124
+ let char = string[endByteIndex];
125
+ let isSurrogatePair = char >= "\uD800" && char <= "\uDBFF";
126
+ if (isSurrogatePair) {
127
+ char += string[endByteIndex + 1];
128
+ }
129
+ if (!fn(char)) {
130
+ break;
131
+ }
132
+ endByteIndex += isSurrogatePair ? 2 : 1;
133
+ }
134
+ } else {
135
+ while (endByteIndex < length && fn(string[endByteIndex])) {
136
+ ++endByteIndex;
104
137
  }
105
138
  }
106
- return emptyString;
107
- }
139
+ return this.v(endByteIndex - startByteIndex);
140
+ }
141
+ /**
142
+ * Consumes the given string if it exists at the current character index, and
143
+ * advances the scanner.
144
+ *
145
+ * If the given string doesn't exist at the current character index, an empty
146
+ * string will be returned and the scanner will not be advanced.
147
+ */
108
148
  b(stringToConsume) {
109
149
  let { length } = stringToConsume;
110
- if (this.g(length) === stringToConsume) {
111
- this.f(length);
150
+ let byteIndex = this.f();
151
+ if (stringToConsume === this.h.slice(byteIndex, byteIndex + length)) {
152
+ this.p(length === 1 ? 1 : this.u(stringToConsume));
112
153
  return stringToConsume;
113
154
  }
114
155
  return emptyString;
115
156
  }
116
- E(regex) {
117
- let restOfString = this.string.slice(this.n());
118
- let matchByteIndex = restOfString.search(regex);
119
- if (matchByteIndex <= 0) {
120
- return emptyString;
121
- }
122
- let result = restOfString.slice(0, matchByteIndex);
123
- this.f(this.m(result));
124
- return result;
125
- }
126
- p(searchString) {
127
- let { string } = this;
128
- let byteIndex = this.n();
129
- let matchByteIndex = string.indexOf(searchString, byteIndex);
130
- if (matchByteIndex <= 0) {
131
- return emptyString;
132
- }
133
- let result = string.slice(byteIndex, matchByteIndex);
134
- this.f(this.m(result));
135
- return result;
136
- }
137
- g(count = 1) {
138
- let { d: charIndex, j: multiByteMode, string } = this;
139
- if (multiByteMode) {
140
- if (charIndex >= this.h) {
141
- return emptyString;
142
- }
143
- return string.slice(
144
- this.n(charIndex),
145
- this.n(charIndex + count)
146
- );
147
- }
148
- return string.slice(charIndex, charIndex + count);
149
- }
150
- o(index = 0) {
151
- this.d = index >= 0 ? Math.min(this.h, index) : Math.max(0, this.d + index);
157
+ /**
158
+ * Consumes characters until the given global regex is matched, advancing the
159
+ * scanner up to (but not beyond) the beginning of the match. If the regex
160
+ * doesn't match, nothing will be consumed.
161
+ *
162
+ * Returns the consumed string, or an empty string if nothing was consumed.
163
+ */
164
+ x(regex) {
165
+ let matchByteIndex = this.h.slice(this.f()).search(regex);
166
+ return matchByteIndex > 0 ? this.v(matchByteIndex) : emptyString;
167
+ }
168
+ /**
169
+ * Consumes characters until the given string is found, advancing the scanner
170
+ * up to (but not beyond) that point. If the string is never found, nothing
171
+ * will be consumed.
172
+ *
173
+ * Returns the consumed string, or an empty string if nothing was consumed.
174
+ */
175
+ s(searchString) {
176
+ let byteIndex = this.f();
177
+ let matchByteIndex = this.h.indexOf(searchString, byteIndex);
178
+ return matchByteIndex > 0 ? this.v(matchByteIndex - byteIndex) : emptyString;
179
+ }
180
+ /**
181
+ * Returns the given number of characters starting at the current character
182
+ * index, without advancing the scanner and without exceeding the end of the
183
+ * input string.
184
+ */
185
+ m(count = 1) {
186
+ let { d: charIndex, h: string } = this;
187
+ return this.l ? string.slice(this.f(charIndex), this.f(charIndex + count)) : string.slice(charIndex, charIndex + count);
188
+ }
189
+ /**
190
+ * Resets the scanner position to the given character _index_, or to the start
191
+ * of the input string if no index is given.
192
+ *
193
+ * If _index_ is negative, the scanner position will be moved backward by that
194
+ * many characters, stopping if the beginning of the string is reached.
195
+ */
196
+ n(index = 0) {
197
+ this.d = index >= 0 ? Math.min(this.k, index) : Math.max(0, this.d + index);
152
198
  }
153
199
  };
154
200
 
155
201
  // src/lib/syntax.ts
156
- var attValueCharDoubleQuote = /[^"&<]+/y;
157
- var attValueCharSingleQuote = /[^'&<]+/y;
158
- var attValueNormalizedWhitespace = /[\t\n]/g;
202
+ var attValueCharDoubleQuote = /["&<]/;
203
+ var attValueCharSingleQuote = /['&<]/;
204
+ var attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
159
205
  var endCharData = /<|&|]]>/;
160
206
  var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.create(null), {
161
207
  amp: "&",
@@ -165,45 +211,88 @@ var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.crea
165
211
  quot: '"'
166
212
  }));
167
213
  function isNameChar(char) {
168
- let cp = getCodePoint(char);
169
- return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp >= 8255 && cp <= 8256 || isNameStartChar(char, cp);
214
+ let cp = char.codePointAt(0);
215
+ return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp === 8255 || cp === 8256 || isNameStartChar(char, cp);
170
216
  }
171
- function isNameStartChar(char, cp = getCodePoint(char)) {
172
- return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp >= 8204 && cp <= 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
217
+ function isNameStartChar(char, cp = char.codePointAt(0)) {
218
+ return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp === 8204 || cp === 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
173
219
  }
174
220
  function isReferenceChar(char) {
175
221
  return char === "#" || isNameChar(char);
176
222
  }
177
223
  function isWhitespace(char) {
178
- let cp = getCodePoint(char);
224
+ let cp = char.codePointAt(0);
179
225
  return cp === 32 || cp === 9 || cp === 10 || cp === 13;
180
226
  }
181
227
  function isXmlCodePoint(cp) {
182
- return cp === 9 || cp === 10 || cp === 13 || cp >= 32 && cp <= 55295 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
183
- }
184
- function getCodePoint(char) {
185
- return char.codePointAt(0) || -1;
228
+ return cp >= 32 && cp <= 55295 || cp === 10 || cp === 9 || cp === 13 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
186
229
  }
187
230
 
188
231
  // src/lib/XmlNode.ts
189
- var XmlNode = class {
232
+ var _XmlNode = class _XmlNode {
190
233
  constructor() {
234
+ /**
235
+ * Parent node of this node, or `null` if this node has no parent.
236
+ */
191
237
  this.parent = null;
192
- }
238
+ /**
239
+ * Starting byte offset of this node in the original XML string, or `-1` if
240
+ * the offset is unknown.
241
+ */
242
+ this.start = -1;
243
+ /**
244
+ * Ending byte offset of this node in the original XML string, or `-1` if the
245
+ * offset is unknown.
246
+ */
247
+ this.end = -1;
248
+ }
249
+ /**
250
+ * Document that contains this node, or `null` if this node is not associated
251
+ * with a document.
252
+ */
193
253
  get document() {
194
254
  var _a, _b;
195
255
  return (_b = (_a = this.parent) == null ? void 0 : _a.document) != null ? _b : null;
196
256
  }
257
+ /**
258
+ * Whether this node is the root node of the document (also known as the
259
+ * document element).
260
+ */
197
261
  get isRootNode() {
198
- return this.parent !== null && this.parent === this.document;
199
- }
262
+ return this.parent !== null && this.parent === this.document && this.type === _XmlNode.TYPE_ELEMENT;
263
+ }
264
+ /**
265
+ * Whether whitespace should be preserved in the content of this element and
266
+ * its children.
267
+ *
268
+ * This is influenced by the value of the special `xml:space` attribute, and
269
+ * will be `true` for any node whose `xml:space` attribute is set to
270
+ * "preserve". If a node has no such attribute, it will inherit the value of
271
+ * the nearest ancestor that does (if any).
272
+ *
273
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
274
+ */
200
275
  get preserveWhitespace() {
201
276
  var _a;
202
- return Boolean((_a = this.parent) == null ? void 0 : _a.preserveWhitespace);
203
- }
277
+ return !!((_a = this.parent) == null ? void 0 : _a.preserveWhitespace);
278
+ }
279
+ /**
280
+ * Type of this node.
281
+ *
282
+ * The value of this property is a string that matches one of the static
283
+ * `TYPE_*` properties on the `XmlNode` class (e.g. `TYPE_ELEMENT`,
284
+ * `TYPE_TEXT`, etc.).
285
+ *
286
+ * The `XmlNode` class itself is a base class and doesn't have its own type
287
+ * name.
288
+ */
204
289
  get type() {
205
290
  return "";
206
291
  }
292
+ /**
293
+ * Returns a JSON-serializable object representing this node, minus properties
294
+ * that could result in circular references.
295
+ */
207
296
  toJSON() {
208
297
  let json = {
209
298
  type: this.type
@@ -214,15 +303,46 @@ var XmlNode = class {
214
303
  if (this.preserveWhitespace) {
215
304
  json.preserveWhitespace = true;
216
305
  }
306
+ if (this.start !== -1) {
307
+ json.start = this.start;
308
+ json.end = this.end;
309
+ }
217
310
  return json;
218
311
  }
219
312
  };
220
- XmlNode.TYPE_CDATA = "cdata";
221
- XmlNode.TYPE_COMMENT = "comment";
222
- XmlNode.TYPE_DOCUMENT = "document";
223
- XmlNode.TYPE_ELEMENT = "element";
224
- XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
225
- XmlNode.TYPE_TEXT = "text";
313
+ /**
314
+ * Type value for an `XmlCdata` node.
315
+ */
316
+ _XmlNode.TYPE_CDATA = "cdata";
317
+ /**
318
+ * Type value for an `XmlComment` node.
319
+ */
320
+ _XmlNode.TYPE_COMMENT = "comment";
321
+ /**
322
+ * Type value for an `XmlDocument` node.
323
+ */
324
+ _XmlNode.TYPE_DOCUMENT = "document";
325
+ /**
326
+ * Type value for an `XmlDocumentType` node.
327
+ */
328
+ _XmlNode.TYPE_DOCUMENT_TYPE = "doctype";
329
+ /**
330
+ * Type value for an `XmlElement` node.
331
+ */
332
+ _XmlNode.TYPE_ELEMENT = "element";
333
+ /**
334
+ * Type value for an `XmlProcessingInstruction` node.
335
+ */
336
+ _XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
337
+ /**
338
+ * Type value for an `XmlText` node.
339
+ */
340
+ _XmlNode.TYPE_TEXT = "text";
341
+ /**
342
+ * Type value for an `XmlDeclaration` node.
343
+ */
344
+ _XmlNode.TYPE_XML_DECLARATION = "xmldecl";
345
+ var XmlNode = _XmlNode;
226
346
 
227
347
  // src/lib/XmlText.ts
228
348
  var XmlText = class extends XmlNode {
@@ -263,20 +383,46 @@ var XmlComment = class extends XmlNode {
263
383
  }
264
384
  };
265
385
 
386
+ // src/lib/XmlDeclaration.ts
387
+ var XmlDeclaration = class extends XmlNode {
388
+ constructor(version, encoding, standalone) {
389
+ super();
390
+ this.version = version;
391
+ this.encoding = encoding != null ? encoding : null;
392
+ this.standalone = standalone != null ? standalone : null;
393
+ }
394
+ get type() {
395
+ return XmlNode.TYPE_XML_DECLARATION;
396
+ }
397
+ toJSON() {
398
+ let json = XmlNode.prototype.toJSON.call(this);
399
+ json.version = this.version;
400
+ for (let key of ["encoding", "standalone"]) {
401
+ if (this[key] !== null) {
402
+ json[key] = this[key];
403
+ }
404
+ }
405
+ return json;
406
+ }
407
+ };
408
+
266
409
  // src/lib/XmlElement.ts
267
- var XmlElement = class extends XmlNode {
410
+ var XmlElement = class _XmlElement extends XmlNode {
268
411
  constructor(name, attributes = /* @__PURE__ */ Object.create(null), children = []) {
269
412
  super();
270
413
  this.name = name;
271
414
  this.attributes = attributes;
272
415
  this.children = children;
273
416
  }
417
+ /**
418
+ * Whether this element is empty (meaning it has no children).
419
+ */
274
420
  get isEmpty() {
275
421
  return this.children.length === 0;
276
422
  }
277
423
  get preserveWhitespace() {
278
424
  let node = this;
279
- while (node instanceof XmlElement) {
425
+ while (node instanceof _XmlElement) {
280
426
  if ("xml:space" in node.attributes) {
281
427
  return node.attributes["xml:space"] === "preserve";
282
428
  }
@@ -284,6 +430,9 @@ var XmlElement = class extends XmlNode {
284
430
  }
285
431
  return false;
286
432
  }
433
+ /**
434
+ * Text content of this element and all its descendants.
435
+ */
287
436
  get text() {
288
437
  return this.children.map((child) => "text" in child ? child.text : "").join("");
289
438
  }
@@ -308,6 +457,9 @@ var XmlDocument = class extends XmlNode {
308
457
  get document() {
309
458
  return this;
310
459
  }
460
+ /**
461
+ * Root element of this document, or `null` if this document is empty.
462
+ */
311
463
  get root() {
312
464
  for (let child of this.children) {
313
465
  if (child instanceof XmlElement) {
@@ -316,6 +468,9 @@ var XmlDocument = class extends XmlNode {
316
468
  }
317
469
  return null;
318
470
  }
471
+ /**
472
+ * Text content of this document and all its descendants.
473
+ */
319
474
  get text() {
320
475
  return this.children.map((child) => "text" in child ? child.text : "").join("");
321
476
  }
@@ -329,6 +484,71 @@ var XmlDocument = class extends XmlNode {
329
484
  }
330
485
  };
331
486
 
487
+ // src/lib/XmlDocumentType.ts
488
+ var XmlDocumentType = class extends XmlNode {
489
+ constructor(name, publicId, systemId, internalSubset) {
490
+ super();
491
+ this.name = name;
492
+ this.publicId = publicId != null ? publicId : null;
493
+ this.systemId = systemId != null ? systemId : null;
494
+ this.internalSubset = internalSubset != null ? internalSubset : null;
495
+ }
496
+ get type() {
497
+ return XmlNode.TYPE_DOCUMENT_TYPE;
498
+ }
499
+ toJSON() {
500
+ let json = XmlNode.prototype.toJSON.call(this);
501
+ json.name = this.name;
502
+ for (let key of ["publicId", "systemId", "internalSubset"]) {
503
+ if (this[key] !== null) {
504
+ json[key] = this[key];
505
+ }
506
+ }
507
+ return json;
508
+ }
509
+ };
510
+
511
+ // src/lib/XmlError.ts
512
+ var XmlError = class extends Error {
513
+ constructor(message, charIndex, xml) {
514
+ let column = 1;
515
+ let excerpt = "";
516
+ let line = 1;
517
+ for (let i = 0; i < charIndex; ++i) {
518
+ let char = xml[i];
519
+ if (char === "\n") {
520
+ column = 1;
521
+ excerpt = "";
522
+ line += 1;
523
+ } else {
524
+ column += 1;
525
+ excerpt += char;
526
+ }
527
+ }
528
+ let eol = xml.indexOf("\n", charIndex);
529
+ excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
530
+ let excerptStart = 0;
531
+ if (excerpt.length > 50) {
532
+ if (column < 40) {
533
+ excerpt = excerpt.slice(0, 50);
534
+ } else {
535
+ excerptStart = column - 20;
536
+ excerpt = excerpt.slice(excerptStart, column + 30);
537
+ }
538
+ }
539
+ super(
540
+ `${message} (line ${line}, column ${column})
541
+ ${excerpt}
542
+ ` + " ".repeat(column - excerptStart + 1) + "^\n"
543
+ );
544
+ this.column = column;
545
+ this.excerpt = excerpt;
546
+ this.line = line;
547
+ this.name = "XmlError";
548
+ this.pos = charIndex;
549
+ }
550
+ };
551
+
332
552
  // src/lib/XmlProcessingInstruction.ts
333
553
  var XmlProcessingInstruction = class extends XmlNode {
334
554
  constructor(name, content = "") {
@@ -350,45 +570,67 @@ var XmlProcessingInstruction = class extends XmlNode {
350
570
  // src/lib/Parser.ts
351
571
  var emptyString2 = "";
352
572
  var Parser = class {
573
+ /**
574
+ * @param xml XML string to parse.
575
+ * @param options Parser options.
576
+ */
353
577
  constructor(xml, options = {}) {
354
- this.document = new XmlDocument();
355
- this.i = this.document;
356
- this.options = options;
357
- this.c = new StringScanner(normalizeXmlString(xml));
358
- this.F();
359
- if (!this.z()) {
360
- throw this.a("Root element is missing or invalid");
578
+ let doc = this.document = new XmlDocument();
579
+ this.j = doc;
580
+ this.g = options;
581
+ this.c = new StringScanner(xml);
582
+ if (this.g.includeOffsets) {
583
+ doc.start = 0;
584
+ doc.end = xml.length;
361
585
  }
362
- while (this.u()) {
586
+ this.parse();
587
+ }
588
+ /**
589
+ * Adds the given `XmlNode` as a child of `this.currentNode`.
590
+ */
591
+ i(node, charIndex) {
592
+ node.parent = this.j;
593
+ if (this.g.includeOffsets) {
594
+ node.start = this.c.f(charIndex);
595
+ node.end = this.c.f();
363
596
  }
364
- if (!this.c.y) {
365
- throw this.a("Extra content at the end of the document");
366
- }
367
- }
368
- k(node) {
369
- node.parent = this.i;
370
- this.i.children.push(node);
597
+ this.j.children.push(node);
598
+ return true;
371
599
  }
372
- v(text) {
373
- let { children } = this.i;
600
+ /**
601
+ * Adds the given _text_ to the document, either by appending it to a
602
+ * preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
603
+ */
604
+ y(text, charIndex) {
605
+ let { children } = this.j;
374
606
  let { length } = children;
607
+ text = normalizeLineBreaks(text);
375
608
  if (length > 0) {
376
609
  let prevNode = children[length - 1];
377
- if (prevNode instanceof XmlText) {
378
- prevNode.text += text;
379
- return;
610
+ if ((prevNode == null ? void 0 : prevNode.type) === XmlNode.TYPE_TEXT) {
611
+ let textNode = prevNode;
612
+ textNode.text += text;
613
+ if (this.g.includeOffsets) {
614
+ textNode.end = this.c.f();
615
+ }
616
+ return true;
380
617
  }
381
618
  }
382
- this.k(new XmlText(text));
619
+ return this.i(new XmlText(text), charIndex);
383
620
  }
384
- G() {
621
+ /**
622
+ * Consumes element attributes.
623
+ *
624
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
625
+ */
626
+ H() {
385
627
  let attributes = /* @__PURE__ */ Object.create(null);
386
628
  while (this.e()) {
387
629
  let attrName = this.q();
388
630
  if (!attrName) {
389
631
  break;
390
632
  }
391
- let attrValue = this.r() && this.H();
633
+ let attrValue = this.t() && this.I();
392
634
  if (attrValue === false) {
393
635
  throw this.a("Attribute value expected");
394
636
  }
@@ -400,7 +642,7 @@ var Parser = class {
400
642
  }
401
643
  attributes[attrName] = attrValue;
402
644
  }
403
- if (this.options.sortAttributes) {
645
+ if (this.g.sortAttributes) {
404
646
  let attrNames = Object.keys(attributes).sort();
405
647
  let sortedAttributes = /* @__PURE__ */ Object.create(null);
406
648
  for (let i = 0; i < attrNames.length; ++i) {
@@ -411,152 +653,227 @@ var Parser = class {
411
653
  }
412
654
  return attributes;
413
655
  }
414
- H() {
656
+ /**
657
+ * Consumes an `AttValue` (attribute value) if possible.
658
+ *
659
+ * @returns
660
+ * Contents of the `AttValue` minus quotes, or `false` if nothing was
661
+ * consumed. An empty string indicates that an `AttValue` was consumed but
662
+ * was empty.
663
+ *
664
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
665
+ */
666
+ I() {
415
667
  let { c: scanner } = this;
416
- let quote = scanner.g();
668
+ let quote = scanner.m();
417
669
  if (quote !== '"' && quote !== "'") {
418
670
  return false;
419
671
  }
420
- scanner.f();
672
+ scanner.p();
421
673
  let chars;
422
674
  let isClosed = false;
423
675
  let value = emptyString2;
424
676
  let regex = quote === '"' ? attValueCharDoubleQuote : attValueCharSingleQuote;
425
- matchLoop:
426
- while (!scanner.y) {
427
- chars = scanner.s(regex);
428
- if (chars) {
429
- this.l(chars);
430
- value += chars.replace(attValueNormalizedWhitespace, " ");
431
- }
432
- switch (scanner.g()) {
433
- case quote:
434
- isClosed = true;
435
- break matchLoop;
436
- case "&":
437
- value += this.A();
438
- continue;
439
- case "<":
440
- throw this.a("Unescaped `<` is not allowed in an attribute value");
441
- case emptyString2:
442
- break matchLoop;
443
- }
677
+ matchLoop: while (!scanner.B) {
678
+ chars = scanner.x(regex);
679
+ if (chars) {
680
+ this.o(chars);
681
+ value += chars.replace(attValueNormalizedWhitespace, " ");
682
+ }
683
+ switch (scanner.m()) {
684
+ case quote:
685
+ isClosed = true;
686
+ break matchLoop;
687
+ case "&":
688
+ value += this.C();
689
+ continue;
690
+ case "<":
691
+ throw this.a("Unescaped `<` is not allowed in an attribute value");
692
+ default:
693
+ break matchLoop;
444
694
  }
695
+ }
445
696
  if (!isClosed) {
446
697
  throw this.a("Unclosed attribute");
447
698
  }
448
- scanner.f();
699
+ scanner.p();
449
700
  return value;
450
701
  }
451
- I() {
702
+ /**
703
+ * Consumes a CDATA section if possible.
704
+ *
705
+ * @returns Whether a CDATA section was consumed.
706
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
707
+ */
708
+ J() {
452
709
  let { c: scanner } = this;
710
+ let startIndex = scanner.d;
453
711
  if (!scanner.b("<![CDATA[")) {
454
712
  return false;
455
713
  }
456
- let text = scanner.p("]]>");
457
- this.l(text);
714
+ let text = scanner.s("]]>");
715
+ this.o(text);
458
716
  if (!scanner.b("]]>")) {
459
717
  throw this.a("Unclosed CDATA section");
460
718
  }
461
- if (this.options.preserveCdata) {
462
- this.k(new XmlCdata(text));
463
- } else {
464
- this.v(text);
465
- }
466
- return true;
467
- }
468
- J() {
719
+ return this.g.preserveCdata ? this.i(new XmlCdata(normalizeLineBreaks(text)), startIndex) : this.y(text, startIndex);
720
+ }
721
+ /**
722
+ * Consumes character data if possible.
723
+ *
724
+ * @returns Whether character data was consumed.
725
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
726
+ */
727
+ K() {
469
728
  let { c: scanner } = this;
470
- let charData = scanner.E(endCharData);
729
+ let startIndex = scanner.d;
730
+ let charData = scanner.x(endCharData);
471
731
  if (!charData) {
472
732
  return false;
473
733
  }
474
- this.l(charData);
475
- if (scanner.g(3) === "]]>") {
734
+ this.o(charData);
735
+ if (scanner.m(3) === "]]>") {
476
736
  throw this.a("Element content may not contain the CDATA section close delimiter `]]>`");
477
737
  }
478
- this.v(charData);
479
- return true;
480
- }
481
- B() {
738
+ return this.y(charData, startIndex);
739
+ }
740
+ /**
741
+ * Consumes a comment if possible.
742
+ *
743
+ * @returns Whether a comment was consumed.
744
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
745
+ */
746
+ D() {
482
747
  let { c: scanner } = this;
748
+ let startIndex = scanner.d;
483
749
  if (!scanner.b("<!--")) {
484
750
  return false;
485
751
  }
486
- let content = scanner.p("--");
487
- this.l(content);
752
+ let content = scanner.s("--");
753
+ this.o(content);
488
754
  if (!scanner.b("-->")) {
489
- if (scanner.g(2) === "--") {
755
+ if (scanner.m(2) === "--") {
490
756
  throw this.a("The string `--` isn't allowed inside a comment");
491
757
  }
492
758
  throw this.a("Unclosed comment");
493
759
  }
494
- if (this.options.preserveComments) {
495
- this.k(new XmlComment(content.trim()));
496
- }
497
- return true;
498
- }
499
- K() {
500
- let ref = this.A();
501
- if (ref) {
502
- this.v(ref);
503
- return true;
504
- }
505
- return false;
506
- }
760
+ return this.g.preserveComments ? this.i(new XmlComment(normalizeLineBreaks(content)), startIndex) : true;
761
+ }
762
+ /**
763
+ * Consumes a reference in a content context if possible.
764
+ *
765
+ * This differs from `consumeReference()` in that a consumed reference will be
766
+ * added to the document as a text node instead of returned.
767
+ *
768
+ * @returns Whether a reference was consumed.
769
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
770
+ */
507
771
  L() {
772
+ let startIndex = this.c.d;
773
+ let ref = this.C();
774
+ return ref ? this.y(ref, startIndex) : false;
775
+ }
776
+ /**
777
+ * Consumes a doctype declaration if possible.
778
+ *
779
+ * This is a loose implementation since doctype declarations are currently
780
+ * discarded without further parsing.
781
+ *
782
+ * @returns Whether a doctype declaration was consumed.
783
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
784
+ */
785
+ M() {
508
786
  let { c: scanner } = this;
509
- if (!scanner.b("<!DOCTYPE") || !this.e()) {
787
+ let startIndex = scanner.d;
788
+ if (!scanner.b("<!DOCTYPE")) {
510
789
  return false;
511
790
  }
512
- scanner.s(/[^[>]+/y);
513
- if (scanner.s(/\[[\s\S]+?\][\x20\t\r\n]*>/y)) {
514
- return true;
791
+ let name = this.e() && this.q();
792
+ if (!name) {
793
+ throw this.a("Expected a name");
794
+ }
795
+ let publicId;
796
+ let systemId;
797
+ if (this.e()) {
798
+ if (scanner.b("PUBLIC")) {
799
+ publicId = this.e() && this.N();
800
+ if (publicId === false) {
801
+ throw this.a("Expected a public identifier");
802
+ }
803
+ this.e();
804
+ }
805
+ if (publicId !== void 0 || scanner.b("SYSTEM")) {
806
+ this.e();
807
+ systemId = this.r();
808
+ if (systemId === false) {
809
+ throw this.a("Expected a system identifier");
810
+ }
811
+ this.e();
812
+ }
813
+ }
814
+ let internalSubset;
815
+ if (scanner.b("[")) {
816
+ internalSubset = scanner.x(/\][\x20\t\r\n]*>/);
817
+ if (!scanner.b("]")) {
818
+ throw this.a("Unclosed internal subset");
819
+ }
820
+ this.e();
515
821
  }
516
822
  if (!scanner.b(">")) {
517
823
  throw this.a("Unclosed doctype declaration");
518
824
  }
519
- return true;
520
- }
521
- z() {
825
+ return this.g.preserveDocumentType ? this.i(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex) : true;
826
+ }
827
+ /**
828
+ * Consumes an element if possible.
829
+ *
830
+ * @returns Whether an element was consumed.
831
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
832
+ */
833
+ E() {
522
834
  let { c: scanner } = this;
523
- let mark = scanner.d;
835
+ let startIndex = scanner.d;
524
836
  if (!scanner.b("<")) {
525
837
  return false;
526
838
  }
527
839
  let name = this.q();
528
840
  if (!name) {
529
- scanner.o(mark);
841
+ scanner.n(startIndex);
530
842
  return false;
531
843
  }
532
- let attributes = this.G();
533
- let isEmpty = Boolean(scanner.b("/>"));
844
+ let attributes = this.H();
845
+ let isEmpty = !!scanner.b("/>");
534
846
  let element = new XmlElement(name, attributes);
535
- element.parent = this.i;
847
+ element.parent = this.j;
536
848
  if (!isEmpty) {
537
849
  if (!scanner.b(">")) {
538
850
  throw this.a(`Unclosed start tag for element \`${name}\``);
539
851
  }
540
- this.i = element;
852
+ this.j = element;
541
853
  do {
542
- this.J();
543
- } while (this.z() || this.K() || this.I() || this.C() || this.B());
854
+ this.K();
855
+ } while (this.E() || this.L() || this.J() || this.F() || this.D());
544
856
  let endTagMark = scanner.d;
545
857
  let endTagName;
546
858
  if (!scanner.b("</") || !(endTagName = this.q()) || endTagName !== name) {
547
- scanner.o(endTagMark);
859
+ scanner.n(endTagMark);
548
860
  throw this.a(`Missing end tag for element ${name}`);
549
861
  }
550
862
  this.e();
551
863
  if (!scanner.b(">")) {
552
864
  throw this.a(`Unclosed end tag for element ${name}`);
553
865
  }
554
- this.i = element.parent;
866
+ this.j = element.parent;
555
867
  }
556
- this.k(element);
557
- return true;
558
- }
559
- r() {
868
+ return this.i(element, startIndex);
869
+ }
870
+ /**
871
+ * Consumes an `Eq` production if possible.
872
+ *
873
+ * @returns Whether an `Eq` production was consumed.
874
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
875
+ */
876
+ t() {
560
877
  this.e();
561
878
  if (this.c.b("=")) {
562
879
  this.e();
@@ -564,22 +881,40 @@ var Parser = class {
564
881
  }
565
882
  return false;
566
883
  }
567
- u() {
568
- return this.B() || this.C() || this.e();
569
- }
884
+ /**
885
+ * Consumes `Misc` content if possible.
886
+ *
887
+ * @returns Whether anything was consumed.
888
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
889
+ */
890
+ z() {
891
+ return this.D() || this.F() || this.e();
892
+ }
893
+ /**
894
+ * Consumes one or more `Name` characters if possible.
895
+ *
896
+ * @returns `Name` characters, or an empty string if none were consumed.
897
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
898
+ */
570
899
  q() {
571
- return isNameStartChar(this.c.g()) ? this.c.t(isNameChar) : emptyString2;
572
- }
573
- C() {
900
+ return isNameStartChar(this.c.m()) ? this.c.w(isNameChar) : emptyString2;
901
+ }
902
+ /**
903
+ * Consumes a processing instruction if possible.
904
+ *
905
+ * @returns Whether a processing instruction was consumed.
906
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
907
+ */
908
+ F() {
574
909
  let { c: scanner } = this;
575
- let mark = scanner.d;
910
+ let startIndex = scanner.d;
576
911
  if (!scanner.b("<?")) {
577
912
  return false;
578
913
  }
579
914
  let name = this.q();
580
915
  if (name) {
581
916
  if (name.toLowerCase() === "xml") {
582
- scanner.o(mark);
917
+ scanner.n(startIndex);
583
918
  throw this.a("XML declaration isn't allowed here");
584
919
  }
585
920
  } else {
@@ -587,38 +922,73 @@ var Parser = class {
587
922
  }
588
923
  if (!this.e()) {
589
924
  if (scanner.b("?>")) {
590
- this.k(new XmlProcessingInstruction(name));
591
- return true;
925
+ return this.i(new XmlProcessingInstruction(name), startIndex);
592
926
  }
593
927
  throw this.a("Whitespace is required after a processing instruction name");
594
928
  }
595
- let content = scanner.p("?>");
596
- this.l(content);
929
+ let content = scanner.s("?>");
930
+ this.o(content);
597
931
  if (!scanner.b("?>")) {
598
932
  throw this.a("Unterminated processing instruction");
599
933
  }
600
- this.k(new XmlProcessingInstruction(name, content));
601
- return true;
602
- }
603
- F() {
934
+ return this.i(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
935
+ }
936
+ /**
937
+ * Consumes a prolog if possible.
938
+ *
939
+ * @returns Whether a prolog was consumed.
940
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
941
+ */
942
+ O() {
604
943
  let { c: scanner } = this;
605
- let mark = scanner.d;
606
- this.M();
607
- while (this.u()) {
944
+ let startIndex = scanner.d;
945
+ this.P();
946
+ while (this.z()) {
608
947
  }
609
- if (this.L()) {
610
- while (this.u()) {
948
+ if (this.M()) {
949
+ while (this.z()) {
611
950
  }
612
951
  }
613
- return mark < scanner.d;
952
+ return startIndex < scanner.d;
953
+ }
954
+ /**
955
+ * Consumes a public identifier literal if possible.
956
+ *
957
+ * @returns
958
+ * Value of the public identifier literal minus quotes, or `false` if
959
+ * nothing was consumed. An empty string indicates that a public id literal
960
+ * was consumed but was empty.
961
+ *
962
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
963
+ */
964
+ N() {
965
+ let startIndex = this.c.d;
966
+ let value = this.r();
967
+ if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
968
+ this.c.n(startIndex);
969
+ throw this.a("Invalid character in public identifier");
970
+ }
971
+ return value;
614
972
  }
615
- A() {
973
+ /**
974
+ * Consumes a reference if possible.
975
+ *
976
+ * This differs from `consumeContentReference()` in that a consumed reference
977
+ * will be returned rather than added to the document.
978
+ *
979
+ * @returns
980
+ * Parsed reference value, or `false` if nothing was consumed (to
981
+ * distinguish from a reference that resolves to an empty string).
982
+ *
983
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
984
+ */
985
+ C() {
616
986
  let { c: scanner } = this;
617
987
  if (!scanner.b("&")) {
618
988
  return false;
619
989
  }
620
- let ref = scanner.t(isReferenceChar);
621
- if (scanner.D() !== ";") {
990
+ let ref = scanner.w(isReferenceChar);
991
+ if (scanner.G() !== ";") {
622
992
  throw this.a("Unterminated reference (a reference must end with `;`)");
623
993
  }
624
994
  let parsedValue;
@@ -637,7 +1007,7 @@ var Parser = class {
637
1007
  let {
638
1008
  ignoreUndefinedEntities,
639
1009
  resolveUndefinedEntity
640
- } = this.options;
1010
+ } = this.g;
641
1011
  let wrappedRef = `&${ref};`;
642
1012
  if (resolveUndefinedEntity) {
643
1013
  let resolvedValue = resolveUndefinedEntity(wrappedRef);
@@ -652,48 +1022,79 @@ var Parser = class {
652
1022
  if (ignoreUndefinedEntities) {
653
1023
  return wrappedRef;
654
1024
  }
655
- scanner.o(-wrappedRef.length);
1025
+ scanner.n(-wrappedRef.length);
656
1026
  throw this.a(`Named entity isn't defined: ${wrappedRef}`);
657
1027
  }
658
1028
  }
659
1029
  return parsedValue;
660
1030
  }
661
- w() {
1031
+ /**
1032
+ * Consumes a `SystemLiteral` if possible.
1033
+ *
1034
+ * A `SystemLiteral` is similar to an attribute value, but allows the
1035
+ * characters `<` and `&` and doesn't replace references.
1036
+ *
1037
+ * @returns
1038
+ * Value of the `SystemLiteral` minus quotes, or `false` if nothing was
1039
+ * consumed. An empty string indicates that a `SystemLiteral` was consumed
1040
+ * but was empty.
1041
+ *
1042
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
1043
+ */
1044
+ r() {
662
1045
  let { c: scanner } = this;
663
1046
  let quote = scanner.b('"') || scanner.b("'");
664
1047
  if (!quote) {
665
1048
  return false;
666
1049
  }
667
- let value = scanner.p(quote);
668
- this.l(value);
1050
+ let value = scanner.s(quote);
1051
+ this.o(value);
669
1052
  if (!scanner.b(quote)) {
670
1053
  throw this.a("Missing end quote");
671
1054
  }
672
1055
  return value;
673
1056
  }
1057
+ /**
1058
+ * Consumes one or more whitespace characters if possible.
1059
+ *
1060
+ * @returns Whether any whitespace characters were consumed.
1061
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
1062
+ */
674
1063
  e() {
675
- return Boolean(this.c.t(isWhitespace));
676
- }
677
- M() {
1064
+ return !!this.c.w(isWhitespace);
1065
+ }
1066
+ /**
1067
+ * Consumes an XML declaration if possible.
1068
+ *
1069
+ * @returns Whether an XML declaration was consumed.
1070
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
1071
+ */
1072
+ P() {
678
1073
  let { c: scanner } = this;
1074
+ let startIndex = scanner.d;
679
1075
  if (!scanner.b("<?xml")) {
680
1076
  return false;
681
1077
  }
682
1078
  if (!this.e()) {
683
1079
  throw this.a("Invalid XML declaration");
684
1080
  }
685
- let version = Boolean(scanner.b("version")) && this.r() && this.w();
1081
+ let version = !!scanner.b("version") && this.t() && this.r();
686
1082
  if (version === false) {
687
1083
  throw this.a("XML version is missing or invalid");
688
1084
  } else if (!/^1\.[0-9]+$/.test(version)) {
689
1085
  throw this.a("Invalid character in version number");
690
1086
  }
1087
+ let encoding;
1088
+ let standalone;
691
1089
  if (this.e()) {
692
- let encoding = Boolean(scanner.b("encoding")) && this.r() && this.w();
1090
+ encoding = !!scanner.b("encoding") && this.t() && this.r();
693
1091
  if (encoding) {
1092
+ if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
1093
+ throw this.a("Invalid character in encoding name");
1094
+ }
694
1095
  this.e();
695
1096
  }
696
- let standalone = Boolean(scanner.b("standalone")) && this.r() && this.w();
1097
+ standalone = !!scanner.b("standalone") && this.t() && this.r();
697
1098
  if (standalone) {
698
1099
  if (standalone !== "yes" && standalone !== "no") {
699
1100
  throw this.a('Only "yes" and "no" are permitted as values of `standalone`');
@@ -704,54 +1105,44 @@ var Parser = class {
704
1105
  if (!scanner.b("?>")) {
705
1106
  throw this.a("Invalid or unclosed XML declaration");
706
1107
  }
707
- return true;
708
- }
1108
+ return this.g.preserveXmlDeclaration ? this.i(new XmlDeclaration(
1109
+ version,
1110
+ encoding || void 0,
1111
+ standalone || void 0
1112
+ ), startIndex) : true;
1113
+ }
1114
+ /**
1115
+ * Returns an `XmlError` for the current scanner position.
1116
+ */
709
1117
  a(message) {
710
- let { d: charIndex, string: xml } = this.c;
711
- let column = 1;
712
- let excerpt = "";
713
- let line = 1;
714
- for (let i = 0; i < charIndex; ++i) {
715
- let char = xml[i];
716
- if (char === "\n") {
717
- column = 1;
718
- excerpt = "";
719
- line += 1;
720
- } else {
721
- column += 1;
722
- excerpt += char;
723
- }
1118
+ let { c: scanner } = this;
1119
+ return new XmlError(message, scanner.d, scanner.h);
1120
+ }
1121
+ /**
1122
+ * Parses the XML input.
1123
+ */
1124
+ parse() {
1125
+ this.c.b("\uFEFF");
1126
+ this.O();
1127
+ if (!this.E()) {
1128
+ throw this.a("Root element is missing or invalid");
724
1129
  }
725
- let eol = xml.indexOf("\n", charIndex);
726
- excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
727
- let excerptStart = 0;
728
- if (excerpt.length > 50) {
729
- if (column < 40) {
730
- excerpt = excerpt.slice(0, 50);
731
- } else {
732
- excerptStart = column - 20;
733
- excerpt = excerpt.slice(excerptStart, column + 30);
734
- }
1130
+ while (this.z()) {
1131
+ }
1132
+ if (!this.c.B) {
1133
+ throw this.a("Extra content at the end of the document");
735
1134
  }
736
- let err = new Error(
737
- `${message} (line ${line}, column ${column})
738
- ${excerpt}
739
- ` + " ".repeat(column - excerptStart + 1) + "^\n"
740
- );
741
- Object.assign(err, {
742
- column,
743
- excerpt,
744
- line,
745
- pos: charIndex
746
- });
747
- return err;
748
1135
  }
749
- l(string) {
1136
+ /**
1137
+ * Throws an invalid character error if any character in the given _string_
1138
+ * isn't a valid XML character.
1139
+ */
1140
+ o(string) {
750
1141
  let { length } = string;
751
1142
  for (let i = 0; i < length; ++i) {
752
1143
  let cp = string.codePointAt(i);
753
1144
  if (!isXmlCodePoint(cp)) {
754
- this.c.o(-([...string].length - i));
1145
+ this.c.n(-([...string].length - i));
755
1146
  throw this.a("Invalid character");
756
1147
  }
757
1148
  if (cp > 65535) {
@@ -760,11 +1151,12 @@ var Parser = class {
760
1151
  }
761
1152
  }
762
1153
  };
763
- function normalizeXmlString(xml) {
764
- if (xml[0] === "\uFEFF") {
765
- xml = xml.slice(1);
1154
+ function normalizeLineBreaks(text) {
1155
+ let i = 0;
1156
+ while ((i = text.indexOf("\r", i)) !== -1) {
1157
+ text = text[i + 1] === "\n" ? text.slice(0, i) + text.slice(i + 1) : text.slice(0, i) + "\n" + text.slice(i + 1);
766
1158
  }
767
- return xml.replace(/\r\n?/g, "\n");
1159
+ return text;
768
1160
  }
769
1161
 
770
1162
  // src/index.ts