@rgrove/parse-xml 3.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/LICENSE +1 -1
  2. package/README.md +72 -97
  3. package/dist/browser.js +774 -0
  4. package/dist/browser.js.map +7 -0
  5. package/dist/global.min.js +10 -0
  6. package/dist/global.min.js.map +7 -0
  7. package/dist/index.d.ts +24 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +50 -0
  10. package/dist/index.js.map +1 -0
  11. package/dist/lib/Parser.d.ts +218 -0
  12. package/dist/lib/Parser.d.ts.map +1 -0
  13. package/dist/lib/Parser.js +638 -0
  14. package/dist/lib/Parser.js.map +1 -0
  15. package/dist/lib/StringScanner.d.ts +97 -0
  16. package/dist/lib/StringScanner.d.ts.map +1 -0
  17. package/dist/lib/StringScanner.js +210 -0
  18. package/dist/lib/StringScanner.js.map +1 -0
  19. package/dist/lib/XmlCdata.d.ts +8 -0
  20. package/dist/lib/XmlCdata.d.ts.map +1 -0
  21. package/dist/lib/XmlCdata.js +15 -0
  22. package/dist/lib/XmlCdata.js.map +1 -0
  23. package/dist/lib/XmlComment.d.ts +16 -0
  24. package/dist/lib/XmlComment.d.ts.map +1 -0
  25. package/dist/lib/XmlComment.js +23 -0
  26. package/dist/lib/XmlComment.js.map +1 -0
  27. package/dist/lib/XmlDocument.d.ts +29 -0
  28. package/dist/lib/XmlDocument.d.ts.map +1 -0
  29. package/dist/lib/XmlDocument.js +47 -0
  30. package/dist/lib/XmlDocument.js.map +1 -0
  31. package/dist/lib/XmlElement.d.ts +40 -0
  32. package/dist/lib/XmlElement.d.ts.map +1 -0
  33. package/dist/lib/XmlElement.js +51 -0
  34. package/dist/lib/XmlElement.js.map +1 -0
  35. package/dist/lib/XmlNode.d.ts +74 -0
  36. package/dist/lib/XmlNode.d.ts.map +1 -0
  37. package/dist/lib/XmlNode.js +96 -0
  38. package/dist/lib/XmlNode.js.map +1 -0
  39. package/dist/lib/XmlProcessingInstruction.d.ts +22 -0
  40. package/dist/lib/XmlProcessingInstruction.d.ts.map +1 -0
  41. package/dist/lib/XmlProcessingInstruction.js +25 -0
  42. package/dist/lib/XmlProcessingInstruction.js.map +1 -0
  43. package/dist/lib/XmlText.d.ts +16 -0
  44. package/dist/lib/XmlText.d.ts.map +1 -0
  45. package/dist/lib/XmlText.js +23 -0
  46. package/dist/lib/XmlText.js.map +1 -0
  47. package/dist/lib/syntax.d.ts +69 -0
  48. package/dist/lib/syntax.d.ts.map +1 -0
  49. package/dist/lib/syntax.js +133 -0
  50. package/dist/lib/syntax.js.map +1 -0
  51. package/dist/lib/types.d.ts +5 -0
  52. package/dist/lib/types.d.ts.map +1 -0
  53. package/dist/lib/types.js +3 -0
  54. package/dist/lib/types.js.map +1 -0
  55. package/package.json +30 -22
  56. package/src/index.ts +30 -0
  57. package/src/lib/Parser.ts +819 -0
  58. package/src/lib/StringScanner.ts +254 -0
  59. package/src/lib/XmlCdata.ts +11 -0
  60. package/src/lib/XmlComment.ts +26 -0
  61. package/src/lib/XmlDocument.ts +57 -0
  62. package/src/lib/XmlElement.ts +81 -0
  63. package/src/lib/XmlNode.ts +107 -0
  64. package/src/lib/XmlProcessingInstruction.ts +35 -0
  65. package/src/lib/XmlText.ts +26 -0
  66. package/src/lib/syntax.ts +136 -0
  67. package/src/lib/types.ts +2 -0
  68. package/CHANGELOG.md +0 -162
  69. package/dist/types/index.d.ts +0 -68
  70. package/dist/types/index.d.ts.map +0 -1
  71. package/dist/types/lib/Parser.d.ts +0 -234
  72. package/dist/types/lib/Parser.d.ts.map +0 -1
  73. package/dist/types/lib/StringScanner.d.ts +0 -139
  74. package/dist/types/lib/StringScanner.d.ts.map +0 -1
  75. package/dist/types/lib/XmlCdata.d.ts +0 -11
  76. package/dist/types/lib/XmlCdata.d.ts.map +0 -1
  77. package/dist/types/lib/XmlComment.d.ts +0 -21
  78. package/dist/types/lib/XmlComment.d.ts.map +0 -1
  79. package/dist/types/lib/XmlDocument.d.ts +0 -42
  80. package/dist/types/lib/XmlDocument.d.ts.map +0 -1
  81. package/dist/types/lib/XmlElement.d.ts +0 -62
  82. package/dist/types/lib/XmlElement.d.ts.map +0 -1
  83. package/dist/types/lib/XmlNode.d.ts +0 -78
  84. package/dist/types/lib/XmlNode.d.ts.map +0 -1
  85. package/dist/types/lib/XmlProcessingInstruction.d.ts +0 -30
  86. package/dist/types/lib/XmlProcessingInstruction.d.ts.map +0 -1
  87. package/dist/types/lib/XmlText.d.ts +0 -21
  88. package/dist/types/lib/XmlText.d.ts.map +0 -1
  89. package/dist/types/lib/syntax.d.ts +0 -59
  90. package/dist/types/lib/syntax.d.ts.map +0 -1
  91. package/dist/umd/parse-xml.min.js +0 -2
  92. package/dist/umd/parse-xml.min.js.map +0 -1
  93. package/src/index.js +0 -67
  94. package/src/lib/Parser.js +0 -812
  95. package/src/lib/StringScanner.js +0 -312
  96. package/src/lib/XmlCdata.js +0 -17
  97. package/src/lib/XmlComment.js +0 -37
  98. package/src/lib/XmlDocument.js +0 -69
  99. package/src/lib/XmlElement.js +0 -101
  100. package/src/lib/XmlNode.js +0 -152
  101. package/src/lib/XmlProcessingInstruction.js +0 -48
  102. package/src/lib/XmlText.js +0 -37
  103. package/src/lib/syntax.js +0 -153
@@ -0,0 +1,819 @@
1
+ import { StringScanner } from './StringScanner.js';
2
+ import * as syntax from './syntax.js';
3
+ import { XmlCdata } from './XmlCdata.js';
4
+ import { XmlComment } from './XmlComment.js';
5
+ import { XmlDocument } from './XmlDocument.js';
6
+ import { XmlElement } from './XmlElement.js';
7
+ import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
8
+ import { XmlText } from './XmlText.js';
9
+
10
+ import type { XmlNode } from './XmlNode.js';
11
+
12
+
13
+ const emptyString = '';
14
+
15
+ /**
16
+ * Parses an XML string into an `XmlDocument`.
17
+ *
18
+ * @private
19
+ */
20
+ export class Parser {
21
+ readonly document: XmlDocument;
22
+
23
+ private currentNode: XmlDocument | XmlElement;
24
+ private readonly options: ParserOptions;
25
+ private readonly scanner: StringScanner;
26
+
27
/**
 * Parses the given XML string eagerly: the whole document is consumed inside
 * the constructor, and nodes are attached to `this.document` as they are read.
 *
 * @param xml XML string to parse.
 * @param options Parser options.
 */
constructor(xml: string, options: ParserOptions = {}) {
  const document = new XmlDocument();

  this.document = document;
  this.currentNode = document;
  this.options = options;
  this.scanner = new StringScanner(normalizeXmlString(xml));

  // A document is an optional prolog, exactly one root element, and then any
  // amount of trailing Misc content.
  this.consumeProlog();

  const hasRootElement = this.consumeElement();

  if (!hasRootElement) {
    throw this.error('Root element is missing or invalid');
  }

  let consumedMisc = true;

  while (consumedMisc) {
    consumedMisc = this.consumeMisc();
  }

  // Anything left over at this point isn't allowed after the root element.
  if (!this.scanner.isEnd) {
    throw this.error('Extra content at the end of the document');
  }
}
49
+
50
/**
 * Appends the given `XmlNode` to the children of `this.currentNode` and sets
 * the node's `parent` to `this.currentNode`.
 */
addNode(node: XmlNode) {
  const parent = this.currentNode;

  node.parent = parent;

  // @ts-expect-error: XmlDocument has a more limited set of possible children
  // than XmlElement so TypeScript is unhappy, but we always do the right
  // thing.
  parent.children.push(node);
}
61
+
62
/**
 * Adds the given _text_ to the current node. If the current node's last child
 * is already an `XmlText`, the text is appended to it; otherwise a new
 * `XmlText` node is created and added.
 */
addText(text: string) {
  const siblings = this.currentNode.children;
  const lastSibling = siblings.length > 0
    ? siblings[siblings.length - 1]
    : undefined;

  if (lastSibling instanceof XmlText) {
    // Merge into the existing text node rather than creating an adjacent one.
    lastSibling.text += text;
  } else {
    this.addNode(new XmlText(text));
  }
}
83
+
84
/**
 * Consumes zero or more whitespace-separated attributes of a start tag.
 *
 * Parsing stops as soon as whitespace isn't followed by a name. An error is
 * raised for an attribute without a value, a repeated attribute name, or an
 * `xml:space` attribute whose value isn't "default" or "preserve".
 *
 * @returns
 *   Prototype-less object mapping attribute names to values, with keys sorted
 *   by name when the `sortAttributes` option is enabled.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
 */
consumeAttributes(): Record<string, string> {
  const attributes: Record<string, string> = Object.create(null);

  while (this.consumeWhitespace()) {
    const name = this.consumeName();

    if (!name) {
      break;
    }

    const value = this.consumeEqual()
      ? this.consumeAttributeValue()
      : false;

    if (value === false) {
      throw this.error('Attribute value expected');
    }

    if (name in attributes) {
      throw this.error(`Duplicate attribute: ${name}`);
    }

    const isInvalidXmlSpace = name === 'xml:space'
      && !(value === 'default' || value === 'preserve');

    if (isInvalidXmlSpace) {
      throw this.error('Value of the `xml:space` attribute must be "default" or "preserve"');
    }

    attributes[name] = value;
  }

  if (!this.options.sortAttributes) {
    return attributes;
  }

  // Rebuild the object so that key insertion order follows the sorted names.
  const sorted: Record<string, string> = Object.create(null);

  for (const name of Object.keys(attributes).sort()) {
    sorted[name] = attributes[name] as string;
  }

  return sorted;
}
133
+
134
/**
 * Consumes an `AttValue` (quoted attribute value) if possible.
 *
 * Literal runs are validated and have whitespace normalized via
 * `syntax.attValueNormalizedWhitespace`; references are resolved through
 * `consumeReference()`. A literal `<` or a missing closing quote is an error.
 *
 * @returns
 *   Contents of the `AttValue` minus quotes, or `false` if the next character
 *   isn't a quote (in which case nothing is consumed). An empty string means
 *   an empty `AttValue` was consumed.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
 */
consumeAttributeValue(): string | false {
  const { scanner } = this;
  const quote = scanner.peek();

  if (quote !== '"' && quote !== "'") {
    return false;
  }

  scanner.advance();

  const literalRun = quote === "'"
    ? syntax.attValueCharSingleQuote
    : syntax.attValueCharDoubleQuote;

  let value = emptyString;
  let isClosed = false;

  while (!isClosed && !scanner.isEnd) {
    const run = scanner.consumeMatch(literalRun);

    if (run) {
      this.validateChars(run);
      value += run.replace(syntax.attValueNormalizedWhitespace, ' ');
    }

    // Decide what to do based on the character that ended the literal run.
    const next = scanner.peek();

    if (next === quote) {
      isClosed = true;
    } else if (next === '&') {
      value += this.consumeReference();
    } else if (next === '<') {
      throw this.error('Unescaped `<` is not allowed in an attribute value');
    } else if (next === emptyString) {
      break;
    }
  }

  if (!isClosed) {
    throw this.error('Unclosed attribute');
  }

  // Step over the closing quote.
  scanner.advance();
  return value;
}
193
+
194
/**
 * Consumes a CDATA section (`<![CDATA[` ... `]]>`) if possible.
 *
 * The content becomes an `XmlCdata` node when the `preserveCdata` option is
 * enabled; otherwise it is added as ordinary text.
 *
 * @returns Whether a CDATA section was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
 */
consumeCdataSection(): boolean {
  const { scanner } = this;
  const isOpened = scanner.consumeStringFast('<![CDATA[');

  if (!isOpened) {
    return false;
  }

  const content = scanner.consumeUntilString(']]>');
  this.validateChars(content);

  const isTerminated = scanner.consumeStringFast(']]>');

  if (!isTerminated) {
    throw this.error('Unclosed CDATA section');
  }

  if (!this.options.preserveCdata) {
    this.addText(content);
    return true;
  }

  this.addNode(new XmlCdata(content));
  return true;
}
222
+
223
/**
 * Consumes character data if possible.
 *
 * Text is read up to the point matched by `syntax.endCharData`, validated,
 * and added to the current node. It is an error for the text to be followed
 * immediately by the CDATA close delimiter `]]>`.
 *
 * @returns Whether character data was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
 */
consumeCharData(): boolean {
  const { scanner } = this;
  const text = scanner.consumeUntilMatch(syntax.endCharData);

  if (!text) {
    return false;
  }

  this.validateChars(text);

  const isAtCdataClose = scanner.peek(3) === ']]>';

  if (isAtCdataClose) {
    throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
  }

  this.addText(text);
  return true;
}
246
+
247
/**
 * Consumes a comment (`<!--` ... `-->`) if possible.
 *
 * The comment is only added to the document, with surrounding whitespace
 * trimmed, when the `preserveComments` option is enabled.
 *
 * @returns Whether a comment was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
 */
consumeComment(): boolean {
  const { scanner } = this;
  const isOpened = scanner.consumeStringFast('<!--');

  if (!isOpened) {
    return false;
  }

  // Read up to the first `--`, which must be the start of the `-->` closer.
  const content = scanner.consumeUntilString('--');
  this.validateChars(content);

  const isTerminated = scanner.consumeStringFast('-->');

  if (!isTerminated) {
    const message = scanner.peek(2) === '--'
      ? "The string `--` isn't allowed inside a comment"
      : 'Unclosed comment';

    throw this.error(message);
  }

  if (this.options.preserveComments) {
    this.addNode(new XmlComment(content.trim()));
  }

  return true;
}
277
+
278
/**
 * Consumes a reference in a content context if possible.
 *
 * This differs from `consumeReference()` in that a consumed reference will be
 * added to the document as a text node instead of returned.
 *
 * A reference may legitimately resolve to an empty string (for example when
 * `resolveUndefinedEntity()` returns `''`). That still counts as consumed, so
 * the result of `consumeReference()` is compared against `false` rather than
 * tested for truthiness; otherwise the caller would stop parsing element
 * content right after the reference.
 *
 * @returns Whether a reference was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
 */
consumeContentReference(): boolean {
  let ref = this.consumeReference();

  if (ref === false) {
    return false;
  }

  // An empty replacement contributes no text, so don't create an empty node.
  if (ref !== emptyString) {
    this.addText(ref);
  }

  return true;
}
297
+
298
/**
 * Consumes a doctype declaration if possible.
 *
 * This is a loose implementation since doctype declarations are currently
 * discarded without further parsing.
 *
 * If `<!DOCTYPE` isn't followed by whitespace, the scanner is rewound and
 * nothing is consumed, so the malformed declaration is reported by the caller
 * instead of being silently skipped.
 *
 * @returns Whether a doctype declaration was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
 */
consumeDoctypeDeclaration(): boolean {
  let { scanner } = this;
  let mark = scanner.charIndex;

  if (!scanner.consumeStringFast('<!DOCTYPE')) {
    return false;
  }

  if (!this.consumeWhitespace()) {
    // Not a doctype declaration after all; leave the input untouched.
    scanner.reset(mark);
    return false;
  }

  // Skip the name and any external ID, up to an internal subset or the end of
  // the declaration.
  scanner.consumeMatch(/[^[>]+/y);

  // Internal subset: `[` ... `]` followed by optional whitespace and `>`. The
  // subset may be empty (`[]`), hence `*?` rather than `+?`.
  if (scanner.consumeMatch(/\[[\s\S]*?\][\x20\t\r\n]*>/y)) {
    return true;
  }

  if (!scanner.consumeStringFast('>')) {
    throw this.error('Unclosed doctype declaration');
  }

  return true;
}
328
+
329
/**
 * Consumes an element, including its attributes, content, and end tag, if
 * possible.
 *
 * While the element's content is being parsed, `this.currentNode` points at
 * the new element so that child nodes attach to it; afterwards it is restored
 * to the element's parent and the element itself is added to that parent.
 *
 * @returns Whether an element was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
 */
consumeElement(): boolean {
  const { scanner } = this;
  const startMark = scanner.charIndex;

  if (!scanner.consumeStringFast('<')) {
    return false;
  }

  const name = this.consumeName();

  if (!name) {
    // Not a start tag (could be `</`, `<!`, `<?`, ...), so back out.
    scanner.reset(startMark);
    return false;
  }

  const attributes = this.consumeAttributes();
  const isSelfClosing = Boolean(scanner.consumeStringFast('/>'));
  const element = new XmlElement(name, attributes);
  const parent = this.currentNode;

  element.parent = parent;

  if (isSelfClosing) {
    this.addNode(element);
    return true;
  }

  if (!scanner.consumeStringFast('>')) {
    throw this.error(`Unclosed start tag for element \`${name}\``);
  }

  this.currentNode = element;

  // Content is character data interleaved with any number of other items.
  let consumedItem = true;

  while (consumedItem) {
    this.consumeCharData();

    consumedItem = this.consumeElement()
      || this.consumeContentReference()
      || this.consumeCdataSection()
      || this.consumeProcessingInstruction()
      || this.consumeComment();
  }

  const endTagMark = scanner.charIndex;
  const hasMatchingEndTag = Boolean(scanner.consumeStringFast('</'))
    && this.consumeName() === name;

  if (!hasMatchingEndTag) {
    scanner.reset(endTagMark);
    throw this.error(`Missing end tag for element ${name}`);
  }

  this.consumeWhitespace();

  if (!scanner.consumeStringFast('>')) {
    throw this.error(`Unclosed end tag for element ${name}`);
  }

  this.currentNode = parent;
  this.addNode(element);
  return true;
}
396
+
397
/**
 * Consumes an `Eq` production (`=` with optional whitespace on either side)
 * if possible.
 *
 * Leading whitespace is consumed even when no `=` follows it.
 *
 * @returns Whether an `Eq` production was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
 */
consumeEqual(): boolean {
  this.consumeWhitespace();

  const hasEqualSign = Boolean(this.scanner.consumeStringFast('='));

  if (hasEqualSign) {
    this.consumeWhitespace();
  }

  return hasEqualSign;
}
413
+
414
/**
 * Consumes one piece of `Misc` content (a comment, a processing instruction,
 * or a run of whitespace, tried in that order) if possible.
 *
 * @returns Whether anything was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
 */
consumeMisc(): boolean {
  if (this.consumeComment()) {
    return true;
  }

  if (this.consumeProcessingInstruction()) {
    return true;
  }

  return this.consumeWhitespace();
}
425
+
426
/**
 * Consumes a `Name` if possible: the next character must satisfy
 * `syntax.isNameStartChar`, after which every character satisfying
 * `syntax.isNameChar` is consumed.
 *
 * @returns `Name` characters, or an empty string if none were consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
 */
consumeName(): string {
  const { scanner } = this;

  if (!syntax.isNameStartChar(scanner.peek())) {
    return emptyString;
  }

  return scanner.consumeMatchFn(syntax.isNameChar);
}
437
+
438
/**
 * Consumes a processing instruction (`<?target content?>`) if possible and
 * adds it to the current node as an `XmlProcessingInstruction`.
 *
 * Once `<?` has been seen, a missing target, a target named `xml` (in any
 * letter case), missing whitespace after the target, and a missing `?>` are
 * all errors.
 *
 * @returns Whether a processing instruction was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
 */
consumeProcessingInstruction(): boolean {
  const { scanner } = this;
  const startMark = scanner.charIndex;

  if (!scanner.consumeStringFast('<?')) {
    return false;
  }

  const target = this.consumeName();

  if (!target) {
    throw this.error('Invalid processing instruction');
  }

  if (target.toLowerCase() === 'xml') {
    // Rewind so the error points at the start of the misplaced declaration.
    scanner.reset(startMark);
    throw this.error("XML declaration isn't allowed here");
  }

  if (this.consumeWhitespace()) {
    const content = scanner.consumeUntilString('?>');
    this.validateChars(content);

    if (!scanner.consumeStringFast('?>')) {
      throw this.error('Unterminated processing instruction');
    }

    this.addNode(new XmlProcessingInstruction(target, content));
    return true;
  }

  // Without whitespace, the only valid continuation is an immediate close.
  if (scanner.consumeStringFast('?>')) {
    this.addNode(new XmlProcessingInstruction(target));
    return true;
  }

  throw this.error('Whitespace is required after a processing instruction name');
}
482
+
483
/**
 * Consumes a prolog if possible: an optional XML declaration, any `Misc`
 * content, and an optional doctype declaration followed by more `Misc`
 * content.
 *
 * @returns Whether a prolog was consumed (i.e. whether the scanner advanced).
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
 */
consumeProlog(): boolean {
  const { scanner } = this;
  const startIndex = scanner.charIndex;

  this.consumeXmlDeclaration();

  while (this.consumeMisc()) {
    // Keep going until no Misc content remains.
  }

  if (this.consumeDoctypeDeclaration()) {
    while (this.consumeMisc()) {
      // Keep going until no Misc content remains.
    }
  }

  return scanner.charIndex > startIndex;
}
503
+
504
/**
 * Consumes a reference if possible.
 *
 * This differs from `consumeContentReference()` in that a consumed reference
 * will be returned rather than added to the document.
 *
 * Character references are checked against the `CharRef` production before
 * being converted, because `parseInt()` on its own silently ignores trailing
 * non-digit characters and would accept malformed input such as `&#65abc;`.
 *
 * Named references are looked up in `syntax.predefinedEntities`. Unknown
 * names are passed (wrapped as `&name;`) to the `resolveUndefinedEntity`
 * option if set, then left as is if `ignoreUndefinedEntities` is enabled, and
 * otherwise cause an error.
 *
 * @returns
 *   Parsed reference value, or `false` if nothing was consumed (to
 *   distinguish from a reference that resolves to an empty string).
 *
 * @throws {TypeError}
 *   If `resolveUndefinedEntity()` returns something other than a string,
 *   `null`, or `undefined`.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
 */
consumeReference(): string | false {
  let { scanner } = this;

  if (!scanner.consumeStringFast('&')) {
    return false;
  }

  let ref = scanner.consumeMatchFn(syntax.isReferenceChar);

  if (scanner.consume() !== ';') {
    throw this.error('Unterminated reference (a reference must end with `;`)');
  }

  let parsedValue;

  if (ref[0] === '#') {
    // This is a character reference: `&#` [0-9]+ `;` or `&#x` [0-9a-fA-F]+ `;`.
    let isHex = ref[1] === 'x';
    let digits = ref.slice(isHex ? 2 : 1);
    let digitPattern = isHex ? /^[0-9a-fA-F]+$/ : /^[0-9]+$/;

    if (!digitPattern.test(digits)) {
      throw this.error('Invalid character reference');
    }

    let codePoint = parseInt(digits, isHex ? 16 : 10);

    if (!syntax.isXmlCodePoint(codePoint)) {
      throw this.error('Character reference resolves to an invalid character');
    }

    parsedValue = String.fromCodePoint(codePoint);
  } else {
    // This is an entity reference.
    parsedValue = syntax.predefinedEntities[ref];

    if (parsedValue === undefined) {
      let {
        ignoreUndefinedEntities,
        resolveUndefinedEntity,
      } = this.options;

      let wrappedRef = `&${ref};`; // for backcompat with <= 2.x

      if (resolveUndefinedEntity) {
        let resolvedValue = resolveUndefinedEntity(wrappedRef);

        if (resolvedValue !== null && resolvedValue !== undefined) {
          let type = typeof resolvedValue;

          if (type !== 'string') {
            throw new TypeError(`\`resolveUndefinedEntity()\` must return a string, \`null\`, or \`undefined\`, but returned a value of type ${type}`);
          }

          return resolvedValue;
        }
      }

      if (ignoreUndefinedEntities) {
        return wrappedRef;
      }

      // Rewind to the `&` so the error points at the start of the reference.
      scanner.reset(-wrappedRef.length);
      throw this.error(`Named entity isn't defined: ${wrappedRef}`);
    }
  }

  return parsedValue;
}
583
+
584
/**
 * Consumes a `SystemLiteral` (a quoted string) if possible.
 *
 * Unlike an attribute value, the content is taken verbatim up to the matching
 * quote: `<` and `&` are allowed and references aren't replaced.
 *
 * @returns
 *   Value of the `SystemLiteral` minus quotes, or `false` if nothing was
 *   consumed. An empty string indicates that a `SystemLiteral` was consumed
 *   but was empty.
 *
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
 */
consumeSystemLiteral(): string | false {
  const { scanner } = this;

  // Try a double quote first, then fall back to a single quote.
  let quote = scanner.consumeStringFast('"');

  if (!quote) {
    quote = scanner.consumeStringFast("'");
  }

  if (!quote) {
    return false;
  }

  const literal = scanner.consumeUntilString(quote);
  this.validateChars(literal);

  const isTerminated = scanner.consumeStringFast(quote);

  if (!isTerminated) {
    throw this.error('Missing end quote');
  }

  return literal;
}
614
+
615
/**
 * Consumes a run of characters satisfying `syntax.isWhitespace` if possible.
 *
 * @returns Whether any whitespace characters were consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
 */
consumeWhitespace(): boolean {
  const whitespace = this.scanner.consumeMatchFn(syntax.isWhitespace);
  return whitespace.length > 0;
}
624
+
625
/**
 * Consumes an XML declaration if possible.
 *
 * The input only counts as an XML declaration when `<?xml` is followed by
 * whitespace. If it is followed by another name character instead, it is the
 * start of an ordinary processing instruction whose target merely begins
 * with "xml" (such as `<?xml-stylesheet ... ?>`). In that case the scanner is
 * rewound and nothing is consumed, so the instruction can be parsed as `Misc`
 * content.
 *
 * The version must look like `1.x`. The encoding value isn't validated. A
 * `standalone` pseudo-attribute, if present, must be "yes" or "no".
 *
 * @returns Whether an XML declaration was consumed.
 * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
 */
consumeXmlDeclaration(): boolean {
  let { scanner } = this;
  let mark = scanner.charIndex;

  if (!scanner.consumeStringFast('<?xml')) {
    return false;
  }

  if (!this.consumeWhitespace()) {
    let next = scanner.peek();

    if (next !== emptyString && syntax.isNameChar(next)) {
      // The target is a longer name that starts with "xml", so this is a
      // regular processing instruction rather than an XML declaration.
      scanner.reset(mark);
      return false;
    }

    throw this.error('Invalid XML declaration');
  }

  let version = Boolean(scanner.consumeStringFast('version'))
    && this.consumeEqual()
    && this.consumeSystemLiteral();

  if (version === false) {
    throw this.error('XML version is missing or invalid');
  } else if (!/^1\.[0-9]+$/.test(version)) {
    throw this.error('Invalid character in version number');
  }

  if (this.consumeWhitespace()) {
    let encoding = Boolean(scanner.consumeStringFast('encoding'))
      && this.consumeEqual()
      && this.consumeSystemLiteral();

    if (encoding) {
      this.consumeWhitespace();
    }

    let standalone = Boolean(scanner.consumeStringFast('standalone'))
      && this.consumeEqual()
      && this.consumeSystemLiteral();

    // Compare against `false` rather than testing truthiness so that an empty
    // value (`standalone=""`) is rejected instead of silently accepted.
    if (standalone !== false) {
      if (standalone !== 'yes' && standalone !== 'no') {
        throw this.error('Only "yes" and "no" are permitted as values of `standalone`');
      }

      this.consumeWhitespace();
    }
  }

  if (!scanner.consumeStringFast('?>')) {
    throw this.error('Invalid or unclosed XML declaration');
  }

  return true;
}
680
+
681
/**
 * Creates an error describing a problem at the current scanner position.
 *
 * The error is returned, not thrown; callers are expected to `throw` it.
 *
 * The error message contains the given _message_, the 1-based line and
 * column, an excerpt of the offending line (limited to roughly 50
 * characters), and a caret marking the error position within the excerpt.
 * The error object also carries `column`, `excerpt`, `line`, and `pos`
 * properties.
 *
 * @param message Human-readable description of the problem.
 * @returns Error to be thrown by the caller.
 */
error(message: string): Error {
  let { charIndex, string: xml } = this.scanner;
  let line = 1;
  let lineStart = 0;

  // Find the line on which the error occurred and where that line begins.
  for (let i = 0; i < charIndex; ++i) {
    if (xml[i] === '\n') {
      line += 1;
      lineStart = i + 1;
    }
  }

  let column = charIndex - lineStart + 1;
  let eol = xml.indexOf('\n', charIndex);

  // The excerpt is the whole line containing the error position.
  let excerpt = xml.slice(lineStart, eol === -1 ? xml.length : eol);
  let excerptStart = 0;

  // Keep the excerpt below 50 chars, but always keep the error position in
  // view.
  if (excerpt.length > 50) {
    if (column < 40) {
      excerpt = excerpt.slice(0, 50);
    } else {
      excerptStart = column - 20;
      excerpt = excerpt.slice(excerptStart, column + 30);
    }
  }

  // The excerpt is indented by two spaces. The error character sits at index
  // `column - 1 - excerptStart` within the excerpt, so the caret needs
  // `column - excerptStart + 1` leading spaces to line up beneath it.
  let err = new Error(
    `${message} (line ${line}, column ${column})\n`
      + `  ${excerpt}\n`
      + ' '.repeat(column - excerptStart + 1) + '^\n',
  );

  Object.assign(err, {
    column,
    excerpt,
    line,
    pos: charIndex,
  });

  return err;
}
738
+
739
/**
 * Throws an invalid character error if any character in the given _string_
 * isn't a valid XML character.
 *
 * Before throwing, the scanner is moved back to the offending character so
 * the error reports its position.
 *
 * NOTE(review): this relies on _string_ being the text the scanner consumed
 * most recently, and on a negative `scanner.reset()` argument being an offset
 * in characters (code points) relative to the current position. Both match
 * how this file uses them, but confirm against `StringScanner`.
 */
validateChars(string: string) {
  let { length } = string;

  for (let i = 0; i < length; ++i) {
    let cp = string.codePointAt(i) as number;

    if (!syntax.isXmlCodePoint(cp)) {
      // `i` counts UTF-16 code units, so it can't be subtracted from a count
      // of code points. Count the characters from the offending one to the
      // end of the string directly; this is always at least 1.
      let remainingChars = [ ...string.slice(i) ].length;

      this.scanner.reset(-remainingChars);
      throw this.error('Invalid character');
    }

    if (cp > 65535) {
      // This code point occupies two UTF-16 code units; skip the second one.
      i += 1;
    }
  }
}
759
+ }
760
+
761
+ // -- Private Functions --------------------------------------------------------
762
+
763
/**
 * Prepares an XML string for scanning: drops a leading byte order mark
 * (U+FEFF) if there is one, then turns every CRLF pair and every lone CR into
 * a single LF.
 */
function normalizeXmlString(xml: string): string {
  const hasByteOrderMark = xml.charCodeAt(0) === 0xFEFF;
  const withoutBom = hasByteOrderMark ? xml.slice(1) : xml;

  return withoutBom.replace(/\r\n?/g, '\n');
}
774
+
775
+ // -- Types --------------------------------------------------------------------
776
export type ParserOptions = {
  /**
   * Set to `true` to keep an undefined named entity (such as "&bogus;") in
   * the output verbatim rather than failing with a parse error.
   *
   * @default false
   */
  ignoreUndefinedEntities?: boolean;

  /**
   * Set to `true` to represent CDATA sections as `XmlCdata` nodes. By default
   * their content is added as `XmlText`, which gives a simpler node tree.
   *
   * @default false
   */
  preserveCdata?: boolean;

  /**
   * Set to `true` to represent comments as `XmlComment` nodes. By default
   * comments are left out of the node tree.
   *
   * @default false
   */
  preserveComments?: boolean;

  /**
   * Callback invoked for each undefined named entity, with the entity (for
   * example "&bogus;") as its only argument. Return the replacement string,
   * or `null` or `undefined` to leave the entity undefined, in which case
   * `ignoreUndefinedEntities` decides whether that is a parse error.
   */
  resolveUndefinedEntity?: (entity: string) => string | null | undefined;

  /**
   * Set to `true` to order the keys of each element's `attributes` object by
   * attribute name. By default attributes keep the order in which they appear
   * in the XML.
   *
   * @default false
   */
  sortAttributes?: boolean;
};