@rgrove/parse-xml 4.0.1 → 4.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (52):
  1. package/README.md +46 -31
  2. package/dist/browser.js +692 -300
  3. package/dist/browser.js.map +4 -4
  4. package/dist/global.min.js +9 -8
  5. package/dist/global.min.js.map +4 -4
  6. package/dist/index.d.ts +3 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +8 -2
  9. package/dist/index.js.map +1 -1
  10. package/dist/lib/Parser.d.ts +53 -6
  11. package/dist/lib/Parser.d.ts.map +1 -1
  12. package/dist/lib/Parser.js +166 -126
  13. package/dist/lib/Parser.js.map +1 -1
  14. package/dist/lib/StringScanner.d.ts +15 -21
  15. package/dist/lib/StringScanner.d.ts.map +1 -1
  16. package/dist/lib/StringScanner.js +63 -86
  17. package/dist/lib/StringScanner.js.map +1 -1
  18. package/dist/lib/XmlDeclaration.d.ts +30 -0
  19. package/dist/lib/XmlDeclaration.d.ts.map +1 -0
  20. package/dist/lib/XmlDeclaration.js +36 -0
  21. package/dist/lib/XmlDeclaration.js.map +1 -0
  22. package/dist/lib/XmlDocument.d.ts +4 -2
  23. package/dist/lib/XmlDocument.d.ts.map +1 -1
  24. package/dist/lib/XmlDocument.js.map +1 -1
  25. package/dist/lib/XmlDocumentType.d.ts +37 -0
  26. package/dist/lib/XmlDocumentType.d.ts.map +1 -0
  27. package/dist/lib/XmlDocumentType.js +39 -0
  28. package/dist/lib/XmlDocumentType.js.map +1 -0
  29. package/dist/lib/XmlElement.js.map +1 -1
  30. package/dist/lib/XmlError.d.ts +24 -0
  31. package/dist/lib/XmlError.d.ts.map +1 -0
  32. package/dist/lib/XmlError.js +52 -0
  33. package/dist/lib/XmlError.js.map +1 -0
  34. package/dist/lib/XmlNode.d.ts +20 -1
  35. package/dist/lib/XmlNode.d.ts.map +1 -1
  36. package/dist/lib/XmlNode.js +28 -3
  37. package/dist/lib/XmlNode.js.map +1 -1
  38. package/dist/lib/syntax.d.ts.map +1 -1
  39. package/dist/lib/syntax.js +18 -23
  40. package/dist/lib/syntax.js.map +1 -1
  41. package/dist/lib/types.d.ts +2 -2
  42. package/dist/lib/types.d.ts.map +1 -1
  43. package/package.json +20 -23
  44. package/src/index.ts +3 -0
  45. package/src/lib/Parser.ts +228 -141
  46. package/src/lib/StringScanner.ts +66 -103
  47. package/src/lib/XmlDeclaration.ts +58 -0
  48. package/src/lib/XmlDocument.ts +4 -2
  49. package/src/lib/XmlDocumentType.ts +67 -0
  50. package/src/lib/XmlError.ts +80 -0
  51. package/src/lib/XmlNode.ts +33 -3
  52. package/src/lib/syntax.ts +12 -18
package/src/lib/Parser.ts CHANGED
@@ -2,14 +2,15 @@ import { StringScanner } from './StringScanner.js';
2
2
  import * as syntax from './syntax.js';
3
3
  import { XmlCdata } from './XmlCdata.js';
4
4
  import { XmlComment } from './XmlComment.js';
5
+ import { XmlDeclaration } from './XmlDeclaration.js';
5
6
  import { XmlDocument } from './XmlDocument.js';
7
+ import { XmlDocumentType } from './XmlDocumentType.js';
6
8
  import { XmlElement } from './XmlElement.js';
9
+ import { XmlError } from './XmlError.js';
10
+ import { XmlNode } from './XmlNode.js';
7
11
  import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
8
12
  import { XmlText } from './XmlText.js';
9
13
 
10
- import type { XmlNode } from './XmlNode.js';
11
-
12
-
13
14
  const emptyString = '';
14
15
 
15
16
  /**
@@ -29,56 +30,67 @@ export class Parser {
29
30
  * @param options Parser options.
30
31
  */
31
32
  constructor(xml: string, options: ParserOptions = {}) {
32
- this.document = new XmlDocument();
33
- this.currentNode = this.document;
34
- this.options = options;
35
- this.scanner = new StringScanner(normalizeXmlString(xml));
33
+ let doc = this.document = new XmlDocument();
36
34
 
37
- this.consumeProlog();
35
+ this.currentNode = doc;
36
+ this.options = options;
37
+ this.scanner = new StringScanner(xml);
38
38
 
39
- if (!this.consumeElement()) {
40
- throw this.error('Root element is missing or invalid');
39
+ if (this.options.includeOffsets) {
40
+ doc.start = 0;
41
+ doc.end = xml.length;
41
42
  }
42
43
 
43
- while (this.consumeMisc()) {} // eslint-disable-line no-empty
44
-
45
- if (!this.scanner.isEnd) {
46
- throw this.error('Extra content at the end of the document');
47
- }
44
+ this.parse();
48
45
  }
49
46
 
50
47
  /**
51
48
  * Adds the given `XmlNode` as a child of `this.currentNode`.
52
49
  */
53
- addNode(node: XmlNode) {
50
+ addNode(node: XmlNode, charIndex: number) {
54
51
  node.parent = this.currentNode;
55
52
 
53
+ if (this.options.includeOffsets) {
54
+ node.start = this.scanner.charIndexToByteIndex(charIndex);
55
+ node.end = this.scanner.charIndexToByteIndex();
56
+ }
57
+
56
58
  // @ts-expect-error: XmlDocument has a more limited set of possible children
57
59
  // than XmlElement so TypeScript is unhappy, but we always do the right
58
60
  // thing.
59
61
  this.currentNode.children.push(node);
62
+ return true;
60
63
  }
61
64
 
62
65
  /**
63
66
  * Adds the given _text_ to the document, either by appending it to a
64
67
  * preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
65
68
  */
66
- addText(text: string) {
69
+ addText(text: string, charIndex: number) {
67
70
  let { children } = this.currentNode;
68
71
  let { length } = children;
69
72
 
73
+ text = normalizeLineBreaks(text);
74
+
70
75
  if (length > 0) {
71
76
  let prevNode = children[length - 1];
72
77
 
73
- if (prevNode instanceof XmlText) {
78
+ if (prevNode?.type === XmlNode.TYPE_TEXT) {
79
+ let textNode = prevNode as XmlText;
80
+
74
81
  // The previous node is a text node, so we can append to it and avoid
75
82
  // creating another node.
76
- prevNode.text += text;
77
- return;
83
+ textNode.text += text;
84
+
85
+ if (this.options.includeOffsets) {
86
+ textNode.end = this.scanner.charIndexToByteIndex();
87
+ }
88
+
89
+ return true;
78
90
  }
79
91
  }
80
92
 
81
- this.addNode(new XmlText(text));
93
+ return this.addNode(new XmlText(text), charIndex);
82
94
  }
83
95
 
84
96
  /**
@@ -159,7 +171,7 @@ export class Parser {
159
171
  : syntax.attValueCharSingleQuote;
160
172
 
161
173
  matchLoop: while (!scanner.isEnd) {
162
- chars = scanner.consumeMatch(regex);
174
+ chars = scanner.consumeUntilMatch(regex);
163
175
 
164
176
  if (chars) {
165
177
  this.validateChars(chars);
@@ -178,7 +190,7 @@ export class Parser {
178
190
  case '<':
179
191
  throw this.error('Unescaped `<` is not allowed in an attribute value');
180
192
 
181
- case emptyString:
193
+ default:
182
194
  break matchLoop;
183
195
  }
184
196
  }
@@ -199,25 +211,22 @@ export class Parser {
199
211
  */
200
212
  consumeCdataSection(): boolean {
201
213
  let { scanner } = this;
214
+ let startIndex = scanner.charIndex;
202
215
 
203
- if (!scanner.consumeStringFast('<![CDATA[')) {
216
+ if (!scanner.consumeString('<![CDATA[')) {
204
217
  return false;
205
218
  }
206
219
 
207
220
  let text = scanner.consumeUntilString(']]>');
208
221
  this.validateChars(text);
209
222
 
210
- if (!scanner.consumeStringFast(']]>')) {
223
+ if (!scanner.consumeString(']]>')) {
211
224
  throw this.error('Unclosed CDATA section');
212
225
  }
213
226
 
214
- if (this.options.preserveCdata) {
215
- this.addNode(new XmlCdata(text));
216
- } else {
217
- this.addText(text);
218
- }
219
-
220
- return true;
227
+ return this.options.preserveCdata
228
+ ? this.addNode(new XmlCdata(normalizeLineBreaks(text)), startIndex)
229
+ : this.addText(text, startIndex);
221
230
  }
222
231
 
223
232
  /**
@@ -228,6 +237,7 @@ export class Parser {
228
237
  */
229
238
  consumeCharData(): boolean {
230
239
  let { scanner } = this;
240
+ let startIndex = scanner.charIndex;
231
241
  let charData = scanner.consumeUntilMatch(syntax.endCharData);
232
242
 
233
243
  if (!charData) {
@@ -240,8 +250,7 @@ export class Parser {
240
250
  throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
241
251
  }
242
252
 
243
- this.addText(charData);
244
- return true;
253
+ return this.addText(charData, startIndex);
245
254
  }
246
255
 
247
256
  /**
@@ -252,15 +261,16 @@ export class Parser {
252
261
  */
253
262
  consumeComment(): boolean {
254
263
  let { scanner } = this;
264
+ let startIndex = scanner.charIndex;
255
265
 
256
- if (!scanner.consumeStringFast('<!--')) {
266
+ if (!scanner.consumeString('<!--')) {
257
267
  return false;
258
268
  }
259
269
 
260
270
  let content = scanner.consumeUntilString('--');
261
271
  this.validateChars(content);
262
272
 
263
- if (!scanner.consumeStringFast('-->')) {
273
+ if (!scanner.consumeString('-->')) {
264
274
  if (scanner.peek(2) === '--') {
265
275
  throw this.error("The string `--` isn't allowed inside a comment");
266
276
  }
@@ -268,11 +278,9 @@ export class Parser {
268
278
  throw this.error('Unclosed comment');
269
279
  }
270
280
 
271
- if (this.options.preserveComments) {
272
- this.addNode(new XmlComment(content.trim()));
273
- }
274
-
275
- return true;
281
+ return this.options.preserveComments
282
+ ? this.addNode(new XmlComment(normalizeLineBreaks(content)), startIndex)
283
+ : true;
276
284
  }
277
285
 
278
286
  /**
@@ -285,14 +293,12 @@ export class Parser {
285
293
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
286
294
  */
287
295
  consumeContentReference(): boolean {
296
+ let startIndex = this.scanner.charIndex;
288
297
  let ref = this.consumeReference();
289
298
 
290
- if (ref) {
291
- this.addText(ref);
292
- return true;
293
- }
294
-
295
- return false;
299
+ return ref
300
+ ? this.addText(ref, startIndex)
301
+ : false;
296
302
  }
297
303
 
298
304
  /**
@@ -306,25 +312,68 @@ export class Parser {
306
312
  */
307
313
  consumeDoctypeDeclaration(): boolean {
308
314
  let { scanner } = this;
315
+ let startIndex = scanner.charIndex;
309
316
 
310
- if (!scanner.consumeStringFast('<!DOCTYPE')
311
- || !this.consumeWhitespace()) {
312
-
317
+ if (!scanner.consumeString('<!DOCTYPE')) {
313
318
  return false;
314
319
  }
315
320
 
316
- scanner.consumeMatch(/[^[>]+/y);
321
+ let name = this.consumeWhitespace()
322
+ && this.consumeName();
317
323
 
318
- if (scanner.consumeMatch(/\[[\s\S]+?\][\x20\t\r\n]*>/y)) {
319
- return true;
324
+ if (!name) {
325
+ throw this.error('Expected a name');
326
+ }
327
+
328
+ let publicId;
329
+ let systemId;
330
+
331
+ if (this.consumeWhitespace()) {
332
+ if (scanner.consumeString('PUBLIC')) {
333
+ publicId = this.consumeWhitespace()
334
+ && this.consumePubidLiteral();
335
+
336
+ if (publicId === false) {
337
+ throw this.error('Expected a public identifier');
338
+ }
339
+
340
+ this.consumeWhitespace();
341
+ }
342
+
343
+ if (publicId !== undefined || scanner.consumeString('SYSTEM')) {
344
+ this.consumeWhitespace();
345
+ systemId = this.consumeSystemLiteral();
346
+
347
+ if (systemId === false) {
348
+ throw this.error('Expected a system identifier');
349
+ }
350
+
351
+ this.consumeWhitespace();
352
+ }
353
+ }
354
+
355
+ let internalSubset;
356
+
357
+ if (scanner.consumeString('[')) {
358
+ // The internal subset may contain comments that contain `]` characters,
359
+ // so we can't use `consumeUntilString()` here.
360
+ internalSubset = scanner.consumeUntilMatch(/\][\x20\t\r\n]*>/);
361
+
362
+ if (!scanner.consumeString(']')) {
363
+ throw this.error('Unclosed internal subset');
364
+ }
365
+
366
+ this.consumeWhitespace();
320
367
  }
321
368
 
322
- if (!scanner.consumeStringFast('>')) {
369
+ if (!scanner.consumeString('>')) {
323
370
  throw this.error('Unclosed doctype declaration');
324
371
  }
325
372
 
326
- return true;
327
- }
373
+ return this.options.preserveDocumentType
374
+ ? this.addNode(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex)
375
+ : true;
376
+ }
328
377
 
329
378
  /**
330
379
  * Consumes an element if possible.
@@ -334,27 +383,27 @@ export class Parser {
334
383
  */
335
384
  consumeElement(): boolean {
336
385
  let { scanner } = this;
337
- let mark = scanner.charIndex;
386
+ let startIndex = scanner.charIndex;
338
387
 
339
- if (!scanner.consumeStringFast('<')) {
388
+ if (!scanner.consumeString('<')) {
340
389
  return false;
341
390
  }
342
391
 
343
392
  let name = this.consumeName();
344
393
 
345
394
  if (!name) {
346
- scanner.reset(mark);
395
+ scanner.reset(startIndex);
347
396
  return false;
348
397
  }
349
398
 
350
399
  let attributes = this.consumeAttributes();
351
- let isEmpty = Boolean(scanner.consumeStringFast('/>'));
400
+ let isEmpty = !!scanner.consumeString('/>');
352
401
  let element = new XmlElement(name, attributes);
353
402
 
354
403
  element.parent = this.currentNode;
355
404
 
356
405
  if (!isEmpty) {
357
- if (!scanner.consumeStringFast('>')) {
406
+ if (!scanner.consumeString('>')) {
358
407
  throw this.error(`Unclosed start tag for element \`${name}\``);
359
408
  }
360
409
 
@@ -373,7 +422,7 @@ export class Parser {
373
422
  let endTagMark = scanner.charIndex;
374
423
  let endTagName;
375
424
 
376
- if (!scanner.consumeStringFast('</')
425
+ if (!scanner.consumeString('</')
377
426
  || !(endTagName = this.consumeName())
378
427
  || endTagName !== name) {
379
428
 
@@ -383,15 +432,14 @@ export class Parser {
383
432
 
384
433
  this.consumeWhitespace();
385
434
 
386
- if (!scanner.consumeStringFast('>')) {
435
+ if (!scanner.consumeString('>')) {
387
436
  throw this.error(`Unclosed end tag for element ${name}`);
388
437
  }
389
438
 
390
439
  this.currentNode = element.parent;
391
440
  }
392
441
 
393
- this.addNode(element);
394
- return true;
442
+ return this.addNode(element, startIndex);
395
443
  }
396
444
 
397
445
  /**
@@ -403,7 +451,7 @@ export class Parser {
403
451
  consumeEqual(): boolean {
404
452
  this.consumeWhitespace();
405
453
 
406
- if (this.scanner.consumeStringFast('=')) {
454
+ if (this.scanner.consumeString('=')) {
407
455
  this.consumeWhitespace();
408
456
  return true;
409
457
  }
@@ -443,9 +491,9 @@ export class Parser {
443
491
  */
444
492
  consumeProcessingInstruction(): boolean {
445
493
  let { scanner } = this;
446
- let mark = scanner.charIndex;
494
+ let startIndex = scanner.charIndex;
447
495
 
448
- if (!scanner.consumeStringFast('<?')) {
496
+ if (!scanner.consumeString('<?')) {
449
497
  return false;
450
498
  }
451
499
 
@@ -453,7 +501,7 @@ export class Parser {
453
501
 
454
502
  if (name) {
455
503
  if (name.toLowerCase() === 'xml') {
456
- scanner.reset(mark);
504
+ scanner.reset(startIndex);
457
505
  throw this.error("XML declaration isn't allowed here");
458
506
  }
459
507
  } else {
@@ -461,9 +509,8 @@ export class Parser {
461
509
  }
462
510
 
463
511
  if (!this.consumeWhitespace()) {
464
- if (scanner.consumeStringFast('?>')) {
465
- this.addNode(new XmlProcessingInstruction(name));
466
- return true;
512
+ if (scanner.consumeString('?>')) {
513
+ return this.addNode(new XmlProcessingInstruction(name), startIndex);
467
514
  }
468
515
 
469
516
  throw this.error('Whitespace is required after a processing instruction name');
@@ -472,12 +519,11 @@ export class Parser {
472
519
  let content = scanner.consumeUntilString('?>');
473
520
  this.validateChars(content);
474
521
 
475
- if (!scanner.consumeStringFast('?>')) {
522
+ if (!scanner.consumeString('?>')) {
476
523
  throw this.error('Unterminated processing instruction');
477
524
  }
478
525
 
479
- this.addNode(new XmlProcessingInstruction(name, content));
480
- return true;
526
+ return this.addNode(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
481
527
  }
482
528
 
483
529
  /**
@@ -488,7 +534,7 @@ export class Parser {
488
534
  */
489
535
  consumeProlog(): boolean {
490
536
  let { scanner } = this;
491
- let mark = scanner.charIndex;
537
+ let startIndex = scanner.charIndex;
492
538
 
493
539
  this.consumeXmlDeclaration();
494
540
 
@@ -498,7 +544,29 @@ export class Parser {
498
544
  while (this.consumeMisc()) {} // eslint-disable-line no-empty
499
545
  }
500
546
 
501
- return mark < scanner.charIndex;
547
+ return startIndex < scanner.charIndex;
548
+ }
549
+
550
+ /**
551
+ * Consumes a public identifier literal if possible.
552
+ *
553
+ * @returns
554
+ * Value of the public identifier literal minus quotes, or `false` if
555
+ * nothing was consumed. An empty string indicates that a public id literal
556
+ * was consumed but was empty.
557
+ *
558
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
559
+ */
560
+ consumePubidLiteral(): string | false {
561
+ let startIndex = this.scanner.charIndex;
562
+ let value = this.consumeSystemLiteral();
563
+
564
+ if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
565
+ this.scanner.reset(startIndex);
566
+ throw this.error('Invalid character in public identifier');
567
+ }
568
+
569
+ return value;
502
570
  }
503
571
 
504
572
  /**
@@ -516,7 +584,7 @@ export class Parser {
516
584
  consumeReference(): string | false {
517
585
  let { scanner } = this;
518
586
 
519
- if (!scanner.consumeStringFast('&')) {
587
+ if (!scanner.consumeString('&')) {
520
588
  return false;
521
589
  }
522
590
 
@@ -596,7 +664,7 @@ export class Parser {
596
664
  */
597
665
  consumeSystemLiteral(): string | false {
598
666
  let { scanner } = this;
599
- let quote = scanner.consumeStringFast('"') || scanner.consumeStringFast("'");
667
+ let quote = scanner.consumeString('"') || scanner.consumeString("'");
600
668
 
601
669
  if (!quote) {
602
670
  return false;
@@ -605,7 +673,7 @@ export class Parser {
605
673
  let value = scanner.consumeUntilString(quote);
606
674
  this.validateChars(value);
607
675
 
608
- if (!scanner.consumeStringFast(quote)) {
676
+ if (!scanner.consumeString(quote)) {
609
677
  throw this.error('Missing end quote');
610
678
  }
611
679
 
@@ -619,7 +687,7 @@ export class Parser {
619
687
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
620
688
  */
621
689
  consumeWhitespace(): boolean {
622
- return Boolean(this.scanner.consumeMatchFn(syntax.isWhitespace));
690
+ return !!this.scanner.consumeMatchFn(syntax.isWhitespace);
623
691
  }
624
692
 
625
693
  /**
@@ -630,8 +698,9 @@ export class Parser {
630
698
  */
631
699
  consumeXmlDeclaration(): boolean {
632
700
  let { scanner } = this;
701
+ let startIndex = scanner.charIndex;
633
702
 
634
- if (!scanner.consumeStringFast('<?xml')) {
703
+ if (!scanner.consumeString('<?xml')) {
635
704
  return false;
636
705
  }
637
706
 
@@ -639,7 +708,7 @@ export class Parser {
639
708
  throw this.error('Invalid XML declaration');
640
709
  }
641
710
 
642
- let version = Boolean(scanner.consumeStringFast('version'))
711
+ let version = !!scanner.consumeString('version')
643
712
  && this.consumeEqual()
644
713
  && this.consumeSystemLiteral();
645
714
 
@@ -649,16 +718,22 @@ export class Parser {
649
718
  throw this.error('Invalid character in version number');
650
719
  }
651
720
 
721
+ let encoding;
722
+ let standalone;
723
+
652
724
  if (this.consumeWhitespace()) {
653
- let encoding = Boolean(scanner.consumeStringFast('encoding'))
725
+ encoding = !!scanner.consumeString('encoding')
654
726
  && this.consumeEqual()
655
727
  && this.consumeSystemLiteral();
656
728
 
657
729
  if (encoding) {
730
+ if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
731
+ throw this.error('Invalid character in encoding name');
732
+ }
658
733
  this.consumeWhitespace();
659
734
  }
660
735
 
661
- let standalone = Boolean(scanner.consumeStringFast('standalone'))
736
+ standalone = !!scanner.consumeString('standalone')
662
737
  && this.consumeEqual()
663
738
  && this.consumeSystemLiteral();
664
739
 
@@ -671,69 +746,43 @@ export class Parser {
671
746
  }
672
747
  }
673
748
 
674
- if (!scanner.consumeStringFast('?>')) {
749
+ if (!scanner.consumeString('?>')) {
675
750
  throw this.error('Invalid or unclosed XML declaration');
676
751
  }
677
752
 
678
- return true;
753
+ return this.options.preserveXmlDeclaration
754
+ ? this.addNode(new XmlDeclaration(
755
+ version,
756
+ encoding || undefined,
757
+ (standalone as 'yes' | 'no' | false) || undefined,
758
+ ), startIndex)
759
+ : true;
679
760
  }
680
761
 
681
762
  /**
682
- * Throws an error at the current scanner position.
763
+ * Returns an `XmlError` for the current scanner position.
683
764
  */
684
765
  error(message: string) {
685
- let { charIndex, string: xml } = this.scanner;
686
- let column = 1;
687
- let excerpt = '';
688
- let line = 1;
689
-
690
- // Find the line and column where the error occurred.
691
- for (let i = 0; i < charIndex; ++i) {
692
- let char = xml[i];
693
-
694
- if (char === '\n') {
695
- column = 1;
696
- excerpt = '';
697
- line += 1;
698
- } else {
699
- column += 1;
700
- excerpt += char;
701
- }
702
- }
703
-
704
- let eol = xml.indexOf('\n', charIndex);
705
-
706
- excerpt += eol === -1
707
- ? xml.slice(charIndex)
708
- : xml.slice(charIndex, eol);
766
+ let { scanner } = this;
767
+ return new XmlError(message, scanner.charIndex, scanner.string);
768
+ }
709
769
 
710
- let excerptStart = 0;
770
+ /**
771
+ * Parses the XML input.
772
+ */
773
+ parse() {
774
+ this.scanner.consumeString('\uFEFF'); // byte order mark
775
+ this.consumeProlog();
711
776
 
712
- // Keep the excerpt below 50 chars, but always keep the error position in
713
- // view.
714
- if (excerpt.length > 50) {
715
- if (column < 40) {
716
- excerpt = excerpt.slice(0, 50);
717
- } else {
718
- excerptStart = column - 20;
719
- excerpt = excerpt.slice(excerptStart, column + 30);
720
- }
777
+ if (!this.consumeElement()) {
778
+ throw this.error('Root element is missing or invalid');
721
779
  }
722
780
 
723
- let err = new Error(
724
- `${message} (line ${line}, column ${column})\n`
725
- + ` ${excerpt}\n`
726
- + ' '.repeat(column - excerptStart + 1) + '^\n',
727
- );
728
-
729
- Object.assign(err, {
730
- column,
731
- excerpt,
732
- line,
733
- pos: charIndex,
734
- });
781
+ while (this.consumeMisc()) {} // eslint-disable-line no-empty
735
782
 
736
- return err;
783
+ if (!this.scanner.isEnd) {
784
+ throw this.error('Extra content at the end of the document');
785
+ }
737
786
  }
738
787
 
739
788
  /**
@@ -761,15 +810,19 @@ export class Parser {
761
810
  // -- Private Functions --------------------------------------------------------
762
811
 
763
812
  /**
764
- * Normalizes the given XML string by stripping a byte order mark (if present)
765
- * and replacing CRLF sequences and lone CR characters with LF characters.
813
+ * Normalizes line breaks in the given text by replacing CRLF sequences and lone
814
+ * CR characters with LF characters.
766
815
  */
767
- function normalizeXmlString(xml: string): string {
768
- if (xml[0] === '\uFEFF') {
769
- xml = xml.slice(1);
816
+ function normalizeLineBreaks(text: string): string {
817
+ let i = 0;
818
+
819
+ while ((i = text.indexOf('\r', i)) !== -1) {
820
+ text = text[i + 1] === '\n'
821
+ ? text.slice(0, i) + text.slice(i + 1)
822
+ : text.slice(0, i) + '\n' + text.slice(i + 1);
770
823
  }
771
824
 
772
- return xml.replace(/\r\n?/g, '\n');
825
+ return text;
773
826
  }
774
827
 
775
828
  // -- Types --------------------------------------------------------------------
@@ -782,6 +835,14 @@ export type ParserOptions = {
782
835
  */
783
836
  ignoreUndefinedEntities?: boolean;
784
837
 
838
+ /**
839
+ * When `true`, the starting and ending byte offsets of each node in the input
840
+ * string will be made available via `start` and `end` properties on the node.
841
+ *
842
+ * @default false
843
+ */
844
+ includeOffsets?: boolean;
845
+
785
846
  /**
786
847
  * When `true`, CDATA sections will be preserved in the document as `XmlCdata`
787
848
  * nodes. Otherwise CDATA sections will be represented as `XmlText` nodes,
@@ -799,6 +860,32 @@ export type ParserOptions = {
799
860
  */
800
861
  preserveComments?: boolean;
801
862
 
863
+ /**
864
+ * When `true`, a document type declaration (if present) will be preserved in
865
+ * the document as an `XmlDocumentType` node. Otherwise the declaration will
866
+ * not be included in the node tree.
867
+ *
868
+ * Note that when this is `true` and a document type declaration is present,
869
+ * the DTD will precede the root node in the node tree (normally the root
870
+ * node would be first).
871
+ *
872
+ * @default false
873
+ */
874
+ preserveDocumentType?: boolean;
875
+
876
+ /**
877
+ * When `true`, an XML declaration (if present) will be preserved in the
878
+ * document as an `XmlDeclaration` node. Otherwise the declaration will not be
879
+ * included in the node tree.
880
+ *
881
+ * Note that when this is `true` and an XML declaration is present, the
882
+ * XML declaration will be the first child of the document (normally the root
883
+ * node would be first).
884
+ *
885
+ * @default false
886
+ */
887
+ preserveXmlDeclaration?: boolean;
888
+
802
889
  /**
803
890
  * When an undefined named entity is encountered, this function will be called
804
891
  * with the entity as its only argument. It should return a string value with