@rgrove/parse-xml 4.0.1 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +40 -25
  2. package/dist/browser.js +642 -223
  3. package/dist/browser.js.map +4 -4
  4. package/dist/global.min.js +9 -8
  5. package/dist/global.min.js.map +4 -4
  6. package/dist/index.d.ts +3 -0
  7. package/dist/index.d.ts.map +1 -1
  8. package/dist/index.js +7 -1
  9. package/dist/index.js.map +1 -1
  10. package/dist/lib/Parser.d.ts +49 -6
  11. package/dist/lib/Parser.d.ts.map +1 -1
  12. package/dist/lib/Parser.js +133 -102
  13. package/dist/lib/Parser.js.map +1 -1
  14. package/dist/lib/StringScanner.d.ts +5 -5
  15. package/dist/lib/StringScanner.d.ts.map +1 -1
  16. package/dist/lib/StringScanner.js +9 -9
  17. package/dist/lib/StringScanner.js.map +1 -1
  18. package/dist/lib/XmlDeclaration.d.ts +30 -0
  19. package/dist/lib/XmlDeclaration.d.ts.map +1 -0
  20. package/dist/lib/XmlDeclaration.js +36 -0
  21. package/dist/lib/XmlDeclaration.js.map +1 -0
  22. package/dist/lib/XmlDocument.d.ts +4 -2
  23. package/dist/lib/XmlDocument.d.ts.map +1 -1
  24. package/dist/lib/XmlDocument.js.map +1 -1
  25. package/dist/lib/XmlDocumentType.d.ts +37 -0
  26. package/dist/lib/XmlDocumentType.d.ts.map +1 -0
  27. package/dist/lib/XmlDocumentType.js +39 -0
  28. package/dist/lib/XmlDocumentType.js.map +1 -0
  29. package/dist/lib/XmlError.d.ts +24 -0
  30. package/dist/lib/XmlError.d.ts.map +1 -0
  31. package/dist/lib/XmlError.js +52 -0
  32. package/dist/lib/XmlError.js.map +1 -0
  33. package/dist/lib/XmlNode.d.ts +20 -1
  34. package/dist/lib/XmlNode.d.ts.map +1 -1
  35. package/dist/lib/XmlNode.js +28 -3
  36. package/dist/lib/XmlNode.js.map +1 -1
  37. package/dist/lib/syntax.d.ts.map +1 -1
  38. package/dist/lib/syntax.js +1 -1
  39. package/dist/lib/syntax.js.map +1 -1
  40. package/dist/lib/types.d.ts +2 -2
  41. package/dist/lib/types.d.ts.map +1 -1
  42. package/package.json +20 -18
  43. package/src/index.ts +3 -0
  44. package/src/lib/Parser.ts +195 -118
  45. package/src/lib/StringScanner.ts +10 -10
  46. package/src/lib/XmlDeclaration.ts +58 -0
  47. package/src/lib/XmlDocument.ts +4 -2
  48. package/src/lib/XmlDocumentType.ts +67 -0
  49. package/src/lib/XmlError.ts +80 -0
  50. package/src/lib/XmlNode.ts +33 -3
  51. package/src/lib/syntax.ts +1 -1
package/src/lib/Parser.ts CHANGED
@@ -2,14 +2,15 @@ import { StringScanner } from './StringScanner.js';
2
2
  import * as syntax from './syntax.js';
3
3
  import { XmlCdata } from './XmlCdata.js';
4
4
  import { XmlComment } from './XmlComment.js';
5
+ import { XmlDeclaration } from './XmlDeclaration.js';
5
6
  import { XmlDocument } from './XmlDocument.js';
7
+ import { XmlDocumentType } from './XmlDocumentType.js';
6
8
  import { XmlElement } from './XmlElement.js';
9
+ import { XmlError } from './XmlError.js';
10
+ import { XmlNode } from './XmlNode.js';
7
11
  import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
8
12
  import { XmlText } from './XmlText.js';
9
13
 
10
- import type { XmlNode } from './XmlNode.js';
11
-
12
-
13
14
  const emptyString = '';
14
15
 
15
16
  /**
@@ -29,11 +30,18 @@ export class Parser {
29
30
  * @param options Parser options.
30
31
  */
31
32
  constructor(xml: string, options: ParserOptions = {}) {
32
- this.document = new XmlDocument();
33
- this.currentNode = this.document;
33
+ let doc = this.document = new XmlDocument();
34
+ let scanner = this.scanner = new StringScanner(xml);
35
+
36
+ this.currentNode = doc;
34
37
  this.options = options;
35
- this.scanner = new StringScanner(normalizeXmlString(xml));
36
38
 
39
+ if (this.options.includeOffsets) {
40
+ doc.start = 0;
41
+ doc.end = xml.length;
42
+ }
43
+
44
+ scanner.consumeStringFast('\uFEFF'); // byte order mark
37
45
  this.consumeProlog();
38
46
 
39
47
  if (!this.consumeElement()) {
@@ -42,7 +50,7 @@ export class Parser {
42
50
 
43
51
  while (this.consumeMisc()) {} // eslint-disable-line no-empty
44
52
 
45
- if (!this.scanner.isEnd) {
53
+ if (!scanner.isEnd) {
46
54
  throw this.error('Extra content at the end of the document');
47
55
  }
48
56
  }
@@ -50,35 +58,50 @@ export class Parser {
50
58
  /**
51
59
  * Adds the given `XmlNode` as a child of `this.currentNode`.
52
60
  */
53
- addNode(node: XmlNode) {
61
+ addNode(node: XmlNode, charIndex: number) {
54
62
  node.parent = this.currentNode;
55
63
 
64
+ if (this.options.includeOffsets) {
65
+ node.start = this.scanner.charIndexToByteIndex(charIndex);
66
+ node.end = this.scanner.charIndexToByteIndex();
67
+ }
68
+
56
69
  // @ts-expect-error: XmlDocument has a more limited set of possible children
57
70
  // than XmlElement so TypeScript is unhappy, but we always do the right
58
71
  // thing.
59
72
  this.currentNode.children.push(node);
73
+ return true;
60
74
  }
61
75
 
62
76
  /**
63
77
  * Adds the given _text_ to the document, either by appending it to a
64
78
  * preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
65
79
  */
66
- addText(text: string) {
80
+ addText(text: string, charIndex: number) {
67
81
  let { children } = this.currentNode;
68
82
  let { length } = children;
69
83
 
84
+ text = normalizeLineBreaks(text);
85
+
70
86
  if (length > 0) {
71
87
  let prevNode = children[length - 1];
72
88
 
73
- if (prevNode instanceof XmlText) {
89
+ if (prevNode?.type === XmlNode.TYPE_TEXT) {
90
+ let textNode = prevNode as XmlText;
91
+
74
92
  // The previous node is a text node, so we can append to it and avoid
75
93
  // creating another node.
76
- prevNode.text += text;
77
- return;
94
+ textNode.text += text;
95
+
96
+ if (this.options.includeOffsets) {
97
+ textNode.end = this.scanner.charIndexToByteIndex();
98
+ }
99
+
100
+ return true;
78
101
  }
79
102
  }
80
103
 
81
- this.addNode(new XmlText(text));
104
+ return this.addNode(new XmlText(text), charIndex);
82
105
  }
83
106
 
84
107
  /**
@@ -199,6 +222,7 @@ export class Parser {
199
222
  */
200
223
  consumeCdataSection(): boolean {
201
224
  let { scanner } = this;
225
+ let startIndex = scanner.charIndex;
202
226
 
203
227
  if (!scanner.consumeStringFast('<![CDATA[')) {
204
228
  return false;
@@ -211,13 +235,9 @@ export class Parser {
211
235
  throw this.error('Unclosed CDATA section');
212
236
  }
213
237
 
214
- if (this.options.preserveCdata) {
215
- this.addNode(new XmlCdata(text));
216
- } else {
217
- this.addText(text);
218
- }
219
-
220
- return true;
238
+ return this.options.preserveCdata
239
+ ? this.addNode(new XmlCdata(normalizeLineBreaks(text)), startIndex)
240
+ : this.addText(text, startIndex);
221
241
  }
222
242
 
223
243
  /**
@@ -228,6 +248,7 @@ export class Parser {
228
248
  */
229
249
  consumeCharData(): boolean {
230
250
  let { scanner } = this;
251
+ let startIndex = scanner.charIndex;
231
252
  let charData = scanner.consumeUntilMatch(syntax.endCharData);
232
253
 
233
254
  if (!charData) {
@@ -240,8 +261,7 @@ export class Parser {
240
261
  throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
241
262
  }
242
263
 
243
- this.addText(charData);
244
- return true;
264
+ return this.addText(charData, startIndex);
245
265
  }
246
266
 
247
267
  /**
@@ -252,6 +272,7 @@ export class Parser {
252
272
  */
253
273
  consumeComment(): boolean {
254
274
  let { scanner } = this;
275
+ let startIndex = scanner.charIndex;
255
276
 
256
277
  if (!scanner.consumeStringFast('<!--')) {
257
278
  return false;
@@ -268,11 +289,9 @@ export class Parser {
268
289
  throw this.error('Unclosed comment');
269
290
  }
270
291
 
271
- if (this.options.preserveComments) {
272
- this.addNode(new XmlComment(content.trim()));
273
- }
274
-
275
- return true;
292
+ return this.options.preserveComments
293
+ ? this.addNode(new XmlComment(normalizeLineBreaks(content)), startIndex)
294
+ : true;
276
295
  }
277
296
 
278
297
  /**
@@ -285,14 +304,12 @@ export class Parser {
285
304
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
286
305
  */
287
306
  consumeContentReference(): boolean {
307
+ let startIndex = this.scanner.charIndex;
288
308
  let ref = this.consumeReference();
289
309
 
290
- if (ref) {
291
- this.addText(ref);
292
- return true;
293
- }
294
-
295
- return false;
310
+ return ref
311
+ ? this.addText(ref, startIndex)
312
+ : false;
296
313
  }
297
314
 
298
315
  /**
@@ -306,25 +323,68 @@ export class Parser {
306
323
  */
307
324
  consumeDoctypeDeclaration(): boolean {
308
325
  let { scanner } = this;
326
+ let startIndex = scanner.charIndex;
309
327
 
310
- if (!scanner.consumeStringFast('<!DOCTYPE')
311
- || !this.consumeWhitespace()) {
312
-
328
+ if (!scanner.consumeStringFast('<!DOCTYPE')) {
313
329
  return false;
314
330
  }
315
331
 
316
- scanner.consumeMatch(/[^[>]+/y);
332
+ let name = this.consumeWhitespace()
333
+ && this.consumeName();
317
334
 
318
- if (scanner.consumeMatch(/\[[\s\S]+?\][\x20\t\r\n]*>/y)) {
319
- return true;
335
+ if (!name) {
336
+ throw this.error('Expected a name');
337
+ }
338
+
339
+ let publicId;
340
+ let systemId;
341
+
342
+ if (this.consumeWhitespace()) {
343
+ if (scanner.consumeStringFast('PUBLIC')) {
344
+ publicId = this.consumeWhitespace()
345
+ && this.consumePubidLiteral();
346
+
347
+ if (publicId === false) {
348
+ throw this.error('Expected a public identifier');
349
+ }
350
+
351
+ this.consumeWhitespace();
352
+ }
353
+
354
+ if (publicId !== undefined || scanner.consumeStringFast('SYSTEM')) {
355
+ this.consumeWhitespace();
356
+ systemId = this.consumeSystemLiteral();
357
+
358
+ if (systemId === false) {
359
+ throw this.error('Expected a system identifier');
360
+ }
361
+
362
+ this.consumeWhitespace();
363
+ }
364
+ }
365
+
366
+ let internalSubset;
367
+
368
+ if (scanner.consumeStringFast('[')) {
369
+ // The internal subset may contain comments that contain `]` characters,
370
+ // so we can't use `consumeUntilString()` here.
371
+ internalSubset = scanner.consumeUntilMatch(/\][\x20\t\r\n]*>/);
372
+
373
+ if (!scanner.consumeStringFast(']')) {
374
+ throw this.error('Unclosed internal subset');
375
+ }
376
+
377
+ this.consumeWhitespace();
320
378
  }
321
379
 
322
380
  if (!scanner.consumeStringFast('>')) {
323
381
  throw this.error('Unclosed doctype declaration');
324
382
  }
325
383
 
326
- return true;
327
- }
384
+ return this.options.preserveDocumentType
385
+ ? this.addNode(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex)
386
+ : true;
387
+ }
328
388
 
329
389
  /**
330
390
  * Consumes an element if possible.
@@ -334,7 +394,7 @@ export class Parser {
334
394
  */
335
395
  consumeElement(): boolean {
336
396
  let { scanner } = this;
337
- let mark = scanner.charIndex;
397
+ let startIndex = scanner.charIndex;
338
398
 
339
399
  if (!scanner.consumeStringFast('<')) {
340
400
  return false;
@@ -343,12 +403,12 @@ export class Parser {
343
403
  let name = this.consumeName();
344
404
 
345
405
  if (!name) {
346
- scanner.reset(mark);
406
+ scanner.reset(startIndex);
347
407
  return false;
348
408
  }
349
409
 
350
410
  let attributes = this.consumeAttributes();
351
- let isEmpty = Boolean(scanner.consumeStringFast('/>'));
411
+ let isEmpty = !!scanner.consumeStringFast('/>');
352
412
  let element = new XmlElement(name, attributes);
353
413
 
354
414
  element.parent = this.currentNode;
@@ -390,8 +450,7 @@ export class Parser {
390
450
  this.currentNode = element.parent;
391
451
  }
392
452
 
393
- this.addNode(element);
394
- return true;
453
+ return this.addNode(element, startIndex);
395
454
  }
396
455
 
397
456
  /**
@@ -443,7 +502,7 @@ export class Parser {
443
502
  */
444
503
  consumeProcessingInstruction(): boolean {
445
504
  let { scanner } = this;
446
- let mark = scanner.charIndex;
505
+ let startIndex = scanner.charIndex;
447
506
 
448
507
  if (!scanner.consumeStringFast('<?')) {
449
508
  return false;
@@ -453,7 +512,7 @@ export class Parser {
453
512
 
454
513
  if (name) {
455
514
  if (name.toLowerCase() === 'xml') {
456
- scanner.reset(mark);
515
+ scanner.reset(startIndex);
457
516
  throw this.error("XML declaration isn't allowed here");
458
517
  }
459
518
  } else {
@@ -462,8 +521,7 @@ export class Parser {
462
521
 
463
522
  if (!this.consumeWhitespace()) {
464
523
  if (scanner.consumeStringFast('?>')) {
465
- this.addNode(new XmlProcessingInstruction(name));
466
- return true;
524
+ return this.addNode(new XmlProcessingInstruction(name), startIndex);
467
525
  }
468
526
 
469
527
  throw this.error('Whitespace is required after a processing instruction name');
@@ -476,8 +534,7 @@ export class Parser {
476
534
  throw this.error('Unterminated processing instruction');
477
535
  }
478
536
 
479
- this.addNode(new XmlProcessingInstruction(name, content));
480
- return true;
537
+ return this.addNode(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
481
538
  }
482
539
 
483
540
  /**
@@ -488,7 +545,7 @@ export class Parser {
488
545
  */
489
546
  consumeProlog(): boolean {
490
547
  let { scanner } = this;
491
- let mark = scanner.charIndex;
548
+ let startIndex = scanner.charIndex;
492
549
 
493
550
  this.consumeXmlDeclaration();
494
551
 
@@ -498,7 +555,29 @@ export class Parser {
498
555
  while (this.consumeMisc()) {} // eslint-disable-line no-empty
499
556
  }
500
557
 
501
- return mark < scanner.charIndex;
558
+ return startIndex < scanner.charIndex;
559
+ }
560
+
561
+ /**
562
+ * Consumes a public identifier literal if possible.
563
+ *
564
+ * @returns
565
+ * Value of the public identifier literal minus quotes, or `false` if
566
+ * nothing was consumed. An empty string indicates that a public id literal
567
+ * was consumed but was empty.
568
+ *
569
+ * @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
570
+ */
571
+ consumePubidLiteral(): string | false {
572
+ let startIndex = this.scanner.charIndex;
573
+ let value = this.consumeSystemLiteral();
574
+
575
+ if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
576
+ this.scanner.reset(startIndex);
577
+ throw this.error('Invalid character in public identifier');
578
+ }
579
+
580
+ return value;
502
581
  }
503
582
 
504
583
  /**
@@ -619,7 +698,7 @@ export class Parser {
619
698
  * @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
620
699
  */
621
700
  consumeWhitespace(): boolean {
622
- return Boolean(this.scanner.consumeMatchFn(syntax.isWhitespace));
701
+ return !!this.scanner.consumeMatchFn(syntax.isWhitespace);
623
702
  }
624
703
 
625
704
  /**
@@ -630,6 +709,7 @@ export class Parser {
630
709
  */
631
710
  consumeXmlDeclaration(): boolean {
632
711
  let { scanner } = this;
712
+ let startIndex = scanner.charIndex;
633
713
 
634
714
  if (!scanner.consumeStringFast('<?xml')) {
635
715
  return false;
@@ -639,7 +719,7 @@ export class Parser {
639
719
  throw this.error('Invalid XML declaration');
640
720
  }
641
721
 
642
- let version = Boolean(scanner.consumeStringFast('version'))
722
+ let version = !!scanner.consumeStringFast('version')
643
723
  && this.consumeEqual()
644
724
  && this.consumeSystemLiteral();
645
725
 
@@ -649,8 +729,11 @@ export class Parser {
649
729
  throw this.error('Invalid character in version number');
650
730
  }
651
731
 
732
+ let encoding;
733
+ let standalone;
734
+
652
735
  if (this.consumeWhitespace()) {
653
- let encoding = Boolean(scanner.consumeStringFast('encoding'))
736
+ encoding = !!scanner.consumeStringFast('encoding')
654
737
  && this.consumeEqual()
655
738
  && this.consumeSystemLiteral();
656
739
 
@@ -658,7 +741,7 @@ export class Parser {
658
741
  this.consumeWhitespace();
659
742
  }
660
743
 
661
- let standalone = Boolean(scanner.consumeStringFast('standalone'))
744
+ standalone = !!scanner.consumeStringFast('standalone')
662
745
  && this.consumeEqual()
663
746
  && this.consumeSystemLiteral();
664
747
 
@@ -675,65 +758,21 @@ export class Parser {
675
758
  throw this.error('Invalid or unclosed XML declaration');
676
759
  }
677
760
 
678
- return true;
761
+ return this.options.preserveXmlDeclaration
762
+ ? this.addNode(new XmlDeclaration(
763
+ version,
764
+ encoding || undefined,
765
+ (standalone as 'yes' | 'no' | false) || undefined,
766
+ ), startIndex)
767
+ : true;
679
768
  }
680
769
 
681
770
  /**
682
- * Throws an error at the current scanner position.
771
+ * Returns an `XmlError` for the current scanner position.
683
772
  */
684
773
  error(message: string) {
685
- let { charIndex, string: xml } = this.scanner;
686
- let column = 1;
687
- let excerpt = '';
688
- let line = 1;
689
-
690
- // Find the line and column where the error occurred.
691
- for (let i = 0; i < charIndex; ++i) {
692
- let char = xml[i];
693
-
694
- if (char === '\n') {
695
- column = 1;
696
- excerpt = '';
697
- line += 1;
698
- } else {
699
- column += 1;
700
- excerpt += char;
701
- }
702
- }
703
-
704
- let eol = xml.indexOf('\n', charIndex);
705
-
706
- excerpt += eol === -1
707
- ? xml.slice(charIndex)
708
- : xml.slice(charIndex, eol);
709
-
710
- let excerptStart = 0;
711
-
712
- // Keep the excerpt below 50 chars, but always keep the error position in
713
- // view.
714
- if (excerpt.length > 50) {
715
- if (column < 40) {
716
- excerpt = excerpt.slice(0, 50);
717
- } else {
718
- excerptStart = column - 20;
719
- excerpt = excerpt.slice(excerptStart, column + 30);
720
- }
721
- }
722
-
723
- let err = new Error(
724
- `${message} (line ${line}, column ${column})\n`
725
- + ` ${excerpt}\n`
726
- + ' '.repeat(column - excerptStart + 1) + '^\n',
727
- );
728
-
729
- Object.assign(err, {
730
- column,
731
- excerpt,
732
- line,
733
- pos: charIndex,
734
- });
735
-
736
- return err;
774
+ let { scanner } = this;
775
+ return new XmlError(message, scanner.charIndex, scanner.string);
737
776
  }
738
777
 
739
778
  /**
@@ -761,15 +800,19 @@ export class Parser {
761
800
  // -- Private Functions --------------------------------------------------------
762
801
 
763
802
  /**
764
- * Normalizes the given XML string by stripping a byte order mark (if present)
765
- * and replacing CRLF sequences and lone CR characters with LF characters.
803
+ * Normalizes line breaks in the given text by replacing CRLF sequences and lone
804
+ * CR characters with LF characters.
766
805
  */
767
- function normalizeXmlString(xml: string): string {
768
- if (xml[0] === '\uFEFF') {
769
- xml = xml.slice(1);
806
+ function normalizeLineBreaks(text: string): string {
807
+ let i = 0;
808
+
809
+ while ((i = text.indexOf('\r', i)) !== -1) {
810
+ text = text[i + 1] === '\n'
811
+ ? text.slice(0, i) + text.slice(i + 1)
812
+ : text.slice(0, i) + '\n' + text.slice(i + 1);
770
813
  }
771
814
 
772
- return xml.replace(/\r\n?/g, '\n');
815
+ return text;
773
816
  }
774
817
 
775
818
  // -- Types --------------------------------------------------------------------
@@ -782,6 +825,14 @@ export type ParserOptions = {
782
825
  */
783
826
  ignoreUndefinedEntities?: boolean;
784
827
 
828
+ /**
829
+ * When `true`, the starting and ending byte offsets of each node in the input
830
+ * string will be made available via `start` and `end` properties on the node.
831
+ *
832
+ * @default false
833
+ */
834
+ includeOffsets?: boolean;
835
+
785
836
  /**
786
837
  * When `true`, CDATA sections will be preserved in the document as `XmlCdata`
787
838
  * nodes. Otherwise CDATA sections will be represented as `XmlText` nodes,
@@ -799,6 +850,32 @@ export type ParserOptions = {
799
850
  */
800
851
  preserveComments?: boolean;
801
852
 
853
+ /**
854
+ * When `true`, a document type declaration (if present) will be preserved in
855
+ * the document as an `XmlDocumentType` node. Otherwise the declaration will
856
+ * not be included in the node tree.
857
+ *
858
+ * Note that when this is `true` and a document type declaration is present,
859
+ * the DTD will precede the root node in the node tree (normally the root
860
+ * node would be first).
861
+ *
862
+ * @default false
863
+ */
864
+ preserveDocumentType?: boolean;
865
+
866
+ /**
867
+ * When `true`, an XML declaration (if present) will be preserved in the
868
+ * document as an `XmlDeclaration` node. Otherwise the declaration will not be
869
+ * included in the node tree.
870
+ *
871
+ * Note that when this is `true` and an XML declaration is present, the
872
+ * XML declaration will be the first child of the document (normally the root
873
+ * node would be first).
874
+ *
875
+ * @default false
876
+ */
877
+ preserveXmlDeclaration?: boolean;
878
+
802
879
  /**
803
880
  * When an undefined named entity is encountered, this function will be called
804
881
  * with the entity as its only argument. It should return a string value with
package/src/lib/StringScanner.ts CHANGED
@@ -42,16 +42,6 @@ export class StringScanner {
42
42
 
43
43
  // -- Protected Methods ------------------------------------------------------
44
44
 
45
- /**
46
- * Returns the byte index of the given character index in the string. The two
47
- * may differ in strings that contain multibyte characters.
48
- */
49
- protected charIndexToByteIndex(charIndex: number = this.charIndex): number {
50
- return this.multiByteMode
51
- ? (this.charsToBytes as number[])[charIndex] ?? Infinity
52
- : charIndex;
53
- }
54
-
55
45
  /**
56
46
  * Returns the number of characters in the given string, which may differ from
57
47
  * the byte length if the string contains multibyte characters.
@@ -75,6 +65,16 @@ export class StringScanner {
75
65
  this.charIndex = Math.min(this.charCount, this.charIndex + count);
76
66
  }
77
67
 
68
+ /**
69
+ * Returns the byte index of the given character index in the string. The two
70
+ * may differ in strings that contain multibyte characters.
71
+ */
72
+ charIndexToByteIndex(charIndex: number = this.charIndex): number {
73
+ return this.multiByteMode
74
+ ? (this.charsToBytes as number[])[charIndex] ?? Infinity
75
+ : charIndex;
76
+ }
77
+
78
78
  /**
79
79
  * Consumes and returns the given number of characters if possible, advancing
80
80
  * the scanner and stopping if the end of the string is reached.
package/src/lib/XmlDeclaration.ts ADDED
@@ -0,0 +1,58 @@
1
+ import { XmlNode } from './XmlNode.js';
2
+
3
+ /**
4
+ * An XML declaration within an XML document.
5
+ *
6
+ * @example
7
+ *
8
+ * ```xml
9
+ * <?xml version="1.0" encoding="UTF-8"?>
10
+ * ```
11
+ */
12
+ export class XmlDeclaration extends XmlNode {
13
+ /**
14
+ * Value of the encoding declaration in this XML declaration, or `null` if no
15
+ * encoding declaration was present.
16
+ */
17
+ encoding: string | null;
18
+
19
+ /**
20
+ * Value of the standalone declaration in this XML declaration, or `null` if
21
+ * no standalone declaration was present.
22
+ */
23
+ standalone: 'yes' | 'no' | null;
24
+
25
+ /**
26
+ * Value of the version declaration in this XML declaration.
27
+ */
28
+ version: string;
29
+
30
+ constructor(
31
+ version: string,
32
+ encoding?: string,
33
+ standalone?: typeof XmlDeclaration.prototype.standalone,
34
+ ) {
35
+ super();
36
+
37
+ this.version = version;
38
+ this.encoding = encoding ?? null;
39
+ this.standalone = standalone ?? null;
40
+ }
41
+
42
+ override get type() {
43
+ return XmlNode.TYPE_XML_DECLARATION;
44
+ }
45
+
46
+ override toJSON() {
47
+ let json = XmlNode.prototype.toJSON.call(this);
48
+ json.version = this.version;
49
+
50
+ for (let key of ['encoding', 'standalone'] as const) {
51
+ if (this[key] !== null) {
52
+ json[key] = this[key];
53
+ }
54
+ }
55
+
56
+ return json;
57
+ }
58
+ }
package/src/lib/XmlDocument.ts CHANGED
@@ -2,6 +2,8 @@ import { XmlElement } from './XmlElement.js';
2
2
  import { XmlNode } from './XmlNode.js';
3
3
 
4
4
  import type { XmlComment } from './XmlComment.js';
5
+ import type { XmlDeclaration } from './XmlDeclaration.js';
6
+ import type { XmlDocumentType } from './XmlDocumentType.js';
5
7
  import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
6
8
 
7
9
  /**
@@ -12,9 +14,9 @@ export class XmlDocument extends XmlNode {
12
14
  /**
13
15
  * Child nodes of this document.
14
16
  */
15
- readonly children: Array<XmlComment | XmlProcessingInstruction | XmlElement>;
17
+ readonly children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlProcessingInstruction | XmlElement>;
16
18
 
17
- constructor(children: Array<XmlComment | XmlElement | XmlProcessingInstruction> = []) {
19
+ constructor(children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlElement | XmlProcessingInstruction> = []) {
18
20
  super();
19
21
  this.children = children;
20
22
  }