@rgrove/parse-xml 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -25
- package/dist/browser.js +642 -223
- package/dist/browser.js.map +4 -4
- package/dist/global.min.js +9 -8
- package/dist/global.min.js.map +4 -4
- package/dist/index.d.ts +4 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/lib/Parser.d.ts +49 -6
- package/dist/lib/Parser.d.ts.map +1 -1
- package/dist/lib/Parser.js +133 -102
- package/dist/lib/Parser.js.map +1 -1
- package/dist/lib/StringScanner.d.ts +5 -5
- package/dist/lib/StringScanner.d.ts.map +1 -1
- package/dist/lib/StringScanner.js +9 -9
- package/dist/lib/StringScanner.js.map +1 -1
- package/dist/lib/XmlDeclaration.d.ts +30 -0
- package/dist/lib/XmlDeclaration.d.ts.map +1 -0
- package/dist/lib/XmlDeclaration.js +36 -0
- package/dist/lib/XmlDeclaration.js.map +1 -0
- package/dist/lib/XmlDocument.d.ts +4 -2
- package/dist/lib/XmlDocument.d.ts.map +1 -1
- package/dist/lib/XmlDocument.js.map +1 -1
- package/dist/lib/XmlDocumentType.d.ts +37 -0
- package/dist/lib/XmlDocumentType.d.ts.map +1 -0
- package/dist/lib/XmlDocumentType.js +39 -0
- package/dist/lib/XmlDocumentType.js.map +1 -0
- package/dist/lib/XmlError.d.ts +24 -0
- package/dist/lib/XmlError.d.ts.map +1 -0
- package/dist/lib/XmlError.js +52 -0
- package/dist/lib/XmlError.js.map +1 -0
- package/dist/lib/XmlNode.d.ts +20 -1
- package/dist/lib/XmlNode.d.ts.map +1 -1
- package/dist/lib/XmlNode.js +28 -3
- package/dist/lib/XmlNode.js.map +1 -1
- package/dist/lib/syntax.d.ts.map +1 -1
- package/dist/lib/syntax.js +1 -1
- package/dist/lib/syntax.js.map +1 -1
- package/dist/lib/types.d.ts +2 -2
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +28 -27
- package/src/index.ts +4 -1
- package/src/lib/Parser.ts +195 -118
- package/src/lib/StringScanner.ts +10 -10
- package/src/lib/XmlDeclaration.ts +58 -0
- package/src/lib/XmlDocument.ts +4 -2
- package/src/lib/XmlDocumentType.ts +67 -0
- package/src/lib/XmlError.ts +80 -0
- package/src/lib/XmlNode.ts +33 -3
- package/src/lib/syntax.ts +1 -1
package/src/lib/Parser.ts
CHANGED
|
@@ -2,14 +2,15 @@ import { StringScanner } from './StringScanner.js';
|
|
|
2
2
|
import * as syntax from './syntax.js';
|
|
3
3
|
import { XmlCdata } from './XmlCdata.js';
|
|
4
4
|
import { XmlComment } from './XmlComment.js';
|
|
5
|
+
import { XmlDeclaration } from './XmlDeclaration.js';
|
|
5
6
|
import { XmlDocument } from './XmlDocument.js';
|
|
7
|
+
import { XmlDocumentType } from './XmlDocumentType.js';
|
|
6
8
|
import { XmlElement } from './XmlElement.js';
|
|
9
|
+
import { XmlError } from './XmlError.js';
|
|
10
|
+
import { XmlNode } from './XmlNode.js';
|
|
7
11
|
import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
|
8
12
|
import { XmlText } from './XmlText.js';
|
|
9
13
|
|
|
10
|
-
import type { XmlNode } from './XmlNode.js';
|
|
11
|
-
|
|
12
|
-
|
|
13
14
|
const emptyString = '';
|
|
14
15
|
|
|
15
16
|
/**
|
|
@@ -29,11 +30,18 @@ export class Parser {
|
|
|
29
30
|
* @param options Parser options.
|
|
30
31
|
*/
|
|
31
32
|
constructor(xml: string, options: ParserOptions = {}) {
|
|
32
|
-
this.document = new XmlDocument();
|
|
33
|
-
this.
|
|
33
|
+
let doc = this.document = new XmlDocument();
|
|
34
|
+
let scanner = this.scanner = new StringScanner(xml);
|
|
35
|
+
|
|
36
|
+
this.currentNode = doc;
|
|
34
37
|
this.options = options;
|
|
35
|
-
this.scanner = new StringScanner(normalizeXmlString(xml));
|
|
36
38
|
|
|
39
|
+
if (this.options.includeOffsets) {
|
|
40
|
+
doc.start = 0;
|
|
41
|
+
doc.end = xml.length;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
scanner.consumeStringFast('\uFEFF'); // byte order mark
|
|
37
45
|
this.consumeProlog();
|
|
38
46
|
|
|
39
47
|
if (!this.consumeElement()) {
|
|
@@ -42,7 +50,7 @@ export class Parser {
|
|
|
42
50
|
|
|
43
51
|
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
44
52
|
|
|
45
|
-
if (!
|
|
53
|
+
if (!scanner.isEnd) {
|
|
46
54
|
throw this.error('Extra content at the end of the document');
|
|
47
55
|
}
|
|
48
56
|
}
|
|
@@ -50,35 +58,50 @@ export class Parser {
|
|
|
50
58
|
/**
|
|
51
59
|
* Adds the given `XmlNode` as a child of `this.currentNode`.
|
|
52
60
|
*/
|
|
53
|
-
addNode(node: XmlNode) {
|
|
61
|
+
addNode(node: XmlNode, charIndex: number) {
|
|
54
62
|
node.parent = this.currentNode;
|
|
55
63
|
|
|
64
|
+
if (this.options.includeOffsets) {
|
|
65
|
+
node.start = this.scanner.charIndexToByteIndex(charIndex);
|
|
66
|
+
node.end = this.scanner.charIndexToByteIndex();
|
|
67
|
+
}
|
|
68
|
+
|
|
56
69
|
// @ts-expect-error: XmlDocument has a more limited set of possible children
|
|
57
70
|
// than XmlElement so TypeScript is unhappy, but we always do the right
|
|
58
71
|
// thing.
|
|
59
72
|
this.currentNode.children.push(node);
|
|
73
|
+
return true;
|
|
60
74
|
}
|
|
61
75
|
|
|
62
76
|
/**
|
|
63
77
|
* Adds the given _text_ to the document, either by appending it to a
|
|
64
78
|
* preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
|
|
65
79
|
*/
|
|
66
|
-
addText(text: string) {
|
|
80
|
+
addText(text: string, charIndex: number) {
|
|
67
81
|
let { children } = this.currentNode;
|
|
68
82
|
let { length } = children;
|
|
69
83
|
|
|
84
|
+
text = normalizeLineBreaks(text);
|
|
85
|
+
|
|
70
86
|
if (length > 0) {
|
|
71
87
|
let prevNode = children[length - 1];
|
|
72
88
|
|
|
73
|
-
if (prevNode
|
|
89
|
+
if (prevNode?.type === XmlNode.TYPE_TEXT) {
|
|
90
|
+
let textNode = prevNode as XmlText;
|
|
91
|
+
|
|
74
92
|
// The previous node is a text node, so we can append to it and avoid
|
|
75
93
|
// creating another node.
|
|
76
|
-
|
|
77
|
-
|
|
94
|
+
textNode.text += text;
|
|
95
|
+
|
|
96
|
+
if (this.options.includeOffsets) {
|
|
97
|
+
textNode.end = this.scanner.charIndexToByteIndex();
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return true;
|
|
78
101
|
}
|
|
79
102
|
}
|
|
80
103
|
|
|
81
|
-
this.addNode(new XmlText(text));
|
|
104
|
+
return this.addNode(new XmlText(text), charIndex);
|
|
82
105
|
}
|
|
83
106
|
|
|
84
107
|
/**
|
|
@@ -199,6 +222,7 @@ export class Parser {
|
|
|
199
222
|
*/
|
|
200
223
|
consumeCdataSection(): boolean {
|
|
201
224
|
let { scanner } = this;
|
|
225
|
+
let startIndex = scanner.charIndex;
|
|
202
226
|
|
|
203
227
|
if (!scanner.consumeStringFast('<![CDATA[')) {
|
|
204
228
|
return false;
|
|
@@ -211,13 +235,9 @@ export class Parser {
|
|
|
211
235
|
throw this.error('Unclosed CDATA section');
|
|
212
236
|
}
|
|
213
237
|
|
|
214
|
-
|
|
215
|
-
this.addNode(new XmlCdata(text))
|
|
216
|
-
|
|
217
|
-
this.addText(text);
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
return true;
|
|
238
|
+
return this.options.preserveCdata
|
|
239
|
+
? this.addNode(new XmlCdata(normalizeLineBreaks(text)), startIndex)
|
|
240
|
+
: this.addText(text, startIndex);
|
|
221
241
|
}
|
|
222
242
|
|
|
223
243
|
/**
|
|
@@ -228,6 +248,7 @@ export class Parser {
|
|
|
228
248
|
*/
|
|
229
249
|
consumeCharData(): boolean {
|
|
230
250
|
let { scanner } = this;
|
|
251
|
+
let startIndex = scanner.charIndex;
|
|
231
252
|
let charData = scanner.consumeUntilMatch(syntax.endCharData);
|
|
232
253
|
|
|
233
254
|
if (!charData) {
|
|
@@ -240,8 +261,7 @@ export class Parser {
|
|
|
240
261
|
throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
|
|
241
262
|
}
|
|
242
263
|
|
|
243
|
-
this.addText(charData);
|
|
244
|
-
return true;
|
|
264
|
+
return this.addText(charData, startIndex);
|
|
245
265
|
}
|
|
246
266
|
|
|
247
267
|
/**
|
|
@@ -252,6 +272,7 @@ export class Parser {
|
|
|
252
272
|
*/
|
|
253
273
|
consumeComment(): boolean {
|
|
254
274
|
let { scanner } = this;
|
|
275
|
+
let startIndex = scanner.charIndex;
|
|
255
276
|
|
|
256
277
|
if (!scanner.consumeStringFast('<!--')) {
|
|
257
278
|
return false;
|
|
@@ -268,11 +289,9 @@ export class Parser {
|
|
|
268
289
|
throw this.error('Unclosed comment');
|
|
269
290
|
}
|
|
270
291
|
|
|
271
|
-
|
|
272
|
-
this.addNode(new XmlComment(content
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
return true;
|
|
292
|
+
return this.options.preserveComments
|
|
293
|
+
? this.addNode(new XmlComment(normalizeLineBreaks(content)), startIndex)
|
|
294
|
+
: true;
|
|
276
295
|
}
|
|
277
296
|
|
|
278
297
|
/**
|
|
@@ -285,14 +304,12 @@ export class Parser {
|
|
|
285
304
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
|
|
286
305
|
*/
|
|
287
306
|
consumeContentReference(): boolean {
|
|
307
|
+
let startIndex = this.scanner.charIndex;
|
|
288
308
|
let ref = this.consumeReference();
|
|
289
309
|
|
|
290
|
-
|
|
291
|
-
this.addText(ref)
|
|
292
|
-
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
return false;
|
|
310
|
+
return ref
|
|
311
|
+
? this.addText(ref, startIndex)
|
|
312
|
+
: false;
|
|
296
313
|
}
|
|
297
314
|
|
|
298
315
|
/**
|
|
@@ -306,25 +323,68 @@ export class Parser {
|
|
|
306
323
|
*/
|
|
307
324
|
consumeDoctypeDeclaration(): boolean {
|
|
308
325
|
let { scanner } = this;
|
|
326
|
+
let startIndex = scanner.charIndex;
|
|
309
327
|
|
|
310
|
-
if (!scanner.consumeStringFast('<!DOCTYPE')
|
|
311
|
-
|| !this.consumeWhitespace()) {
|
|
312
|
-
|
|
328
|
+
if (!scanner.consumeStringFast('<!DOCTYPE')) {
|
|
313
329
|
return false;
|
|
314
330
|
}
|
|
315
331
|
|
|
316
|
-
|
|
332
|
+
let name = this.consumeWhitespace()
|
|
333
|
+
&& this.consumeName();
|
|
317
334
|
|
|
318
|
-
if (
|
|
319
|
-
|
|
335
|
+
if (!name) {
|
|
336
|
+
throw this.error('Expected a name');
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
let publicId;
|
|
340
|
+
let systemId;
|
|
341
|
+
|
|
342
|
+
if (this.consumeWhitespace()) {
|
|
343
|
+
if (scanner.consumeStringFast('PUBLIC')) {
|
|
344
|
+
publicId = this.consumeWhitespace()
|
|
345
|
+
&& this.consumePubidLiteral();
|
|
346
|
+
|
|
347
|
+
if (publicId === false) {
|
|
348
|
+
throw this.error('Expected a public identifier');
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
this.consumeWhitespace();
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
if (publicId !== undefined || scanner.consumeStringFast('SYSTEM')) {
|
|
355
|
+
this.consumeWhitespace();
|
|
356
|
+
systemId = this.consumeSystemLiteral();
|
|
357
|
+
|
|
358
|
+
if (systemId === false) {
|
|
359
|
+
throw this.error('Expected a system identifier');
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
this.consumeWhitespace();
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
let internalSubset;
|
|
367
|
+
|
|
368
|
+
if (scanner.consumeStringFast('[')) {
|
|
369
|
+
// The internal subset may contain comments that contain `]` characters,
|
|
370
|
+
// so we can't use `consumeUntilString()` here.
|
|
371
|
+
internalSubset = scanner.consumeUntilMatch(/\][\x20\t\r\n]*>/);
|
|
372
|
+
|
|
373
|
+
if (!scanner.consumeStringFast(']')) {
|
|
374
|
+
throw this.error('Unclosed internal subset');
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
this.consumeWhitespace();
|
|
320
378
|
}
|
|
321
379
|
|
|
322
380
|
if (!scanner.consumeStringFast('>')) {
|
|
323
381
|
throw this.error('Unclosed doctype declaration');
|
|
324
382
|
}
|
|
325
383
|
|
|
326
|
-
return
|
|
327
|
-
|
|
384
|
+
return this.options.preserveDocumentType
|
|
385
|
+
? this.addNode(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex)
|
|
386
|
+
: true;
|
|
387
|
+
}
|
|
328
388
|
|
|
329
389
|
/**
|
|
330
390
|
* Consumes an element if possible.
|
|
@@ -334,7 +394,7 @@ export class Parser {
|
|
|
334
394
|
*/
|
|
335
395
|
consumeElement(): boolean {
|
|
336
396
|
let { scanner } = this;
|
|
337
|
-
let
|
|
397
|
+
let startIndex = scanner.charIndex;
|
|
338
398
|
|
|
339
399
|
if (!scanner.consumeStringFast('<')) {
|
|
340
400
|
return false;
|
|
@@ -343,12 +403,12 @@ export class Parser {
|
|
|
343
403
|
let name = this.consumeName();
|
|
344
404
|
|
|
345
405
|
if (!name) {
|
|
346
|
-
scanner.reset(
|
|
406
|
+
scanner.reset(startIndex);
|
|
347
407
|
return false;
|
|
348
408
|
}
|
|
349
409
|
|
|
350
410
|
let attributes = this.consumeAttributes();
|
|
351
|
-
let isEmpty =
|
|
411
|
+
let isEmpty = !!scanner.consumeStringFast('/>');
|
|
352
412
|
let element = new XmlElement(name, attributes);
|
|
353
413
|
|
|
354
414
|
element.parent = this.currentNode;
|
|
@@ -390,8 +450,7 @@ export class Parser {
|
|
|
390
450
|
this.currentNode = element.parent;
|
|
391
451
|
}
|
|
392
452
|
|
|
393
|
-
this.addNode(element);
|
|
394
|
-
return true;
|
|
453
|
+
return this.addNode(element, startIndex);
|
|
395
454
|
}
|
|
396
455
|
|
|
397
456
|
/**
|
|
@@ -443,7 +502,7 @@ export class Parser {
|
|
|
443
502
|
*/
|
|
444
503
|
consumeProcessingInstruction(): boolean {
|
|
445
504
|
let { scanner } = this;
|
|
446
|
-
let
|
|
505
|
+
let startIndex = scanner.charIndex;
|
|
447
506
|
|
|
448
507
|
if (!scanner.consumeStringFast('<?')) {
|
|
449
508
|
return false;
|
|
@@ -453,7 +512,7 @@ export class Parser {
|
|
|
453
512
|
|
|
454
513
|
if (name) {
|
|
455
514
|
if (name.toLowerCase() === 'xml') {
|
|
456
|
-
scanner.reset(
|
|
515
|
+
scanner.reset(startIndex);
|
|
457
516
|
throw this.error("XML declaration isn't allowed here");
|
|
458
517
|
}
|
|
459
518
|
} else {
|
|
@@ -462,8 +521,7 @@ export class Parser {
|
|
|
462
521
|
|
|
463
522
|
if (!this.consumeWhitespace()) {
|
|
464
523
|
if (scanner.consumeStringFast('?>')) {
|
|
465
|
-
this.addNode(new XmlProcessingInstruction(name));
|
|
466
|
-
return true;
|
|
524
|
+
return this.addNode(new XmlProcessingInstruction(name), startIndex);
|
|
467
525
|
}
|
|
468
526
|
|
|
469
527
|
throw this.error('Whitespace is required after a processing instruction name');
|
|
@@ -476,8 +534,7 @@ export class Parser {
|
|
|
476
534
|
throw this.error('Unterminated processing instruction');
|
|
477
535
|
}
|
|
478
536
|
|
|
479
|
-
this.addNode(new XmlProcessingInstruction(name, content));
|
|
480
|
-
return true;
|
|
537
|
+
return this.addNode(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
|
|
481
538
|
}
|
|
482
539
|
|
|
483
540
|
/**
|
|
@@ -488,7 +545,7 @@ export class Parser {
|
|
|
488
545
|
*/
|
|
489
546
|
consumeProlog(): boolean {
|
|
490
547
|
let { scanner } = this;
|
|
491
|
-
let
|
|
548
|
+
let startIndex = scanner.charIndex;
|
|
492
549
|
|
|
493
550
|
this.consumeXmlDeclaration();
|
|
494
551
|
|
|
@@ -498,7 +555,29 @@ export class Parser {
|
|
|
498
555
|
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
499
556
|
}
|
|
500
557
|
|
|
501
|
-
return
|
|
558
|
+
return startIndex < scanner.charIndex;
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
/**
|
|
562
|
+
* Consumes a public identifier literal if possible.
|
|
563
|
+
*
|
|
564
|
+
* @returns
|
|
565
|
+
* Value of the public identifier literal minus quotes, or `false` if
|
|
566
|
+
* nothing was consumed. An empty string indicates that a public id literal
|
|
567
|
+
* was consumed but was empty.
|
|
568
|
+
*
|
|
569
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
|
|
570
|
+
*/
|
|
571
|
+
consumePubidLiteral(): string | false {
|
|
572
|
+
let startIndex = this.scanner.charIndex;
|
|
573
|
+
let value = this.consumeSystemLiteral();
|
|
574
|
+
|
|
575
|
+
if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
|
|
576
|
+
this.scanner.reset(startIndex);
|
|
577
|
+
throw this.error('Invalid character in public identifier');
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
return value;
|
|
502
581
|
}
|
|
503
582
|
|
|
504
583
|
/**
|
|
@@ -619,7 +698,7 @@ export class Parser {
|
|
|
619
698
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
|
|
620
699
|
*/
|
|
621
700
|
consumeWhitespace(): boolean {
|
|
622
|
-
return
|
|
701
|
+
return !!this.scanner.consumeMatchFn(syntax.isWhitespace);
|
|
623
702
|
}
|
|
624
703
|
|
|
625
704
|
/**
|
|
@@ -630,6 +709,7 @@ export class Parser {
|
|
|
630
709
|
*/
|
|
631
710
|
consumeXmlDeclaration(): boolean {
|
|
632
711
|
let { scanner } = this;
|
|
712
|
+
let startIndex = scanner.charIndex;
|
|
633
713
|
|
|
634
714
|
if (!scanner.consumeStringFast('<?xml')) {
|
|
635
715
|
return false;
|
|
@@ -639,7 +719,7 @@ export class Parser {
|
|
|
639
719
|
throw this.error('Invalid XML declaration');
|
|
640
720
|
}
|
|
641
721
|
|
|
642
|
-
let version =
|
|
722
|
+
let version = !!scanner.consumeStringFast('version')
|
|
643
723
|
&& this.consumeEqual()
|
|
644
724
|
&& this.consumeSystemLiteral();
|
|
645
725
|
|
|
@@ -649,8 +729,11 @@ export class Parser {
|
|
|
649
729
|
throw this.error('Invalid character in version number');
|
|
650
730
|
}
|
|
651
731
|
|
|
732
|
+
let encoding;
|
|
733
|
+
let standalone;
|
|
734
|
+
|
|
652
735
|
if (this.consumeWhitespace()) {
|
|
653
|
-
|
|
736
|
+
encoding = !!scanner.consumeStringFast('encoding')
|
|
654
737
|
&& this.consumeEqual()
|
|
655
738
|
&& this.consumeSystemLiteral();
|
|
656
739
|
|
|
@@ -658,7 +741,7 @@ export class Parser {
|
|
|
658
741
|
this.consumeWhitespace();
|
|
659
742
|
}
|
|
660
743
|
|
|
661
|
-
|
|
744
|
+
standalone = !!scanner.consumeStringFast('standalone')
|
|
662
745
|
&& this.consumeEqual()
|
|
663
746
|
&& this.consumeSystemLiteral();
|
|
664
747
|
|
|
@@ -675,65 +758,21 @@ export class Parser {
|
|
|
675
758
|
throw this.error('Invalid or unclosed XML declaration');
|
|
676
759
|
}
|
|
677
760
|
|
|
678
|
-
return
|
|
761
|
+
return this.options.preserveXmlDeclaration
|
|
762
|
+
? this.addNode(new XmlDeclaration(
|
|
763
|
+
version,
|
|
764
|
+
encoding || undefined,
|
|
765
|
+
(standalone as 'yes' | 'no' | false) || undefined,
|
|
766
|
+
), startIndex)
|
|
767
|
+
: true;
|
|
679
768
|
}
|
|
680
769
|
|
|
681
770
|
/**
|
|
682
|
-
*
|
|
771
|
+
* Returns an `XmlError` for the current scanner position.
|
|
683
772
|
*/
|
|
684
773
|
error(message: string) {
|
|
685
|
-
let {
|
|
686
|
-
|
|
687
|
-
let excerpt = '';
|
|
688
|
-
let line = 1;
|
|
689
|
-
|
|
690
|
-
// Find the line and column where the error occurred.
|
|
691
|
-
for (let i = 0; i < charIndex; ++i) {
|
|
692
|
-
let char = xml[i];
|
|
693
|
-
|
|
694
|
-
if (char === '\n') {
|
|
695
|
-
column = 1;
|
|
696
|
-
excerpt = '';
|
|
697
|
-
line += 1;
|
|
698
|
-
} else {
|
|
699
|
-
column += 1;
|
|
700
|
-
excerpt += char;
|
|
701
|
-
}
|
|
702
|
-
}
|
|
703
|
-
|
|
704
|
-
let eol = xml.indexOf('\n', charIndex);
|
|
705
|
-
|
|
706
|
-
excerpt += eol === -1
|
|
707
|
-
? xml.slice(charIndex)
|
|
708
|
-
: xml.slice(charIndex, eol);
|
|
709
|
-
|
|
710
|
-
let excerptStart = 0;
|
|
711
|
-
|
|
712
|
-
// Keep the excerpt below 50 chars, but always keep the error position in
|
|
713
|
-
// view.
|
|
714
|
-
if (excerpt.length > 50) {
|
|
715
|
-
if (column < 40) {
|
|
716
|
-
excerpt = excerpt.slice(0, 50);
|
|
717
|
-
} else {
|
|
718
|
-
excerptStart = column - 20;
|
|
719
|
-
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
720
|
-
}
|
|
721
|
-
}
|
|
722
|
-
|
|
723
|
-
let err = new Error(
|
|
724
|
-
`${message} (line ${line}, column ${column})\n`
|
|
725
|
-
+ ` ${excerpt}\n`
|
|
726
|
-
+ ' '.repeat(column - excerptStart + 1) + '^\n',
|
|
727
|
-
);
|
|
728
|
-
|
|
729
|
-
Object.assign(err, {
|
|
730
|
-
column,
|
|
731
|
-
excerpt,
|
|
732
|
-
line,
|
|
733
|
-
pos: charIndex,
|
|
734
|
-
});
|
|
735
|
-
|
|
736
|
-
return err;
|
|
774
|
+
let { scanner } = this;
|
|
775
|
+
return new XmlError(message, scanner.charIndex, scanner.string);
|
|
737
776
|
}
|
|
738
777
|
|
|
739
778
|
/**
|
|
@@ -761,15 +800,19 @@ export class Parser {
|
|
|
761
800
|
// -- Private Functions --------------------------------------------------------
|
|
762
801
|
|
|
763
802
|
/**
|
|
764
|
-
* Normalizes the given
|
|
765
|
-
*
|
|
803
|
+
* Normalizes line breaks in the given text by replacing CRLF sequences and lone
|
|
804
|
+
* CR characters with LF characters.
|
|
766
805
|
*/
|
|
767
|
-
function
|
|
768
|
-
|
|
769
|
-
|
|
806
|
+
function normalizeLineBreaks(text: string): string {
|
|
807
|
+
let i = 0;
|
|
808
|
+
|
|
809
|
+
while ((i = text.indexOf('\r', i)) !== -1) {
|
|
810
|
+
text = text[i + 1] === '\n'
|
|
811
|
+
? text.slice(0, i) + text.slice(i + 1)
|
|
812
|
+
: text.slice(0, i) + '\n' + text.slice(i + 1);
|
|
770
813
|
}
|
|
771
814
|
|
|
772
|
-
return
|
|
815
|
+
return text;
|
|
773
816
|
}
|
|
774
817
|
|
|
775
818
|
// -- Types --------------------------------------------------------------------
|
|
@@ -782,6 +825,14 @@ export type ParserOptions = {
|
|
|
782
825
|
*/
|
|
783
826
|
ignoreUndefinedEntities?: boolean;
|
|
784
827
|
|
|
828
|
+
/**
|
|
829
|
+
* When `true`, the starting and ending byte offsets of each node in the input
|
|
830
|
+
* string will be made available via `start` and `end` properties on the node.
|
|
831
|
+
*
|
|
832
|
+
* @default false
|
|
833
|
+
*/
|
|
834
|
+
includeOffsets?: boolean;
|
|
835
|
+
|
|
785
836
|
/**
|
|
786
837
|
* When `true`, CDATA sections will be preserved in the document as `XmlCdata`
|
|
787
838
|
* nodes. Otherwise CDATA sections will be represented as `XmlText` nodes,
|
|
@@ -799,6 +850,32 @@ export type ParserOptions = {
|
|
|
799
850
|
*/
|
|
800
851
|
preserveComments?: boolean;
|
|
801
852
|
|
|
853
|
+
/**
|
|
854
|
+
* When `true`, a document type declaration (if present) will be preserved in
|
|
855
|
+
* the document as an `XmlDocumentType` node. Otherwise the declaration will
|
|
856
|
+
* not be included in the node tree.
|
|
857
|
+
*
|
|
858
|
+
* Note that when this is `true` and a document type declaration is present,
|
|
859
|
+
* the DTD will precede the root node in the node tree (normally the root
|
|
860
|
+
* node would be first).
|
|
861
|
+
*
|
|
862
|
+
* @default false
|
|
863
|
+
*/
|
|
864
|
+
preserveDocumentType?: boolean;
|
|
865
|
+
|
|
866
|
+
/**
|
|
867
|
+
* When `true`, an XML declaration (if present) will be preserved in the
|
|
868
|
+
* document as an `XmlDeclaration` node. Otherwise the declaration will not be
|
|
869
|
+
* included in the node tree.
|
|
870
|
+
*
|
|
871
|
+
* Note that when this is `true` and an XML declaration is present, the
|
|
872
|
+
* XML declaration will be the first child of the document (normally the root
|
|
873
|
+
* node would be first).
|
|
874
|
+
*
|
|
875
|
+
* @default false
|
|
876
|
+
*/
|
|
877
|
+
preserveXmlDeclaration?: boolean;
|
|
878
|
+
|
|
802
879
|
/**
|
|
803
880
|
* When an undefined named entity is encountered, this function will be called
|
|
804
881
|
* with the entity as its only argument. It should return a string value with
|
package/src/lib/StringScanner.ts
CHANGED
|
@@ -42,16 +42,6 @@ export class StringScanner {
|
|
|
42
42
|
|
|
43
43
|
// -- Protected Methods ------------------------------------------------------
|
|
44
44
|
|
|
45
|
-
/**
|
|
46
|
-
* Returns the byte index of the given character index in the string. The two
|
|
47
|
-
* may differ in strings that contain multibyte characters.
|
|
48
|
-
*/
|
|
49
|
-
protected charIndexToByteIndex(charIndex: number = this.charIndex): number {
|
|
50
|
-
return this.multiByteMode
|
|
51
|
-
? (this.charsToBytes as number[])[charIndex] ?? Infinity
|
|
52
|
-
: charIndex;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
45
|
/**
|
|
56
46
|
* Returns the number of characters in the given string, which may differ from
|
|
57
47
|
* the byte length if the string contains multibyte characters.
|
|
@@ -75,6 +65,16 @@ export class StringScanner {
|
|
|
75
65
|
this.charIndex = Math.min(this.charCount, this.charIndex + count);
|
|
76
66
|
}
|
|
77
67
|
|
|
68
|
+
/**
|
|
69
|
+
* Returns the byte index of the given character index in the string. The two
|
|
70
|
+
* may differ in strings that contain multibyte characters.
|
|
71
|
+
*/
|
|
72
|
+
charIndexToByteIndex(charIndex: number = this.charIndex): number {
|
|
73
|
+
return this.multiByteMode
|
|
74
|
+
? (this.charsToBytes as number[])[charIndex] ?? Infinity
|
|
75
|
+
: charIndex;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
78
|
/**
|
|
79
79
|
* Consumes and returns the given number of characters if possible, advancing
|
|
80
80
|
* the scanner and stopping if the end of the string is reached.
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { XmlNode } from './XmlNode.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* An XML declaration within an XML document.
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
*
|
|
8
|
+
* ```xml
|
|
9
|
+
* <?xml version="1.0" encoding="UTF-8"?>
|
|
10
|
+
* ```
|
|
11
|
+
*/
|
|
12
|
+
export class XmlDeclaration extends XmlNode {
|
|
13
|
+
/**
|
|
14
|
+
* Value of the encoding declaration in this XML declaration, or `null` if no
|
|
15
|
+
* encoding declaration was present.
|
|
16
|
+
*/
|
|
17
|
+
encoding: string | null;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Value of the standalone declaration in this XML declaration, or `null` if
|
|
21
|
+
* no standalone declaration was present.
|
|
22
|
+
*/
|
|
23
|
+
standalone: 'yes' | 'no' | null;
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Value of the version declaration in this XML declaration.
|
|
27
|
+
*/
|
|
28
|
+
version: string;
|
|
29
|
+
|
|
30
|
+
constructor(
|
|
31
|
+
version: string,
|
|
32
|
+
encoding?: string,
|
|
33
|
+
standalone?: typeof XmlDeclaration.prototype.standalone,
|
|
34
|
+
) {
|
|
35
|
+
super();
|
|
36
|
+
|
|
37
|
+
this.version = version;
|
|
38
|
+
this.encoding = encoding ?? null;
|
|
39
|
+
this.standalone = standalone ?? null;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
override get type() {
|
|
43
|
+
return XmlNode.TYPE_XML_DECLARATION;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
override toJSON() {
|
|
47
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
48
|
+
json.version = this.version;
|
|
49
|
+
|
|
50
|
+
for (let key of ['encoding', 'standalone'] as const) {
|
|
51
|
+
if (this[key] !== null) {
|
|
52
|
+
json[key] = this[key];
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return json;
|
|
57
|
+
}
|
|
58
|
+
}
|
package/src/lib/XmlDocument.ts
CHANGED
|
@@ -2,6 +2,8 @@ import { XmlElement } from './XmlElement.js';
|
|
|
2
2
|
import { XmlNode } from './XmlNode.js';
|
|
3
3
|
|
|
4
4
|
import type { XmlComment } from './XmlComment.js';
|
|
5
|
+
import type { XmlDeclaration } from './XmlDeclaration.js';
|
|
6
|
+
import type { XmlDocumentType } from './XmlDocumentType.js';
|
|
5
7
|
import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
|
6
8
|
|
|
7
9
|
/**
|
|
@@ -12,9 +14,9 @@ export class XmlDocument extends XmlNode {
|
|
|
12
14
|
/**
|
|
13
15
|
* Child nodes of this document.
|
|
14
16
|
*/
|
|
15
|
-
readonly children: Array<XmlComment | XmlProcessingInstruction | XmlElement>;
|
|
17
|
+
readonly children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlProcessingInstruction | XmlElement>;
|
|
16
18
|
|
|
17
|
-
constructor(children: Array<XmlComment | XmlElement | XmlProcessingInstruction> = []) {
|
|
19
|
+
constructor(children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlElement | XmlProcessingInstruction> = []) {
|
|
18
20
|
super();
|
|
19
21
|
this.children = children;
|
|
20
22
|
}
|