@rgrove/parse-xml 4.0.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -31
- package/dist/browser.js +692 -300
- package/dist/browser.js.map +4 -4
- package/dist/global.min.js +9 -8
- package/dist/global.min.js.map +4 -4
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -2
- package/dist/index.js.map +1 -1
- package/dist/lib/Parser.d.ts +53 -6
- package/dist/lib/Parser.d.ts.map +1 -1
- package/dist/lib/Parser.js +166 -126
- package/dist/lib/Parser.js.map +1 -1
- package/dist/lib/StringScanner.d.ts +15 -21
- package/dist/lib/StringScanner.d.ts.map +1 -1
- package/dist/lib/StringScanner.js +63 -86
- package/dist/lib/StringScanner.js.map +1 -1
- package/dist/lib/XmlDeclaration.d.ts +30 -0
- package/dist/lib/XmlDeclaration.d.ts.map +1 -0
- package/dist/lib/XmlDeclaration.js +36 -0
- package/dist/lib/XmlDeclaration.js.map +1 -0
- package/dist/lib/XmlDocument.d.ts +4 -2
- package/dist/lib/XmlDocument.d.ts.map +1 -1
- package/dist/lib/XmlDocument.js.map +1 -1
- package/dist/lib/XmlDocumentType.d.ts +37 -0
- package/dist/lib/XmlDocumentType.d.ts.map +1 -0
- package/dist/lib/XmlDocumentType.js +39 -0
- package/dist/lib/XmlDocumentType.js.map +1 -0
- package/dist/lib/XmlElement.js.map +1 -1
- package/dist/lib/XmlError.d.ts +24 -0
- package/dist/lib/XmlError.d.ts.map +1 -0
- package/dist/lib/XmlError.js +52 -0
- package/dist/lib/XmlError.js.map +1 -0
- package/dist/lib/XmlNode.d.ts +20 -1
- package/dist/lib/XmlNode.d.ts.map +1 -1
- package/dist/lib/XmlNode.js +28 -3
- package/dist/lib/XmlNode.js.map +1 -1
- package/dist/lib/syntax.d.ts.map +1 -1
- package/dist/lib/syntax.js +18 -23
- package/dist/lib/syntax.js.map +1 -1
- package/dist/lib/types.d.ts +2 -2
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +20 -23
- package/src/index.ts +3 -0
- package/src/lib/Parser.ts +228 -141
- package/src/lib/StringScanner.ts +66 -103
- package/src/lib/XmlDeclaration.ts +58 -0
- package/src/lib/XmlDocument.ts +4 -2
- package/src/lib/XmlDocumentType.ts +67 -0
- package/src/lib/XmlError.ts +80 -0
- package/src/lib/XmlNode.ts +33 -3
- package/src/lib/syntax.ts +12 -18
package/src/lib/Parser.ts
CHANGED
|
@@ -2,14 +2,15 @@ import { StringScanner } from './StringScanner.js';
|
|
|
2
2
|
import * as syntax from './syntax.js';
|
|
3
3
|
import { XmlCdata } from './XmlCdata.js';
|
|
4
4
|
import { XmlComment } from './XmlComment.js';
|
|
5
|
+
import { XmlDeclaration } from './XmlDeclaration.js';
|
|
5
6
|
import { XmlDocument } from './XmlDocument.js';
|
|
7
|
+
import { XmlDocumentType } from './XmlDocumentType.js';
|
|
6
8
|
import { XmlElement } from './XmlElement.js';
|
|
9
|
+
import { XmlError } from './XmlError.js';
|
|
10
|
+
import { XmlNode } from './XmlNode.js';
|
|
7
11
|
import { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
|
8
12
|
import { XmlText } from './XmlText.js';
|
|
9
13
|
|
|
10
|
-
import type { XmlNode } from './XmlNode.js';
|
|
11
|
-
|
|
12
|
-
|
|
13
14
|
const emptyString = '';
|
|
14
15
|
|
|
15
16
|
/**
|
|
@@ -29,56 +30,67 @@ export class Parser {
|
|
|
29
30
|
* @param options Parser options.
|
|
30
31
|
*/
|
|
31
32
|
constructor(xml: string, options: ParserOptions = {}) {
|
|
32
|
-
this.document = new XmlDocument();
|
|
33
|
-
this.currentNode = this.document;
|
|
34
|
-
this.options = options;
|
|
35
|
-
this.scanner = new StringScanner(normalizeXmlString(xml));
|
|
33
|
+
let doc = this.document = new XmlDocument();
|
|
36
34
|
|
|
37
|
-
this.
|
|
35
|
+
this.currentNode = doc;
|
|
36
|
+
this.options = options;
|
|
37
|
+
this.scanner = new StringScanner(xml);
|
|
38
38
|
|
|
39
|
-
if (
|
|
40
|
-
|
|
39
|
+
if (this.options.includeOffsets) {
|
|
40
|
+
doc.start = 0;
|
|
41
|
+
doc.end = xml.length;
|
|
41
42
|
}
|
|
42
43
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if (!this.scanner.isEnd) {
|
|
46
|
-
throw this.error('Extra content at the end of the document');
|
|
47
|
-
}
|
|
44
|
+
this.parse();
|
|
48
45
|
}
|
|
49
46
|
|
|
50
47
|
/**
|
|
51
48
|
* Adds the given `XmlNode` as a child of `this.currentNode`.
|
|
52
49
|
*/
|
|
53
|
-
addNode(node: XmlNode) {
|
|
50
|
+
addNode(node: XmlNode, charIndex: number) {
|
|
54
51
|
node.parent = this.currentNode;
|
|
55
52
|
|
|
53
|
+
if (this.options.includeOffsets) {
|
|
54
|
+
node.start = this.scanner.charIndexToByteIndex(charIndex);
|
|
55
|
+
node.end = this.scanner.charIndexToByteIndex();
|
|
56
|
+
}
|
|
57
|
+
|
|
56
58
|
// @ts-expect-error: XmlDocument has a more limited set of possible children
|
|
57
59
|
// than XmlElement so TypeScript is unhappy, but we always do the right
|
|
58
60
|
// thing.
|
|
59
61
|
this.currentNode.children.push(node);
|
|
62
|
+
return true;
|
|
60
63
|
}
|
|
61
64
|
|
|
62
65
|
/**
|
|
63
66
|
* Adds the given _text_ to the document, either by appending it to a
|
|
64
67
|
* preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
|
|
65
68
|
*/
|
|
66
|
-
addText(text: string) {
|
|
69
|
+
addText(text: string, charIndex: number) {
|
|
67
70
|
let { children } = this.currentNode;
|
|
68
71
|
let { length } = children;
|
|
69
72
|
|
|
73
|
+
text = normalizeLineBreaks(text);
|
|
74
|
+
|
|
70
75
|
if (length > 0) {
|
|
71
76
|
let prevNode = children[length - 1];
|
|
72
77
|
|
|
73
|
-
if (prevNode
|
|
78
|
+
if (prevNode?.type === XmlNode.TYPE_TEXT) {
|
|
79
|
+
let textNode = prevNode as XmlText;
|
|
80
|
+
|
|
74
81
|
// The previous node is a text node, so we can append to it and avoid
|
|
75
82
|
// creating another node.
|
|
76
|
-
|
|
77
|
-
|
|
83
|
+
textNode.text += text;
|
|
84
|
+
|
|
85
|
+
if (this.options.includeOffsets) {
|
|
86
|
+
textNode.end = this.scanner.charIndexToByteIndex();
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
return true;
|
|
78
90
|
}
|
|
79
91
|
}
|
|
80
92
|
|
|
81
|
-
this.addNode(new XmlText(text));
|
|
93
|
+
return this.addNode(new XmlText(text), charIndex);
|
|
82
94
|
}
|
|
83
95
|
|
|
84
96
|
/**
|
|
@@ -159,7 +171,7 @@ export class Parser {
|
|
|
159
171
|
: syntax.attValueCharSingleQuote;
|
|
160
172
|
|
|
161
173
|
matchLoop: while (!scanner.isEnd) {
|
|
162
|
-
chars = scanner.
|
|
174
|
+
chars = scanner.consumeUntilMatch(regex);
|
|
163
175
|
|
|
164
176
|
if (chars) {
|
|
165
177
|
this.validateChars(chars);
|
|
@@ -178,7 +190,7 @@ export class Parser {
|
|
|
178
190
|
case '<':
|
|
179
191
|
throw this.error('Unescaped `<` is not allowed in an attribute value');
|
|
180
192
|
|
|
181
|
-
|
|
193
|
+
default:
|
|
182
194
|
break matchLoop;
|
|
183
195
|
}
|
|
184
196
|
}
|
|
@@ -199,25 +211,22 @@ export class Parser {
|
|
|
199
211
|
*/
|
|
200
212
|
consumeCdataSection(): boolean {
|
|
201
213
|
let { scanner } = this;
|
|
214
|
+
let startIndex = scanner.charIndex;
|
|
202
215
|
|
|
203
|
-
if (!scanner.
|
|
216
|
+
if (!scanner.consumeString('<![CDATA[')) {
|
|
204
217
|
return false;
|
|
205
218
|
}
|
|
206
219
|
|
|
207
220
|
let text = scanner.consumeUntilString(']]>');
|
|
208
221
|
this.validateChars(text);
|
|
209
222
|
|
|
210
|
-
if (!scanner.
|
|
223
|
+
if (!scanner.consumeString(']]>')) {
|
|
211
224
|
throw this.error('Unclosed CDATA section');
|
|
212
225
|
}
|
|
213
226
|
|
|
214
|
-
|
|
215
|
-
this.addNode(new XmlCdata(text))
|
|
216
|
-
|
|
217
|
-
this.addText(text);
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
return true;
|
|
227
|
+
return this.options.preserveCdata
|
|
228
|
+
? this.addNode(new XmlCdata(normalizeLineBreaks(text)), startIndex)
|
|
229
|
+
: this.addText(text, startIndex);
|
|
221
230
|
}
|
|
222
231
|
|
|
223
232
|
/**
|
|
@@ -228,6 +237,7 @@ export class Parser {
|
|
|
228
237
|
*/
|
|
229
238
|
consumeCharData(): boolean {
|
|
230
239
|
let { scanner } = this;
|
|
240
|
+
let startIndex = scanner.charIndex;
|
|
231
241
|
let charData = scanner.consumeUntilMatch(syntax.endCharData);
|
|
232
242
|
|
|
233
243
|
if (!charData) {
|
|
@@ -240,8 +250,7 @@ export class Parser {
|
|
|
240
250
|
throw this.error('Element content may not contain the CDATA section close delimiter `]]>`');
|
|
241
251
|
}
|
|
242
252
|
|
|
243
|
-
this.addText(charData);
|
|
244
|
-
return true;
|
|
253
|
+
return this.addText(charData, startIndex);
|
|
245
254
|
}
|
|
246
255
|
|
|
247
256
|
/**
|
|
@@ -252,15 +261,16 @@ export class Parser {
|
|
|
252
261
|
*/
|
|
253
262
|
consumeComment(): boolean {
|
|
254
263
|
let { scanner } = this;
|
|
264
|
+
let startIndex = scanner.charIndex;
|
|
255
265
|
|
|
256
|
-
if (!scanner.
|
|
266
|
+
if (!scanner.consumeString('<!--')) {
|
|
257
267
|
return false;
|
|
258
268
|
}
|
|
259
269
|
|
|
260
270
|
let content = scanner.consumeUntilString('--');
|
|
261
271
|
this.validateChars(content);
|
|
262
272
|
|
|
263
|
-
if (!scanner.
|
|
273
|
+
if (!scanner.consumeString('-->')) {
|
|
264
274
|
if (scanner.peek(2) === '--') {
|
|
265
275
|
throw this.error("The string `--` isn't allowed inside a comment");
|
|
266
276
|
}
|
|
@@ -268,11 +278,9 @@ export class Parser {
|
|
|
268
278
|
throw this.error('Unclosed comment');
|
|
269
279
|
}
|
|
270
280
|
|
|
271
|
-
|
|
272
|
-
this.addNode(new XmlComment(content
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
return true;
|
|
281
|
+
return this.options.preserveComments
|
|
282
|
+
? this.addNode(new XmlComment(normalizeLineBreaks(content)), startIndex)
|
|
283
|
+
: true;
|
|
276
284
|
}
|
|
277
285
|
|
|
278
286
|
/**
|
|
@@ -285,14 +293,12 @@ export class Parser {
|
|
|
285
293
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
|
|
286
294
|
*/
|
|
287
295
|
consumeContentReference(): boolean {
|
|
296
|
+
let startIndex = this.scanner.charIndex;
|
|
288
297
|
let ref = this.consumeReference();
|
|
289
298
|
|
|
290
|
-
|
|
291
|
-
this.addText(ref)
|
|
292
|
-
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
return false;
|
|
299
|
+
return ref
|
|
300
|
+
? this.addText(ref, startIndex)
|
|
301
|
+
: false;
|
|
296
302
|
}
|
|
297
303
|
|
|
298
304
|
/**
|
|
@@ -306,25 +312,68 @@ export class Parser {
|
|
|
306
312
|
*/
|
|
307
313
|
consumeDoctypeDeclaration(): boolean {
|
|
308
314
|
let { scanner } = this;
|
|
315
|
+
let startIndex = scanner.charIndex;
|
|
309
316
|
|
|
310
|
-
if (!scanner.
|
|
311
|
-
|| !this.consumeWhitespace()) {
|
|
312
|
-
|
|
317
|
+
if (!scanner.consumeString('<!DOCTYPE')) {
|
|
313
318
|
return false;
|
|
314
319
|
}
|
|
315
320
|
|
|
316
|
-
|
|
321
|
+
let name = this.consumeWhitespace()
|
|
322
|
+
&& this.consumeName();
|
|
317
323
|
|
|
318
|
-
if (
|
|
319
|
-
|
|
324
|
+
if (!name) {
|
|
325
|
+
throw this.error('Expected a name');
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
let publicId;
|
|
329
|
+
let systemId;
|
|
330
|
+
|
|
331
|
+
if (this.consumeWhitespace()) {
|
|
332
|
+
if (scanner.consumeString('PUBLIC')) {
|
|
333
|
+
publicId = this.consumeWhitespace()
|
|
334
|
+
&& this.consumePubidLiteral();
|
|
335
|
+
|
|
336
|
+
if (publicId === false) {
|
|
337
|
+
throw this.error('Expected a public identifier');
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
this.consumeWhitespace();
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
if (publicId !== undefined || scanner.consumeString('SYSTEM')) {
|
|
344
|
+
this.consumeWhitespace();
|
|
345
|
+
systemId = this.consumeSystemLiteral();
|
|
346
|
+
|
|
347
|
+
if (systemId === false) {
|
|
348
|
+
throw this.error('Expected a system identifier');
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
this.consumeWhitespace();
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
let internalSubset;
|
|
356
|
+
|
|
357
|
+
if (scanner.consumeString('[')) {
|
|
358
|
+
// The internal subset may contain comments that contain `]` characters,
|
|
359
|
+
// so we can't use `consumeUntilString()` here.
|
|
360
|
+
internalSubset = scanner.consumeUntilMatch(/\][\x20\t\r\n]*>/);
|
|
361
|
+
|
|
362
|
+
if (!scanner.consumeString(']')) {
|
|
363
|
+
throw this.error('Unclosed internal subset');
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
this.consumeWhitespace();
|
|
320
367
|
}
|
|
321
368
|
|
|
322
|
-
if (!scanner.
|
|
369
|
+
if (!scanner.consumeString('>')) {
|
|
323
370
|
throw this.error('Unclosed doctype declaration');
|
|
324
371
|
}
|
|
325
372
|
|
|
326
|
-
return
|
|
327
|
-
|
|
373
|
+
return this.options.preserveDocumentType
|
|
374
|
+
? this.addNode(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex)
|
|
375
|
+
: true;
|
|
376
|
+
}
|
|
328
377
|
|
|
329
378
|
/**
|
|
330
379
|
* Consumes an element if possible.
|
|
@@ -334,27 +383,27 @@ export class Parser {
|
|
|
334
383
|
*/
|
|
335
384
|
consumeElement(): boolean {
|
|
336
385
|
let { scanner } = this;
|
|
337
|
-
let
|
|
386
|
+
let startIndex = scanner.charIndex;
|
|
338
387
|
|
|
339
|
-
if (!scanner.
|
|
388
|
+
if (!scanner.consumeString('<')) {
|
|
340
389
|
return false;
|
|
341
390
|
}
|
|
342
391
|
|
|
343
392
|
let name = this.consumeName();
|
|
344
393
|
|
|
345
394
|
if (!name) {
|
|
346
|
-
scanner.reset(
|
|
395
|
+
scanner.reset(startIndex);
|
|
347
396
|
return false;
|
|
348
397
|
}
|
|
349
398
|
|
|
350
399
|
let attributes = this.consumeAttributes();
|
|
351
|
-
let isEmpty =
|
|
400
|
+
let isEmpty = !!scanner.consumeString('/>');
|
|
352
401
|
let element = new XmlElement(name, attributes);
|
|
353
402
|
|
|
354
403
|
element.parent = this.currentNode;
|
|
355
404
|
|
|
356
405
|
if (!isEmpty) {
|
|
357
|
-
if (!scanner.
|
|
406
|
+
if (!scanner.consumeString('>')) {
|
|
358
407
|
throw this.error(`Unclosed start tag for element \`${name}\``);
|
|
359
408
|
}
|
|
360
409
|
|
|
@@ -373,7 +422,7 @@ export class Parser {
|
|
|
373
422
|
let endTagMark = scanner.charIndex;
|
|
374
423
|
let endTagName;
|
|
375
424
|
|
|
376
|
-
if (!scanner.
|
|
425
|
+
if (!scanner.consumeString('</')
|
|
377
426
|
|| !(endTagName = this.consumeName())
|
|
378
427
|
|| endTagName !== name) {
|
|
379
428
|
|
|
@@ -383,15 +432,14 @@ export class Parser {
|
|
|
383
432
|
|
|
384
433
|
this.consumeWhitespace();
|
|
385
434
|
|
|
386
|
-
if (!scanner.
|
|
435
|
+
if (!scanner.consumeString('>')) {
|
|
387
436
|
throw this.error(`Unclosed end tag for element ${name}`);
|
|
388
437
|
}
|
|
389
438
|
|
|
390
439
|
this.currentNode = element.parent;
|
|
391
440
|
}
|
|
392
441
|
|
|
393
|
-
this.addNode(element);
|
|
394
|
-
return true;
|
|
442
|
+
return this.addNode(element, startIndex);
|
|
395
443
|
}
|
|
396
444
|
|
|
397
445
|
/**
|
|
@@ -403,7 +451,7 @@ export class Parser {
|
|
|
403
451
|
consumeEqual(): boolean {
|
|
404
452
|
this.consumeWhitespace();
|
|
405
453
|
|
|
406
|
-
if (this.scanner.
|
|
454
|
+
if (this.scanner.consumeString('=')) {
|
|
407
455
|
this.consumeWhitespace();
|
|
408
456
|
return true;
|
|
409
457
|
}
|
|
@@ -443,9 +491,9 @@ export class Parser {
|
|
|
443
491
|
*/
|
|
444
492
|
consumeProcessingInstruction(): boolean {
|
|
445
493
|
let { scanner } = this;
|
|
446
|
-
let
|
|
494
|
+
let startIndex = scanner.charIndex;
|
|
447
495
|
|
|
448
|
-
if (!scanner.
|
|
496
|
+
if (!scanner.consumeString('<?')) {
|
|
449
497
|
return false;
|
|
450
498
|
}
|
|
451
499
|
|
|
@@ -453,7 +501,7 @@ export class Parser {
|
|
|
453
501
|
|
|
454
502
|
if (name) {
|
|
455
503
|
if (name.toLowerCase() === 'xml') {
|
|
456
|
-
scanner.reset(
|
|
504
|
+
scanner.reset(startIndex);
|
|
457
505
|
throw this.error("XML declaration isn't allowed here");
|
|
458
506
|
}
|
|
459
507
|
} else {
|
|
@@ -461,9 +509,8 @@ export class Parser {
|
|
|
461
509
|
}
|
|
462
510
|
|
|
463
511
|
if (!this.consumeWhitespace()) {
|
|
464
|
-
if (scanner.
|
|
465
|
-
this.addNode(new XmlProcessingInstruction(name));
|
|
466
|
-
return true;
|
|
512
|
+
if (scanner.consumeString('?>')) {
|
|
513
|
+
return this.addNode(new XmlProcessingInstruction(name), startIndex);
|
|
467
514
|
}
|
|
468
515
|
|
|
469
516
|
throw this.error('Whitespace is required after a processing instruction name');
|
|
@@ -472,12 +519,11 @@ export class Parser {
|
|
|
472
519
|
let content = scanner.consumeUntilString('?>');
|
|
473
520
|
this.validateChars(content);
|
|
474
521
|
|
|
475
|
-
if (!scanner.
|
|
522
|
+
if (!scanner.consumeString('?>')) {
|
|
476
523
|
throw this.error('Unterminated processing instruction');
|
|
477
524
|
}
|
|
478
525
|
|
|
479
|
-
this.addNode(new XmlProcessingInstruction(name, content));
|
|
480
|
-
return true;
|
|
526
|
+
return this.addNode(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
|
|
481
527
|
}
|
|
482
528
|
|
|
483
529
|
/**
|
|
@@ -488,7 +534,7 @@ export class Parser {
|
|
|
488
534
|
*/
|
|
489
535
|
consumeProlog(): boolean {
|
|
490
536
|
let { scanner } = this;
|
|
491
|
-
let
|
|
537
|
+
let startIndex = scanner.charIndex;
|
|
492
538
|
|
|
493
539
|
this.consumeXmlDeclaration();
|
|
494
540
|
|
|
@@ -498,7 +544,29 @@ export class Parser {
|
|
|
498
544
|
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
499
545
|
}
|
|
500
546
|
|
|
501
|
-
return
|
|
547
|
+
return startIndex < scanner.charIndex;
|
|
548
|
+
}
|
|
549
|
+
|
|
550
|
+
/**
|
|
551
|
+
* Consumes a public identifier literal if possible.
|
|
552
|
+
*
|
|
553
|
+
* @returns
|
|
554
|
+
* Value of the public identifier literal minus quotes, or `false` if
|
|
555
|
+
* nothing was consumed. An empty string indicates that a public id literal
|
|
556
|
+
* was consumed but was empty.
|
|
557
|
+
*
|
|
558
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
|
|
559
|
+
*/
|
|
560
|
+
consumePubidLiteral(): string | false {
|
|
561
|
+
let startIndex = this.scanner.charIndex;
|
|
562
|
+
let value = this.consumeSystemLiteral();
|
|
563
|
+
|
|
564
|
+
if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
|
|
565
|
+
this.scanner.reset(startIndex);
|
|
566
|
+
throw this.error('Invalid character in public identifier');
|
|
567
|
+
}
|
|
568
|
+
|
|
569
|
+
return value;
|
|
502
570
|
}
|
|
503
571
|
|
|
504
572
|
/**
|
|
@@ -516,7 +584,7 @@ export class Parser {
|
|
|
516
584
|
consumeReference(): string | false {
|
|
517
585
|
let { scanner } = this;
|
|
518
586
|
|
|
519
|
-
if (!scanner.
|
|
587
|
+
if (!scanner.consumeString('&')) {
|
|
520
588
|
return false;
|
|
521
589
|
}
|
|
522
590
|
|
|
@@ -596,7 +664,7 @@ export class Parser {
|
|
|
596
664
|
*/
|
|
597
665
|
consumeSystemLiteral(): string | false {
|
|
598
666
|
let { scanner } = this;
|
|
599
|
-
let quote = scanner.
|
|
667
|
+
let quote = scanner.consumeString('"') || scanner.consumeString("'");
|
|
600
668
|
|
|
601
669
|
if (!quote) {
|
|
602
670
|
return false;
|
|
@@ -605,7 +673,7 @@ export class Parser {
|
|
|
605
673
|
let value = scanner.consumeUntilString(quote);
|
|
606
674
|
this.validateChars(value);
|
|
607
675
|
|
|
608
|
-
if (!scanner.
|
|
676
|
+
if (!scanner.consumeString(quote)) {
|
|
609
677
|
throw this.error('Missing end quote');
|
|
610
678
|
}
|
|
611
679
|
|
|
@@ -619,7 +687,7 @@ export class Parser {
|
|
|
619
687
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
|
|
620
688
|
*/
|
|
621
689
|
consumeWhitespace(): boolean {
|
|
622
|
-
return
|
|
690
|
+
return !!this.scanner.consumeMatchFn(syntax.isWhitespace);
|
|
623
691
|
}
|
|
624
692
|
|
|
625
693
|
/**
|
|
@@ -630,8 +698,9 @@ export class Parser {
|
|
|
630
698
|
*/
|
|
631
699
|
consumeXmlDeclaration(): boolean {
|
|
632
700
|
let { scanner } = this;
|
|
701
|
+
let startIndex = scanner.charIndex;
|
|
633
702
|
|
|
634
|
-
if (!scanner.
|
|
703
|
+
if (!scanner.consumeString('<?xml')) {
|
|
635
704
|
return false;
|
|
636
705
|
}
|
|
637
706
|
|
|
@@ -639,7 +708,7 @@ export class Parser {
|
|
|
639
708
|
throw this.error('Invalid XML declaration');
|
|
640
709
|
}
|
|
641
710
|
|
|
642
|
-
let version =
|
|
711
|
+
let version = !!scanner.consumeString('version')
|
|
643
712
|
&& this.consumeEqual()
|
|
644
713
|
&& this.consumeSystemLiteral();
|
|
645
714
|
|
|
@@ -649,16 +718,22 @@ export class Parser {
|
|
|
649
718
|
throw this.error('Invalid character in version number');
|
|
650
719
|
}
|
|
651
720
|
|
|
721
|
+
let encoding;
|
|
722
|
+
let standalone;
|
|
723
|
+
|
|
652
724
|
if (this.consumeWhitespace()) {
|
|
653
|
-
|
|
725
|
+
encoding = !!scanner.consumeString('encoding')
|
|
654
726
|
&& this.consumeEqual()
|
|
655
727
|
&& this.consumeSystemLiteral();
|
|
656
728
|
|
|
657
729
|
if (encoding) {
|
|
730
|
+
if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
|
|
731
|
+
throw this.error('Invalid character in encoding name');
|
|
732
|
+
}
|
|
658
733
|
this.consumeWhitespace();
|
|
659
734
|
}
|
|
660
735
|
|
|
661
|
-
|
|
736
|
+
standalone = !!scanner.consumeString('standalone')
|
|
662
737
|
&& this.consumeEqual()
|
|
663
738
|
&& this.consumeSystemLiteral();
|
|
664
739
|
|
|
@@ -671,69 +746,43 @@ export class Parser {
|
|
|
671
746
|
}
|
|
672
747
|
}
|
|
673
748
|
|
|
674
|
-
if (!scanner.
|
|
749
|
+
if (!scanner.consumeString('?>')) {
|
|
675
750
|
throw this.error('Invalid or unclosed XML declaration');
|
|
676
751
|
}
|
|
677
752
|
|
|
678
|
-
return
|
|
753
|
+
return this.options.preserveXmlDeclaration
|
|
754
|
+
? this.addNode(new XmlDeclaration(
|
|
755
|
+
version,
|
|
756
|
+
encoding || undefined,
|
|
757
|
+
(standalone as 'yes' | 'no' | false) || undefined,
|
|
758
|
+
), startIndex)
|
|
759
|
+
: true;
|
|
679
760
|
}
|
|
680
761
|
|
|
681
762
|
/**
|
|
682
|
-
*
|
|
763
|
+
* Returns an `XmlError` for the current scanner position.
|
|
683
764
|
*/
|
|
684
765
|
error(message: string) {
|
|
685
|
-
let {
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
let line = 1;
|
|
689
|
-
|
|
690
|
-
// Find the line and column where the error occurred.
|
|
691
|
-
for (let i = 0; i < charIndex; ++i) {
|
|
692
|
-
let char = xml[i];
|
|
693
|
-
|
|
694
|
-
if (char === '\n') {
|
|
695
|
-
column = 1;
|
|
696
|
-
excerpt = '';
|
|
697
|
-
line += 1;
|
|
698
|
-
} else {
|
|
699
|
-
column += 1;
|
|
700
|
-
excerpt += char;
|
|
701
|
-
}
|
|
702
|
-
}
|
|
703
|
-
|
|
704
|
-
let eol = xml.indexOf('\n', charIndex);
|
|
705
|
-
|
|
706
|
-
excerpt += eol === -1
|
|
707
|
-
? xml.slice(charIndex)
|
|
708
|
-
: xml.slice(charIndex, eol);
|
|
766
|
+
let { scanner } = this;
|
|
767
|
+
return new XmlError(message, scanner.charIndex, scanner.string);
|
|
768
|
+
}
|
|
709
769
|
|
|
710
|
-
|
|
770
|
+
/**
|
|
771
|
+
* Parses the XML input.
|
|
772
|
+
*/
|
|
773
|
+
parse() {
|
|
774
|
+
this.scanner.consumeString('\uFEFF'); // byte order mark
|
|
775
|
+
this.consumeProlog();
|
|
711
776
|
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
if (excerpt.length > 50) {
|
|
715
|
-
if (column < 40) {
|
|
716
|
-
excerpt = excerpt.slice(0, 50);
|
|
717
|
-
} else {
|
|
718
|
-
excerptStart = column - 20;
|
|
719
|
-
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
720
|
-
}
|
|
777
|
+
if (!this.consumeElement()) {
|
|
778
|
+
throw this.error('Root element is missing or invalid');
|
|
721
779
|
}
|
|
722
780
|
|
|
723
|
-
|
|
724
|
-
`${message} (line ${line}, column ${column})\n`
|
|
725
|
-
+ ` ${excerpt}\n`
|
|
726
|
-
+ ' '.repeat(column - excerptStart + 1) + '^\n',
|
|
727
|
-
);
|
|
728
|
-
|
|
729
|
-
Object.assign(err, {
|
|
730
|
-
column,
|
|
731
|
-
excerpt,
|
|
732
|
-
line,
|
|
733
|
-
pos: charIndex,
|
|
734
|
-
});
|
|
781
|
+
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
735
782
|
|
|
736
|
-
|
|
783
|
+
if (!this.scanner.isEnd) {
|
|
784
|
+
throw this.error('Extra content at the end of the document');
|
|
785
|
+
}
|
|
737
786
|
}
|
|
738
787
|
|
|
739
788
|
/**
|
|
@@ -761,15 +810,19 @@ export class Parser {
|
|
|
761
810
|
// -- Private Functions --------------------------------------------------------
|
|
762
811
|
|
|
763
812
|
/**
|
|
764
|
-
* Normalizes the given
|
|
765
|
-
*
|
|
813
|
+
* Normalizes line breaks in the given text by replacing CRLF sequences and lone
|
|
814
|
+
* CR characters with LF characters.
|
|
766
815
|
*/
|
|
767
|
-
function
|
|
768
|
-
|
|
769
|
-
|
|
816
|
+
function normalizeLineBreaks(text: string): string {
|
|
817
|
+
let i = 0;
|
|
818
|
+
|
|
819
|
+
while ((i = text.indexOf('\r', i)) !== -1) {
|
|
820
|
+
text = text[i + 1] === '\n'
|
|
821
|
+
? text.slice(0, i) + text.slice(i + 1)
|
|
822
|
+
: text.slice(0, i) + '\n' + text.slice(i + 1);
|
|
770
823
|
}
|
|
771
824
|
|
|
772
|
-
return
|
|
825
|
+
return text;
|
|
773
826
|
}
|
|
774
827
|
|
|
775
828
|
// -- Types --------------------------------------------------------------------
|
|
@@ -782,6 +835,14 @@ export type ParserOptions = {
|
|
|
782
835
|
*/
|
|
783
836
|
ignoreUndefinedEntities?: boolean;
|
|
784
837
|
|
|
838
|
+
/**
|
|
839
|
+
* When `true`, the starting and ending byte offsets of each node in the input
|
|
840
|
+
* string will be made available via `start` and `end` properties on the node.
|
|
841
|
+
*
|
|
842
|
+
* @default false
|
|
843
|
+
*/
|
|
844
|
+
includeOffsets?: boolean;
|
|
845
|
+
|
|
785
846
|
/**
|
|
786
847
|
* When `true`, CDATA sections will be preserved in the document as `XmlCdata`
|
|
787
848
|
* nodes. Otherwise CDATA sections will be represented as `XmlText` nodes,
|
|
@@ -799,6 +860,32 @@ export type ParserOptions = {
|
|
|
799
860
|
*/
|
|
800
861
|
preserveComments?: boolean;
|
|
801
862
|
|
|
863
|
+
/**
|
|
864
|
+
* When `true`, a document type declaration (if present) will be preserved in
|
|
865
|
+
* the document as an `XmlDocumentType` node. Otherwise the declaration will
|
|
866
|
+
* not be included in the node tree.
|
|
867
|
+
*
|
|
868
|
+
* Note that when this is `true` and a document type declaration is present,
|
|
869
|
+
* the DTD will precede the root node in the node tree (normally the root
|
|
870
|
+
* node would be first).
|
|
871
|
+
*
|
|
872
|
+
* @default false
|
|
873
|
+
*/
|
|
874
|
+
preserveDocumentType?: boolean;
|
|
875
|
+
|
|
876
|
+
/**
|
|
877
|
+
* When `true`, an XML declaration (if present) will be preserved in the
|
|
878
|
+
* document as an `XmlDeclaration` node. Otherwise the declaration will not be
|
|
879
|
+
* included in the node tree.
|
|
880
|
+
*
|
|
881
|
+
* Note that when this is `true` and an XML declaration is present, the
|
|
882
|
+
* XML declaration will be the first child of the document (normally the root
|
|
883
|
+
* node would be first).
|
|
884
|
+
*
|
|
885
|
+
* @default false
|
|
886
|
+
*/
|
|
887
|
+
preserveXmlDeclaration?: boolean;
|
|
888
|
+
|
|
802
889
|
/**
|
|
803
890
|
* When an undefined named entity is encountered, this function will be called
|
|
804
891
|
* with the entity as its only argument. It should return a string value with
|