@rgrove/parse-xml 3.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/LICENSE +1 -1
  2. package/README.md +72 -97
  3. package/dist/browser.js +774 -0
  4. package/dist/browser.js.map +7 -0
  5. package/dist/global.min.js +10 -0
  6. package/dist/global.min.js.map +7 -0
  7. package/dist/index.d.ts +24 -0
  8. package/dist/index.d.ts.map +1 -0
  9. package/dist/index.js +50 -0
  10. package/dist/index.js.map +1 -0
  11. package/dist/lib/Parser.d.ts +218 -0
  12. package/dist/lib/Parser.d.ts.map +1 -0
  13. package/dist/lib/Parser.js +638 -0
  14. package/dist/lib/Parser.js.map +1 -0
  15. package/dist/lib/StringScanner.d.ts +97 -0
  16. package/dist/lib/StringScanner.d.ts.map +1 -0
  17. package/dist/lib/StringScanner.js +210 -0
  18. package/dist/lib/StringScanner.js.map +1 -0
  19. package/dist/lib/XmlCdata.d.ts +8 -0
  20. package/dist/lib/XmlCdata.d.ts.map +1 -0
  21. package/dist/lib/XmlCdata.js +15 -0
  22. package/dist/lib/XmlCdata.js.map +1 -0
  23. package/dist/lib/XmlComment.d.ts +16 -0
  24. package/dist/lib/XmlComment.d.ts.map +1 -0
  25. package/dist/lib/XmlComment.js +23 -0
  26. package/dist/lib/XmlComment.js.map +1 -0
  27. package/dist/lib/XmlDocument.d.ts +29 -0
  28. package/dist/lib/XmlDocument.d.ts.map +1 -0
  29. package/dist/lib/XmlDocument.js +47 -0
  30. package/dist/lib/XmlDocument.js.map +1 -0
  31. package/dist/lib/XmlElement.d.ts +40 -0
  32. package/dist/lib/XmlElement.d.ts.map +1 -0
  33. package/dist/lib/XmlElement.js +51 -0
  34. package/dist/lib/XmlElement.js.map +1 -0
  35. package/dist/lib/XmlNode.d.ts +74 -0
  36. package/dist/lib/XmlNode.d.ts.map +1 -0
  37. package/dist/lib/XmlNode.js +96 -0
  38. package/dist/lib/XmlNode.js.map +1 -0
  39. package/dist/lib/XmlProcessingInstruction.d.ts +22 -0
  40. package/dist/lib/XmlProcessingInstruction.d.ts.map +1 -0
  41. package/dist/lib/XmlProcessingInstruction.js +25 -0
  42. package/dist/lib/XmlProcessingInstruction.js.map +1 -0
  43. package/dist/lib/XmlText.d.ts +16 -0
  44. package/dist/lib/XmlText.d.ts.map +1 -0
  45. package/dist/lib/XmlText.js +23 -0
  46. package/dist/lib/XmlText.js.map +1 -0
  47. package/dist/lib/syntax.d.ts +69 -0
  48. package/dist/lib/syntax.d.ts.map +1 -0
  49. package/dist/lib/syntax.js +133 -0
  50. package/dist/lib/syntax.js.map +1 -0
  51. package/dist/lib/types.d.ts +5 -0
  52. package/dist/lib/types.d.ts.map +1 -0
  53. package/dist/lib/types.js +3 -0
  54. package/dist/lib/types.js.map +1 -0
  55. package/package.json +33 -26
  56. package/src/index.ts +30 -0
  57. package/src/lib/Parser.ts +819 -0
  58. package/src/lib/StringScanner.ts +254 -0
  59. package/src/lib/XmlCdata.ts +11 -0
  60. package/src/lib/XmlComment.ts +26 -0
  61. package/src/lib/XmlDocument.ts +57 -0
  62. package/src/lib/XmlElement.ts +81 -0
  63. package/src/lib/XmlNode.ts +107 -0
  64. package/src/lib/XmlProcessingInstruction.ts +35 -0
  65. package/src/lib/XmlText.ts +26 -0
  66. package/src/lib/syntax.ts +136 -0
  67. package/src/lib/types.ts +2 -0
  68. package/CHANGELOG.md +0 -162
  69. package/dist/types/index.d.ts +0 -68
  70. package/dist/types/index.d.ts.map +0 -1
  71. package/dist/types/lib/Parser.d.ts +0 -234
  72. package/dist/types/lib/Parser.d.ts.map +0 -1
  73. package/dist/types/lib/StringScanner.d.ts +0 -139
  74. package/dist/types/lib/StringScanner.d.ts.map +0 -1
  75. package/dist/types/lib/XmlCdata.d.ts +0 -11
  76. package/dist/types/lib/XmlCdata.d.ts.map +0 -1
  77. package/dist/types/lib/XmlComment.d.ts +0 -21
  78. package/dist/types/lib/XmlComment.d.ts.map +0 -1
  79. package/dist/types/lib/XmlDocument.d.ts +0 -42
  80. package/dist/types/lib/XmlDocument.d.ts.map +0 -1
  81. package/dist/types/lib/XmlElement.d.ts +0 -62
  82. package/dist/types/lib/XmlElement.d.ts.map +0 -1
  83. package/dist/types/lib/XmlNode.d.ts +0 -78
  84. package/dist/types/lib/XmlNode.d.ts.map +0 -1
  85. package/dist/types/lib/XmlProcessingInstruction.d.ts +0 -30
  86. package/dist/types/lib/XmlProcessingInstruction.d.ts.map +0 -1
  87. package/dist/types/lib/XmlText.d.ts +0 -21
  88. package/dist/types/lib/XmlText.d.ts.map +0 -1
  89. package/dist/types/lib/syntax.d.ts +0 -59
  90. package/dist/types/lib/syntax.d.ts.map +0 -1
  91. package/dist/umd/parse-xml.min.js +0 -2
  92. package/dist/umd/parse-xml.min.js.map +0 -1
  93. package/src/index.js +0 -67
  94. package/src/lib/Parser.js +0 -812
  95. package/src/lib/StringScanner.js +0 -312
  96. package/src/lib/XmlCdata.js +0 -17
  97. package/src/lib/XmlComment.js +0 -37
  98. package/src/lib/XmlDocument.js +0 -69
  99. package/src/lib/XmlElement.js +0 -101
  100. package/src/lib/XmlNode.js +0 -152
  101. package/src/lib/XmlProcessingInstruction.js +0 -48
  102. package/src/lib/XmlText.js +0 -37
  103. package/src/lib/syntax.js +0 -153
package/src/lib/Parser.js DELETED
@@ -1,812 +0,0 @@
1
- 'use strict';
2
-
3
- const StringScanner = require('./StringScanner');
4
- const syntax = require('./syntax');
5
- const XmlCdata = require('./XmlCdata');
6
- const XmlComment = require('./XmlComment');
7
- const XmlDocument = require('./XmlDocument');
8
- const XmlElement = require('./XmlElement');
9
- const XmlProcessingInstruction = require('./XmlProcessingInstruction');
10
- const XmlText = require('./XmlText');
11
-
12
- const emptyString = '';
13
-
14
- /**
15
- Parses an XML string into an `XmlDocument`.
16
-
17
- @private
18
- */
19
- class Parser {
20
- /**
21
- @param {string} xml
22
- XML string to parse.
23
-
24
- @param {object} [options]
25
- Parsing options.
26
-
27
- @param {boolean} [options.ignoreUndefinedEntities=false]
28
- @param {boolean} [options.preserveCdata=false]
29
- @param {boolean} [options.preserveComments=false]
30
- @param {(entity: string) => string?} [options.resolveUndefinedEntity]
31
- @param {boolean} [options.sortAttributes=false]
32
- */
33
- constructor(xml, options = {}) {
34
- /** @type {XmlDocument} */
35
- this.document = new XmlDocument();
36
-
37
- /** @type {XmlDocument|XmlElement} */
38
- this.currentNode = this.document;
39
-
40
- this.options = options;
41
- this.scanner = new StringScanner(normalizeXmlString(xml));
42
-
43
- this.consumeProlog();
44
-
45
- if (!this.consumeElement()) {
46
- this.error('Root element is missing or invalid');
47
- }
48
-
49
- while (this.consumeMisc()) {} // eslint-disable-line no-empty
50
-
51
- if (!this.scanner.isEnd) {
52
- this.error('Extra content at the end of the document');
53
- }
54
- }
55
-
56
- /**
57
- Adds the given `XmlNode` as a child of `this.currentNode`.
58
-
59
- @param {XmlNode} node
60
- */
61
- addNode(node) {
62
- node.parent = this.currentNode;
63
-
64
- // @ts-ignore
65
- this.currentNode.children.push(node);
66
- }
67
-
68
- /**
69
- Adds the given _text_ to the document, either by appending it to a preceding
70
- `XmlText` node (if possible) or by creating a new `XmlText` node.
71
-
72
- @param {string} text
73
- */
74
- addText(text) {
75
- let { children } = this.currentNode;
76
-
77
- if (children.length > 0) {
78
- let prevNode = children[children.length - 1];
79
-
80
- if (prevNode instanceof XmlText) {
81
- // The previous node is a text node, so we can append to it and avoid
82
- // creating another node.
83
- prevNode.text += text;
84
- return;
85
- }
86
- }
87
-
88
- this.addNode(new XmlText(text));
89
- }
90
-
91
- /**
92
- Consumes an `AttValue` (attribute value) if possible.
93
-
94
- @returns {string|false}
95
- Contents of the `AttValue` minus quotes, or `false` if nothing was consumed.
96
- An empty string indicates that an `AttValue` was consumed but was empty.
97
-
98
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
99
- */
100
- consumeAttributeValue() {
101
- let { scanner } = this;
102
- let quote = scanner.peek();
103
-
104
- if (quote !== '"' && quote !== "'") {
105
- return false;
106
- }
107
-
108
- scanner.advance();
109
-
110
- let chars;
111
- let isClosed = false;
112
- let value = emptyString;
113
- let regex = quote === '"'
114
- ? /[^"&<]+/y
115
- : /[^'&<]+/y;
116
-
117
- matchLoop: while (!scanner.isEnd) {
118
- chars = scanner.consumeMatch(regex);
119
-
120
- if (chars) {
121
- this.validateChars(chars);
122
- value += chars.replace(/[\t\r\n]/g, ' ');
123
- }
124
-
125
- let nextChar = scanner.peek();
126
-
127
- switch (nextChar) {
128
- case quote:
129
- isClosed = true;
130
- break matchLoop;
131
-
132
- case '&':
133
- value += this.consumeReference();
134
- continue;
135
-
136
- case '<':
137
- this.error('Unescaped `<` is not allowed in an attribute value'); /* istanbul ignore next */
138
- break;
139
-
140
- case emptyString:
141
- this.error('Unclosed attribute'); /* istanbul ignore next */
142
- break;
143
-
144
- }
145
- }
146
-
147
- if (!isClosed) {
148
- this.error('Unclosed attribute');
149
- }
150
-
151
- scanner.advance();
152
- return value;
153
- }
154
-
155
- /**
156
- Consumes a CDATA section if possible.
157
-
158
- @returns {boolean}
159
- Whether a CDATA section was consumed.
160
-
161
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
162
- */
163
- consumeCdataSection() {
164
- let { scanner } = this;
165
-
166
- if (!scanner.consumeStringFast('<![CDATA[')) {
167
- return false;
168
- }
169
-
170
- let text = scanner.consumeUntilString(']]>');
171
- this.validateChars(text);
172
-
173
- if (!scanner.consumeStringFast(']]>')) {
174
- this.error('Unclosed CDATA section');
175
- }
176
-
177
- if (this.options.preserveCdata) {
178
- this.addNode(new XmlCdata(text));
179
- } else {
180
- this.addText(text);
181
- }
182
-
183
- return true;
184
- }
185
-
186
- /**
187
- Consumes character data if possible.
188
-
189
- @returns {boolean}
190
- Whether character data was consumed.
191
-
192
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
193
- */
194
- consumeCharData() {
195
- let { scanner } = this;
196
- let charData = scanner.consumeUntilMatch(/<|&|]]>/g);
197
-
198
- if (!charData) {
199
- return false;
200
- }
201
-
202
- this.validateChars(charData);
203
-
204
- if (scanner.peek() === ']' && scanner.peek(3) === ']]>') {
205
- this.error('Element content may not contain the CDATA section close delimiter `]]>`');
206
- }
207
-
208
- this.addText(charData);
209
- return true;
210
- }
211
-
212
- /**
213
- Consumes a comment if possible.
214
-
215
- @returns {boolean}
216
- Whether a comment was consumed.
217
-
218
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
219
- */
220
- consumeComment() {
221
- let { scanner } = this;
222
-
223
- if (!scanner.consumeStringFast('<!--')) {
224
- return false;
225
- }
226
-
227
- let content = scanner.consumeUntilString('--');
228
- this.validateChars(content);
229
-
230
- if (!scanner.consumeStringFast('-->')) {
231
- if (scanner.peek(2) === '--') {
232
- this.error("The string `--` isn't allowed inside a comment");
233
- } else {
234
- this.error('Unclosed comment');
235
- }
236
- }
237
-
238
- if (this.options.preserveComments) {
239
- this.addNode(new XmlComment(content.trim()));
240
- }
241
-
242
- return true;
243
- }
244
-
245
- /**
246
- Consumes a reference in a content context if possible.
247
-
248
- This differs from `consumeReference()` in that a consumed reference will be
249
- added to the document as a text node instead of returned.
250
-
251
- @returns {boolean}
252
- Whether a reference was consumed.
253
-
254
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
255
- */
256
- consumeContentReference() {
257
- let ref = this.consumeReference();
258
-
259
- if (ref) {
260
- this.addText(ref);
261
- return true;
262
- }
263
-
264
- return false;
265
- }
266
-
267
- /**
268
- Consumes a doctype declaration if possible.
269
-
270
- This is a loose implementation since doctype declarations are currently
271
- discarded without further parsing.
272
-
273
- @returns {boolean}
274
- Whether a doctype declaration was consumed.
275
-
276
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
277
- */
278
- consumeDoctypeDeclaration() {
279
- let { scanner } = this;
280
-
281
- if (!scanner.consumeStringFast('<!DOCTYPE')
282
- || !this.consumeWhitespace()) {
283
-
284
- return false;
285
- }
286
-
287
- scanner.consumeMatch(/[^[>]+/y);
288
-
289
- if (scanner.consumeMatch(/\[[\s\S]+?\][\x20\t\r\n]*>/y)) {
290
- return true;
291
- }
292
-
293
- if (!scanner.consumeStringFast('>')) {
294
- this.error('Unclosed doctype declaration');
295
- }
296
-
297
- return true;
298
- }
299
-
300
- /**
301
- Consumes an element if possible.
302
-
303
- @returns {boolean}
304
- Whether an element was consumed.
305
-
306
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
307
- */
308
- consumeElement() {
309
- let { scanner } = this;
310
- let mark = scanner.charIndex;
311
-
312
- if (scanner.peek() !== '<') {
313
- return false;
314
- }
315
-
316
- scanner.advance();
317
- let name = this.consumeName();
318
-
319
- if (!name) {
320
- scanner.reset(mark);
321
- return false;
322
- }
323
-
324
- let attributes = Object.create(null);
325
-
326
- while (this.consumeWhitespace()) {
327
- let attrName = this.consumeName();
328
-
329
- if (!attrName) {
330
- continue;
331
- }
332
-
333
- let attrValue = this.consumeEqual()
334
- && this.consumeAttributeValue();
335
-
336
- if (attrValue === false) {
337
- this.error('Attribute value expected');
338
- }
339
-
340
- if (attrName in attributes) {
341
- this.error(`Duplicate attribute: ${attrName}`);
342
- }
343
-
344
- if (attrName === 'xml:space'
345
- && attrValue !== 'default'
346
- && attrValue !== 'preserve') {
347
-
348
- this.error('Value of the `xml:space` attribute must be "default" or "preserve"');
349
- }
350
-
351
- attributes[attrName] = attrValue;
352
- }
353
-
354
- if (this.options.sortAttributes) {
355
- let attrNames = Object.keys(attributes).sort();
356
- let sortedAttributes = Object.create(null);
357
-
358
- for (let i = 0; i < attrNames.length; ++i) {
359
- let attrName = attrNames[i];
360
- sortedAttributes[attrName] = attributes[attrName];
361
- }
362
-
363
- attributes = sortedAttributes;
364
- }
365
-
366
- let isEmpty = Boolean(scanner.consumeStringFast('/>'));
367
- let element = new XmlElement(name, attributes);
368
-
369
- element.parent = this.currentNode;
370
-
371
- if (!isEmpty) {
372
- if (!scanner.consumeStringFast('>')) {
373
- this.error(`Unclosed start tag for element \`${name}\``);
374
- }
375
-
376
- this.currentNode = element;
377
- this.consumeCharData();
378
-
379
- while (
380
- this.consumeElement()
381
- || this.consumeContentReference()
382
- || this.consumeCdataSection()
383
- || this.consumeProcessingInstruction()
384
- || this.consumeComment()
385
- ) {
386
- this.consumeCharData();
387
- }
388
-
389
- let endTagMark = scanner.charIndex;
390
- let endTagName;
391
-
392
- if (!scanner.consumeStringFast('</')
393
- || !(endTagName = this.consumeName())
394
- || endTagName !== name) {
395
-
396
- scanner.reset(endTagMark);
397
- this.error(`Missing end tag for element ${name}`);
398
- }
399
-
400
- this.consumeWhitespace();
401
-
402
- if (!scanner.consumeStringFast('>')) {
403
- this.error(`Unclosed end tag for element ${name}`);
404
- }
405
-
406
- this.currentNode = element.parent;
407
- }
408
-
409
- this.addNode(element);
410
- return true;
411
- }
412
-
413
- /**
414
- Consumes an `Eq` production if possible.
415
-
416
- @returns {boolean}
417
- Whether an `Eq` production was consumed.
418
-
419
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
420
- */
421
- consumeEqual() {
422
- this.consumeWhitespace();
423
-
424
- if (this.scanner.consumeStringFast('=')) {
425
- this.consumeWhitespace();
426
- return true;
427
- }
428
-
429
- return false;
430
- }
431
-
432
- /**
433
- Consumes `Misc` content if possible.
434
-
435
- @returns {boolean}
436
- Whether anything was consumed.
437
-
438
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
439
- */
440
- consumeMisc() {
441
- return this.consumeComment()
442
- || this.consumeProcessingInstruction()
443
- || this.consumeWhitespace();
444
- }
445
-
446
- /**
447
- Consumes one or more `Name` characters if possible.
448
-
449
- @returns {string}
450
- `Name` characters, or an empty string if none were consumed.
451
-
452
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
453
- */
454
- consumeName() {
455
- return syntax.isNameStartChar(this.scanner.peek())
456
- ? this.scanner.consumeMatchFn(syntax.isNameChar)
457
- : emptyString;
458
- }
459
-
460
- /**
461
- Consumes a processing instruction if possible.
462
-
463
- @returns {boolean}
464
- Whether a processing instruction was consumed.
465
-
466
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
467
- */
468
- consumeProcessingInstruction() {
469
- let { scanner } = this;
470
- let mark = scanner.charIndex;
471
-
472
- if (!scanner.consumeStringFast('<?')) {
473
- return false;
474
- }
475
-
476
- let name = this.consumeName();
477
-
478
- if (name) {
479
- if (name.toLowerCase() === 'xml') {
480
- scanner.reset(mark);
481
- this.error("XML declaration isn't allowed here");
482
- }
483
- } else {
484
- this.error('Invalid processing instruction');
485
- }
486
-
487
- if (!this.consumeWhitespace()) {
488
- if (scanner.consumeStringFast('?>')) {
489
- this.addNode(new XmlProcessingInstruction(name));
490
- return true;
491
- }
492
-
493
- this.error('Whitespace is required after a processing instruction name');
494
- }
495
-
496
- let content = scanner.consumeUntilString('?>');
497
- this.validateChars(content);
498
-
499
- if (!scanner.consumeStringFast('?>')) {
500
- this.error('Unterminated processing instruction');
501
- }
502
-
503
- this.addNode(new XmlProcessingInstruction(name, content));
504
- return true;
505
- }
506
-
507
- /**
508
- Consumes a prolog if possible.
509
-
510
- @returns {boolean}
511
- Whether a prolog was consumed.
512
-
513
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
514
- */
515
- consumeProlog() {
516
- let { scanner } = this;
517
- let mark = scanner.charIndex;
518
-
519
- this.consumeXmlDeclaration();
520
-
521
- while (this.consumeMisc()) {} // eslint-disable-line no-empty
522
-
523
- if (this.consumeDoctypeDeclaration()) {
524
- while (this.consumeMisc()) {} // eslint-disable-line no-empty
525
- }
526
-
527
- return mark < scanner.charIndex;
528
- }
529
-
530
- /**
531
- Consumes a reference if possible.
532
-
533
- This differs from `consumeContentReference()` in that a consumed reference
534
- will be returned rather than added to the document.
535
-
536
- @returns {string|false}
537
- Parsed reference value, or `false` if nothing was consumed (to distinguish
538
- from a reference that resolves to an empty string).
539
-
540
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
541
- */
542
- consumeReference() {
543
- let { scanner } = this;
544
-
545
- if (scanner.peek() !== '&') {
546
- return false;
547
- }
548
-
549
- scanner.advance();
550
-
551
- let ref = scanner.consumeMatchFn(syntax.isReferenceChar);
552
-
553
- if (scanner.consume() !== ';') {
554
- this.error('Unterminated reference (a reference must end with `;`)');
555
- }
556
-
557
- let parsedValue;
558
-
559
- if (ref[0] === '#') {
560
- // This is a character reference.
561
- let codePoint = ref[1] === 'x'
562
- ? parseInt(ref.slice(2), 16) // Hex codepoint.
563
- : parseInt(ref.slice(1), 10); // Decimal codepoint.
564
-
565
- if (isNaN(codePoint)) {
566
- this.error('Invalid character reference');
567
- }
568
-
569
- parsedValue = String.fromCodePoint(codePoint);
570
-
571
- if (!syntax.isXmlChar(parsedValue)) {
572
- this.error('Character reference resolves to an invalid character');
573
- }
574
- } else {
575
- // This is an entity reference.
576
- parsedValue = syntax.predefinedEntities[ref];
577
-
578
- if (parsedValue === undefined) {
579
- let {
580
- ignoreUndefinedEntities,
581
- resolveUndefinedEntity
582
- } = this.options;
583
-
584
- let wrappedRef = `&${ref};`; // for backcompat with <= 2.x
585
-
586
- if (resolveUndefinedEntity) {
587
- let resolvedValue = resolveUndefinedEntity(wrappedRef);
588
-
589
- if (resolvedValue !== null && resolvedValue !== undefined) {
590
- let type = typeof resolvedValue;
591
-
592
- if (type !== 'string') {
593
- throw new TypeError(`\`resolveUndefinedEntity()\` must return a string, \`null\`, or \`undefined\`, but returned a value of type ${type}`);
594
- }
595
-
596
- return resolvedValue;
597
- }
598
- }
599
-
600
- if (ignoreUndefinedEntities) {
601
- return wrappedRef;
602
- }
603
-
604
- scanner.reset(-wrappedRef.length);
605
- this.error(`Named entity isn't defined: ${wrappedRef}`);
606
- }
607
- }
608
-
609
- return parsedValue;
610
- }
611
-
612
- /**
613
- Consumes a `SystemLiteral` if possible.
614
-
615
- A `SystemLiteral` is similar to an attribute value, but allows the characters
616
- `<` and `&` and doesn't replace references.
617
-
618
- @returns {string|false}
619
- Value of the `SystemLiteral` minus quotes, or `false` if nothing was
620
- consumed. An empty string indicates that a `SystemLiteral` was consumed but
621
- was empty.
622
-
623
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
624
- */
625
- consumeSystemLiteral() {
626
- let { scanner } = this;
627
- let quote = scanner.consumeStringFast('"') || scanner.consumeStringFast("'");
628
-
629
- if (!quote) {
630
- return false;
631
- }
632
-
633
- let value = scanner.consumeUntilString(quote);
634
- this.validateChars(value);
635
-
636
- if (!scanner.consumeStringFast(quote)) {
637
- this.error('Missing end quote');
638
- }
639
-
640
- return value;
641
- }
642
-
643
- /**
644
- Consumes one or more whitespace characters if possible.
645
-
646
- @returns {boolean}
647
- Whether any whitespace characters were consumed.
648
-
649
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
650
- */
651
- consumeWhitespace() {
652
- return Boolean(this.scanner.consumeMatchFn(syntax.isWhitespace));
653
- }
654
-
655
- /**
656
- Consumes an XML declaration if possible.
657
-
658
- @returns {boolean}
659
- Whether an XML declaration was consumed.
660
-
661
- @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
662
- */
663
- consumeXmlDeclaration() {
664
- let { scanner } = this;
665
-
666
- if (!scanner.consumeStringFast('<?xml')) {
667
- return false;
668
- }
669
-
670
- if (!this.consumeWhitespace()) {
671
- this.error('Invalid XML declaration');
672
- }
673
-
674
- let version = Boolean(scanner.consumeStringFast('version'))
675
- && this.consumeEqual()
676
- && this.consumeSystemLiteral();
677
-
678
- if (version === false) {
679
- this.error('XML version is missing or invalid');
680
- } else if (!/^1\.[0-9]+$/.test(version)) {
681
- this.error('Invalid character in version number');
682
- }
683
-
684
- if (this.consumeWhitespace()) {
685
- let encoding = Boolean(scanner.consumeStringFast('encoding'))
686
- && this.consumeEqual()
687
- && this.consumeSystemLiteral();
688
-
689
- if (encoding) {
690
- this.consumeWhitespace();
691
- }
692
-
693
- let standalone = Boolean(scanner.consumeStringFast('standalone'))
694
- && this.consumeEqual()
695
- && this.consumeSystemLiteral();
696
-
697
- if (standalone) {
698
- if (standalone !== 'yes' && standalone !== 'no') {
699
- this.error('Only "yes" and "no" are permitted as values of `standalone`');
700
- }
701
-
702
- this.consumeWhitespace();
703
- }
704
- }
705
-
706
- if (!scanner.consumeStringFast('?>')) {
707
- this.error('Invalid or unclosed XML declaration');
708
- }
709
-
710
- return true;
711
- }
712
-
713
- /**
714
- Throws an error at the current scanner position.
715
-
716
- @param {string} message
717
- */
718
- error(message) {
719
- let { charIndex, string: xml } = this.scanner;
720
- let column = 1;
721
- let excerpt = '';
722
- let line = 1;
723
-
724
- // Find the line and column where the error occurred.
725
- for (let i = 0; i < charIndex; ++i) {
726
- let char = xml[i];
727
-
728
- if (char === '\n') {
729
- column = 1;
730
- excerpt = '';
731
- line += 1;
732
- } else {
733
- column += 1;
734
- excerpt += char;
735
- }
736
- }
737
-
738
- let eol = xml.indexOf('\n', charIndex);
739
-
740
- excerpt += eol === -1
741
- ? xml.slice(charIndex)
742
- : xml.slice(charIndex, eol);
743
-
744
- let excerptStart = 0;
745
-
746
- // Keep the excerpt below 50 chars, but always keep the error position in
747
- // view.
748
- if (excerpt.length > 50) {
749
- if (column < 40) {
750
- excerpt = excerpt.slice(0, 50);
751
- } else {
752
- excerptStart = column - 20;
753
- excerpt = excerpt.slice(excerptStart, column + 30);
754
- }
755
- }
756
-
757
- let err = new Error(
758
- `${message} (line ${line}, column ${column})\n`
759
- + ` ${excerpt}\n`
760
- + ' '.repeat(column - excerptStart + 1) + '^\n'
761
- );
762
-
763
- Object.assign(err, {
764
- column,
765
- excerpt,
766
- line,
767
- pos: charIndex
768
- });
769
-
770
- throw err;
771
- }
772
-
773
- /**
774
- Throws an invalid character error if any character in the given _string_ isn't
775
- a valid XML character.
776
-
777
- @param {string} string
778
- */
779
- validateChars(string) {
780
- let charIndex = 0;
781
-
782
- for (let char of string) {
783
- if (syntax.isNotXmlChar(char)) {
784
- this.scanner.reset(-([ ...string ].length - charIndex));
785
- this.error('Invalid character');
786
- }
787
-
788
- charIndex += 1;
789
- }
790
- }
791
- }
792
-
793
- module.exports = Parser;
794
-
795
- // -- Private Functions --------------------------------------------------------
796
-
797
- /**
798
- Normalizes the given XML string by stripping a byte order mark (if present) and
799
- replacing CRLF sequences and lone CR characters with LF characters.
800
-
801
- @param {string} xml
802
- @returns {string}
803
- */
804
- function normalizeXmlString(xml) {
805
- if (xml[0] === '\uFEFF') {
806
- xml = xml.slice(1);
807
- }
808
-
809
- return xml.replace(/\r\n?/g, '\n');
810
- }
811
-
812
- /** @typedef {import('./XmlNode')} XmlNode */