@rgrove/parse-xml 3.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +72 -97
- package/dist/browser.js +774 -0
- package/dist/browser.js.map +7 -0
- package/dist/global.min.js +10 -0
- package/dist/global.min.js.map +7 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +50 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/Parser.d.ts +218 -0
- package/dist/lib/Parser.d.ts.map +1 -0
- package/dist/lib/Parser.js +638 -0
- package/dist/lib/Parser.js.map +1 -0
- package/dist/lib/StringScanner.d.ts +97 -0
- package/dist/lib/StringScanner.d.ts.map +1 -0
- package/dist/lib/StringScanner.js +210 -0
- package/dist/lib/StringScanner.js.map +1 -0
- package/dist/lib/XmlCdata.d.ts +8 -0
- package/dist/lib/XmlCdata.d.ts.map +1 -0
- package/dist/lib/XmlCdata.js +15 -0
- package/dist/lib/XmlCdata.js.map +1 -0
- package/dist/lib/XmlComment.d.ts +16 -0
- package/dist/lib/XmlComment.d.ts.map +1 -0
- package/dist/lib/XmlComment.js +23 -0
- package/dist/lib/XmlComment.js.map +1 -0
- package/dist/lib/XmlDocument.d.ts +29 -0
- package/dist/lib/XmlDocument.d.ts.map +1 -0
- package/dist/lib/XmlDocument.js +47 -0
- package/dist/lib/XmlDocument.js.map +1 -0
- package/dist/lib/XmlElement.d.ts +40 -0
- package/dist/lib/XmlElement.d.ts.map +1 -0
- package/dist/lib/XmlElement.js +51 -0
- package/dist/lib/XmlElement.js.map +1 -0
- package/dist/lib/XmlNode.d.ts +74 -0
- package/dist/lib/XmlNode.d.ts.map +1 -0
- package/dist/lib/XmlNode.js +96 -0
- package/dist/lib/XmlNode.js.map +1 -0
- package/dist/lib/XmlProcessingInstruction.d.ts +22 -0
- package/dist/lib/XmlProcessingInstruction.d.ts.map +1 -0
- package/dist/lib/XmlProcessingInstruction.js +25 -0
- package/dist/lib/XmlProcessingInstruction.js.map +1 -0
- package/dist/lib/XmlText.d.ts +16 -0
- package/dist/lib/XmlText.d.ts.map +1 -0
- package/dist/lib/XmlText.js +23 -0
- package/dist/lib/XmlText.js.map +1 -0
- package/dist/lib/syntax.d.ts +69 -0
- package/dist/lib/syntax.d.ts.map +1 -0
- package/dist/lib/syntax.js +133 -0
- package/dist/lib/syntax.js.map +1 -0
- package/dist/lib/types.d.ts +5 -0
- package/dist/lib/types.d.ts.map +1 -0
- package/dist/lib/types.js +3 -0
- package/dist/lib/types.js.map +1 -0
- package/package.json +33 -26
- package/src/index.ts +30 -0
- package/src/lib/Parser.ts +819 -0
- package/src/lib/StringScanner.ts +254 -0
- package/src/lib/XmlCdata.ts +11 -0
- package/src/lib/XmlComment.ts +26 -0
- package/src/lib/XmlDocument.ts +57 -0
- package/src/lib/XmlElement.ts +81 -0
- package/src/lib/XmlNode.ts +107 -0
- package/src/lib/XmlProcessingInstruction.ts +35 -0
- package/src/lib/XmlText.ts +26 -0
- package/src/lib/syntax.ts +136 -0
- package/src/lib/types.ts +2 -0
- package/CHANGELOG.md +0 -162
- package/dist/types/index.d.ts +0 -68
- package/dist/types/index.d.ts.map +0 -1
- package/dist/types/lib/Parser.d.ts +0 -234
- package/dist/types/lib/Parser.d.ts.map +0 -1
- package/dist/types/lib/StringScanner.d.ts +0 -139
- package/dist/types/lib/StringScanner.d.ts.map +0 -1
- package/dist/types/lib/XmlCdata.d.ts +0 -11
- package/dist/types/lib/XmlCdata.d.ts.map +0 -1
- package/dist/types/lib/XmlComment.d.ts +0 -21
- package/dist/types/lib/XmlComment.d.ts.map +0 -1
- package/dist/types/lib/XmlDocument.d.ts +0 -42
- package/dist/types/lib/XmlDocument.d.ts.map +0 -1
- package/dist/types/lib/XmlElement.d.ts +0 -62
- package/dist/types/lib/XmlElement.d.ts.map +0 -1
- package/dist/types/lib/XmlNode.d.ts +0 -78
- package/dist/types/lib/XmlNode.d.ts.map +0 -1
- package/dist/types/lib/XmlProcessingInstruction.d.ts +0 -30
- package/dist/types/lib/XmlProcessingInstruction.d.ts.map +0 -1
- package/dist/types/lib/XmlText.d.ts +0 -21
- package/dist/types/lib/XmlText.d.ts.map +0 -1
- package/dist/types/lib/syntax.d.ts +0 -59
- package/dist/types/lib/syntax.d.ts.map +0 -1
- package/dist/umd/parse-xml.min.js +0 -2
- package/dist/umd/parse-xml.min.js.map +0 -1
- package/src/index.js +0 -67
- package/src/lib/Parser.js +0 -812
- package/src/lib/StringScanner.js +0 -312
- package/src/lib/XmlCdata.js +0 -17
- package/src/lib/XmlComment.js +0 -37
- package/src/lib/XmlDocument.js +0 -69
- package/src/lib/XmlElement.js +0 -101
- package/src/lib/XmlNode.js +0 -152
- package/src/lib/XmlProcessingInstruction.js +0 -48
- package/src/lib/XmlText.js +0 -37
- package/src/lib/syntax.js +0 -153
package/src/lib/Parser.js
DELETED
|
@@ -1,812 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
const StringScanner = require('./StringScanner');
|
|
4
|
-
const syntax = require('./syntax');
|
|
5
|
-
const XmlCdata = require('./XmlCdata');
|
|
6
|
-
const XmlComment = require('./XmlComment');
|
|
7
|
-
const XmlDocument = require('./XmlDocument');
|
|
8
|
-
const XmlElement = require('./XmlElement');
|
|
9
|
-
const XmlProcessingInstruction = require('./XmlProcessingInstruction');
|
|
10
|
-
const XmlText = require('./XmlText');
|
|
11
|
-
|
|
12
|
-
const emptyString = '';
|
|
13
|
-
|
|
14
|
-
/**
|
|
15
|
-
Parses an XML string into an `XmlDocument`.
|
|
16
|
-
|
|
17
|
-
@private
|
|
18
|
-
*/
|
|
19
|
-
class Parser {
|
|
20
|
-
/**
|
|
21
|
-
@param {string} xml
|
|
22
|
-
XML string to parse.
|
|
23
|
-
|
|
24
|
-
@param {object} [options]
|
|
25
|
-
Parsing options.
|
|
26
|
-
|
|
27
|
-
@param {boolean} [options.ignoreUndefinedEntities=false]
|
|
28
|
-
@param {boolean} [options.preserveCdata=false]
|
|
29
|
-
@param {boolean} [options.preserveComments=false]
|
|
30
|
-
@param {(entity: string) => string?} [options.resolveUndefinedEntity]
|
|
31
|
-
@param {boolean} [options.sortAttributes=false]
|
|
32
|
-
*/
|
|
33
|
-
constructor(xml, options = {}) {
|
|
34
|
-
/** @type {XmlDocument} */
|
|
35
|
-
this.document = new XmlDocument();
|
|
36
|
-
|
|
37
|
-
/** @type {XmlDocument|XmlElement} */
|
|
38
|
-
this.currentNode = this.document;
|
|
39
|
-
|
|
40
|
-
this.options = options;
|
|
41
|
-
this.scanner = new StringScanner(normalizeXmlString(xml));
|
|
42
|
-
|
|
43
|
-
this.consumeProlog();
|
|
44
|
-
|
|
45
|
-
if (!this.consumeElement()) {
|
|
46
|
-
this.error('Root element is missing or invalid');
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
50
|
-
|
|
51
|
-
if (!this.scanner.isEnd) {
|
|
52
|
-
this.error('Extra content at the end of the document');
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
/**
|
|
57
|
-
Adds the given `XmlNode` as a child of `this.currentNode`.
|
|
58
|
-
|
|
59
|
-
@param {XmlNode} node
|
|
60
|
-
*/
|
|
61
|
-
addNode(node) {
|
|
62
|
-
node.parent = this.currentNode;
|
|
63
|
-
|
|
64
|
-
// @ts-ignore
|
|
65
|
-
this.currentNode.children.push(node);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
/**
|
|
69
|
-
Adds the given _text_ to the document, either by appending it to a preceding
|
|
70
|
-
`XmlText` node (if possible) or by creating a new `XmlText` node.
|
|
71
|
-
|
|
72
|
-
@param {string} text
|
|
73
|
-
*/
|
|
74
|
-
addText(text) {
|
|
75
|
-
let { children } = this.currentNode;
|
|
76
|
-
|
|
77
|
-
if (children.length > 0) {
|
|
78
|
-
let prevNode = children[children.length - 1];
|
|
79
|
-
|
|
80
|
-
if (prevNode instanceof XmlText) {
|
|
81
|
-
// The previous node is a text node, so we can append to it and avoid
|
|
82
|
-
// creating another node.
|
|
83
|
-
prevNode.text += text;
|
|
84
|
-
return;
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
this.addNode(new XmlText(text));
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
/**
|
|
92
|
-
Consumes an `AttValue` (attribute value) if possible.
|
|
93
|
-
|
|
94
|
-
@returns {string|false}
|
|
95
|
-
Contents of the `AttValue` minus quotes, or `false` if nothing was consumed.
|
|
96
|
-
An empty string indicates that an `AttValue` was consumed but was empty.
|
|
97
|
-
|
|
98
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
|
|
99
|
-
*/
|
|
100
|
-
consumeAttributeValue() {
|
|
101
|
-
let { scanner } = this;
|
|
102
|
-
let quote = scanner.peek();
|
|
103
|
-
|
|
104
|
-
if (quote !== '"' && quote !== "'") {
|
|
105
|
-
return false;
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
scanner.advance();
|
|
109
|
-
|
|
110
|
-
let chars;
|
|
111
|
-
let isClosed = false;
|
|
112
|
-
let value = emptyString;
|
|
113
|
-
let regex = quote === '"'
|
|
114
|
-
? /[^"&<]+/y
|
|
115
|
-
: /[^'&<]+/y;
|
|
116
|
-
|
|
117
|
-
matchLoop: while (!scanner.isEnd) {
|
|
118
|
-
chars = scanner.consumeMatch(regex);
|
|
119
|
-
|
|
120
|
-
if (chars) {
|
|
121
|
-
this.validateChars(chars);
|
|
122
|
-
value += chars.replace(/[\t\r\n]/g, ' ');
|
|
123
|
-
}
|
|
124
|
-
|
|
125
|
-
let nextChar = scanner.peek();
|
|
126
|
-
|
|
127
|
-
switch (nextChar) {
|
|
128
|
-
case quote:
|
|
129
|
-
isClosed = true;
|
|
130
|
-
break matchLoop;
|
|
131
|
-
|
|
132
|
-
case '&':
|
|
133
|
-
value += this.consumeReference();
|
|
134
|
-
continue;
|
|
135
|
-
|
|
136
|
-
case '<':
|
|
137
|
-
this.error('Unescaped `<` is not allowed in an attribute value'); /* istanbul ignore next */
|
|
138
|
-
break;
|
|
139
|
-
|
|
140
|
-
case emptyString:
|
|
141
|
-
this.error('Unclosed attribute'); /* istanbul ignore next */
|
|
142
|
-
break;
|
|
143
|
-
|
|
144
|
-
}
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
if (!isClosed) {
|
|
148
|
-
this.error('Unclosed attribute');
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
scanner.advance();
|
|
152
|
-
return value;
|
|
153
|
-
}
|
|
154
|
-
|
|
155
|
-
/**
|
|
156
|
-
Consumes a CDATA section if possible.
|
|
157
|
-
|
|
158
|
-
@returns {boolean}
|
|
159
|
-
Whether a CDATA section was consumed.
|
|
160
|
-
|
|
161
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
|
|
162
|
-
*/
|
|
163
|
-
consumeCdataSection() {
|
|
164
|
-
let { scanner } = this;
|
|
165
|
-
|
|
166
|
-
if (!scanner.consumeStringFast('<![CDATA[')) {
|
|
167
|
-
return false;
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
let text = scanner.consumeUntilString(']]>');
|
|
171
|
-
this.validateChars(text);
|
|
172
|
-
|
|
173
|
-
if (!scanner.consumeStringFast(']]>')) {
|
|
174
|
-
this.error('Unclosed CDATA section');
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
if (this.options.preserveCdata) {
|
|
178
|
-
this.addNode(new XmlCdata(text));
|
|
179
|
-
} else {
|
|
180
|
-
this.addText(text);
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
return true;
|
|
184
|
-
}
|
|
185
|
-
|
|
186
|
-
/**
|
|
187
|
-
Consumes character data if possible.
|
|
188
|
-
|
|
189
|
-
@returns {boolean}
|
|
190
|
-
Whether character data was consumed.
|
|
191
|
-
|
|
192
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
|
|
193
|
-
*/
|
|
194
|
-
consumeCharData() {
|
|
195
|
-
let { scanner } = this;
|
|
196
|
-
let charData = scanner.consumeUntilMatch(/<|&|]]>/g);
|
|
197
|
-
|
|
198
|
-
if (!charData) {
|
|
199
|
-
return false;
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
this.validateChars(charData);
|
|
203
|
-
|
|
204
|
-
if (scanner.peek() === ']' && scanner.peek(3) === ']]>') {
|
|
205
|
-
this.error('Element content may not contain the CDATA section close delimiter `]]>`');
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
this.addText(charData);
|
|
209
|
-
return true;
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
/**
|
|
213
|
-
Consumes a comment if possible.
|
|
214
|
-
|
|
215
|
-
@returns {boolean}
|
|
216
|
-
Whether a comment was consumed.
|
|
217
|
-
|
|
218
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
|
|
219
|
-
*/
|
|
220
|
-
consumeComment() {
|
|
221
|
-
let { scanner } = this;
|
|
222
|
-
|
|
223
|
-
if (!scanner.consumeStringFast('<!--')) {
|
|
224
|
-
return false;
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
let content = scanner.consumeUntilString('--');
|
|
228
|
-
this.validateChars(content);
|
|
229
|
-
|
|
230
|
-
if (!scanner.consumeStringFast('-->')) {
|
|
231
|
-
if (scanner.peek(2) === '--') {
|
|
232
|
-
this.error("The string `--` isn't allowed inside a comment");
|
|
233
|
-
} else {
|
|
234
|
-
this.error('Unclosed comment');
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
if (this.options.preserveComments) {
|
|
239
|
-
this.addNode(new XmlComment(content.trim()));
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
return true;
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
/**
|
|
246
|
-
Consumes a reference in a content context if possible.
|
|
247
|
-
|
|
248
|
-
This differs from `consumeReference()` in that a consumed reference will be
|
|
249
|
-
added to the document as a text node instead of returned.
|
|
250
|
-
|
|
251
|
-
@returns {boolean}
|
|
252
|
-
Whether a reference was consumed.
|
|
253
|
-
|
|
254
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
|
|
255
|
-
*/
|
|
256
|
-
consumeContentReference() {
|
|
257
|
-
let ref = this.consumeReference();
|
|
258
|
-
|
|
259
|
-
if (ref) {
|
|
260
|
-
this.addText(ref);
|
|
261
|
-
return true;
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
return false;
|
|
265
|
-
}
|
|
266
|
-
|
|
267
|
-
/**
|
|
268
|
-
Consumes a doctype declaration if possible.
|
|
269
|
-
|
|
270
|
-
This is a loose implementation since doctype declarations are currently
|
|
271
|
-
discarded without further parsing.
|
|
272
|
-
|
|
273
|
-
@returns {boolean}
|
|
274
|
-
Whether a doctype declaration was consumed.
|
|
275
|
-
|
|
276
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
|
|
277
|
-
*/
|
|
278
|
-
consumeDoctypeDeclaration() {
|
|
279
|
-
let { scanner } = this;
|
|
280
|
-
|
|
281
|
-
if (!scanner.consumeStringFast('<!DOCTYPE')
|
|
282
|
-
|| !this.consumeWhitespace()) {
|
|
283
|
-
|
|
284
|
-
return false;
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
scanner.consumeMatch(/[^[>]+/y);
|
|
288
|
-
|
|
289
|
-
if (scanner.consumeMatch(/\[[\s\S]+?\][\x20\t\r\n]*>/y)) {
|
|
290
|
-
return true;
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
if (!scanner.consumeStringFast('>')) {
|
|
294
|
-
this.error('Unclosed doctype declaration');
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
return true;
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
/**
|
|
301
|
-
Consumes an element if possible.
|
|
302
|
-
|
|
303
|
-
@returns {boolean}
|
|
304
|
-
Whether an element was consumed.
|
|
305
|
-
|
|
306
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
|
|
307
|
-
*/
|
|
308
|
-
consumeElement() {
|
|
309
|
-
let { scanner } = this;
|
|
310
|
-
let mark = scanner.charIndex;
|
|
311
|
-
|
|
312
|
-
if (scanner.peek() !== '<') {
|
|
313
|
-
return false;
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
scanner.advance();
|
|
317
|
-
let name = this.consumeName();
|
|
318
|
-
|
|
319
|
-
if (!name) {
|
|
320
|
-
scanner.reset(mark);
|
|
321
|
-
return false;
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
let attributes = Object.create(null);
|
|
325
|
-
|
|
326
|
-
while (this.consumeWhitespace()) {
|
|
327
|
-
let attrName = this.consumeName();
|
|
328
|
-
|
|
329
|
-
if (!attrName) {
|
|
330
|
-
continue;
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
let attrValue = this.consumeEqual()
|
|
334
|
-
&& this.consumeAttributeValue();
|
|
335
|
-
|
|
336
|
-
if (attrValue === false) {
|
|
337
|
-
this.error('Attribute value expected');
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
if (attrName in attributes) {
|
|
341
|
-
this.error(`Duplicate attribute: ${attrName}`);
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
if (attrName === 'xml:space'
|
|
345
|
-
&& attrValue !== 'default'
|
|
346
|
-
&& attrValue !== 'preserve') {
|
|
347
|
-
|
|
348
|
-
this.error('Value of the `xml:space` attribute must be "default" or "preserve"');
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
attributes[attrName] = attrValue;
|
|
352
|
-
}
|
|
353
|
-
|
|
354
|
-
if (this.options.sortAttributes) {
|
|
355
|
-
let attrNames = Object.keys(attributes).sort();
|
|
356
|
-
let sortedAttributes = Object.create(null);
|
|
357
|
-
|
|
358
|
-
for (let i = 0; i < attrNames.length; ++i) {
|
|
359
|
-
let attrName = attrNames[i];
|
|
360
|
-
sortedAttributes[attrName] = attributes[attrName];
|
|
361
|
-
}
|
|
362
|
-
|
|
363
|
-
attributes = sortedAttributes;
|
|
364
|
-
}
|
|
365
|
-
|
|
366
|
-
let isEmpty = Boolean(scanner.consumeStringFast('/>'));
|
|
367
|
-
let element = new XmlElement(name, attributes);
|
|
368
|
-
|
|
369
|
-
element.parent = this.currentNode;
|
|
370
|
-
|
|
371
|
-
if (!isEmpty) {
|
|
372
|
-
if (!scanner.consumeStringFast('>')) {
|
|
373
|
-
this.error(`Unclosed start tag for element \`${name}\``);
|
|
374
|
-
}
|
|
375
|
-
|
|
376
|
-
this.currentNode = element;
|
|
377
|
-
this.consumeCharData();
|
|
378
|
-
|
|
379
|
-
while (
|
|
380
|
-
this.consumeElement()
|
|
381
|
-
|| this.consumeContentReference()
|
|
382
|
-
|| this.consumeCdataSection()
|
|
383
|
-
|| this.consumeProcessingInstruction()
|
|
384
|
-
|| this.consumeComment()
|
|
385
|
-
) {
|
|
386
|
-
this.consumeCharData();
|
|
387
|
-
}
|
|
388
|
-
|
|
389
|
-
let endTagMark = scanner.charIndex;
|
|
390
|
-
let endTagName;
|
|
391
|
-
|
|
392
|
-
if (!scanner.consumeStringFast('</')
|
|
393
|
-
|| !(endTagName = this.consumeName())
|
|
394
|
-
|| endTagName !== name) {
|
|
395
|
-
|
|
396
|
-
scanner.reset(endTagMark);
|
|
397
|
-
this.error(`Missing end tag for element ${name}`);
|
|
398
|
-
}
|
|
399
|
-
|
|
400
|
-
this.consumeWhitespace();
|
|
401
|
-
|
|
402
|
-
if (!scanner.consumeStringFast('>')) {
|
|
403
|
-
this.error(`Unclosed end tag for element ${name}`);
|
|
404
|
-
}
|
|
405
|
-
|
|
406
|
-
this.currentNode = element.parent;
|
|
407
|
-
}
|
|
408
|
-
|
|
409
|
-
this.addNode(element);
|
|
410
|
-
return true;
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
/**
|
|
414
|
-
Consumes an `Eq` production if possible.
|
|
415
|
-
|
|
416
|
-
@returns {boolean}
|
|
417
|
-
Whether an `Eq` production was consumed.
|
|
418
|
-
|
|
419
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
|
|
420
|
-
*/
|
|
421
|
-
consumeEqual() {
|
|
422
|
-
this.consumeWhitespace();
|
|
423
|
-
|
|
424
|
-
if (this.scanner.consumeStringFast('=')) {
|
|
425
|
-
this.consumeWhitespace();
|
|
426
|
-
return true;
|
|
427
|
-
}
|
|
428
|
-
|
|
429
|
-
return false;
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
/**
|
|
433
|
-
Consumes `Misc` content if possible.
|
|
434
|
-
|
|
435
|
-
@returns {boolean}
|
|
436
|
-
Whether anything was consumed.
|
|
437
|
-
|
|
438
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
|
|
439
|
-
*/
|
|
440
|
-
consumeMisc() {
|
|
441
|
-
return this.consumeComment()
|
|
442
|
-
|| this.consumeProcessingInstruction()
|
|
443
|
-
|| this.consumeWhitespace();
|
|
444
|
-
}
|
|
445
|
-
|
|
446
|
-
/**
|
|
447
|
-
Consumes one or more `Name` characters if possible.
|
|
448
|
-
|
|
449
|
-
@returns {string}
|
|
450
|
-
`Name` characters, or an empty string if none were consumed.
|
|
451
|
-
|
|
452
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
|
|
453
|
-
*/
|
|
454
|
-
consumeName() {
|
|
455
|
-
return syntax.isNameStartChar(this.scanner.peek())
|
|
456
|
-
? this.scanner.consumeMatchFn(syntax.isNameChar)
|
|
457
|
-
: emptyString;
|
|
458
|
-
}
|
|
459
|
-
|
|
460
|
-
/**
|
|
461
|
-
Consumes a processing instruction if possible.
|
|
462
|
-
|
|
463
|
-
@returns {boolean}
|
|
464
|
-
Whether a processing instruction was consumed.
|
|
465
|
-
|
|
466
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
|
|
467
|
-
*/
|
|
468
|
-
consumeProcessingInstruction() {
|
|
469
|
-
let { scanner } = this;
|
|
470
|
-
let mark = scanner.charIndex;
|
|
471
|
-
|
|
472
|
-
if (!scanner.consumeStringFast('<?')) {
|
|
473
|
-
return false;
|
|
474
|
-
}
|
|
475
|
-
|
|
476
|
-
let name = this.consumeName();
|
|
477
|
-
|
|
478
|
-
if (name) {
|
|
479
|
-
if (name.toLowerCase() === 'xml') {
|
|
480
|
-
scanner.reset(mark);
|
|
481
|
-
this.error("XML declaration isn't allowed here");
|
|
482
|
-
}
|
|
483
|
-
} else {
|
|
484
|
-
this.error('Invalid processing instruction');
|
|
485
|
-
}
|
|
486
|
-
|
|
487
|
-
if (!this.consumeWhitespace()) {
|
|
488
|
-
if (scanner.consumeStringFast('?>')) {
|
|
489
|
-
this.addNode(new XmlProcessingInstruction(name));
|
|
490
|
-
return true;
|
|
491
|
-
}
|
|
492
|
-
|
|
493
|
-
this.error('Whitespace is required after a processing instruction name');
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
let content = scanner.consumeUntilString('?>');
|
|
497
|
-
this.validateChars(content);
|
|
498
|
-
|
|
499
|
-
if (!scanner.consumeStringFast('?>')) {
|
|
500
|
-
this.error('Unterminated processing instruction');
|
|
501
|
-
}
|
|
502
|
-
|
|
503
|
-
this.addNode(new XmlProcessingInstruction(name, content));
|
|
504
|
-
return true;
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
/**
|
|
508
|
-
Consumes a prolog if possible.
|
|
509
|
-
|
|
510
|
-
@returns {boolean}
|
|
511
|
-
Whether a prolog was consumed.
|
|
512
|
-
|
|
513
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
|
|
514
|
-
*/
|
|
515
|
-
consumeProlog() {
|
|
516
|
-
let { scanner } = this;
|
|
517
|
-
let mark = scanner.charIndex;
|
|
518
|
-
|
|
519
|
-
this.consumeXmlDeclaration();
|
|
520
|
-
|
|
521
|
-
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
522
|
-
|
|
523
|
-
if (this.consumeDoctypeDeclaration()) {
|
|
524
|
-
while (this.consumeMisc()) {} // eslint-disable-line no-empty
|
|
525
|
-
}
|
|
526
|
-
|
|
527
|
-
return mark < scanner.charIndex;
|
|
528
|
-
}
|
|
529
|
-
|
|
530
|
-
/**
|
|
531
|
-
Consumes a reference if possible.
|
|
532
|
-
|
|
533
|
-
This differs from `consumeContentReference()` in that a consumed reference
|
|
534
|
-
will be returned rather than added to the document.
|
|
535
|
-
|
|
536
|
-
@returns {string|false}
|
|
537
|
-
Parsed reference value, or `false` if nothing was consumed (to distinguish
|
|
538
|
-
from a reference that resolves to an empty string).
|
|
539
|
-
|
|
540
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
|
|
541
|
-
*/
|
|
542
|
-
consumeReference() {
|
|
543
|
-
let { scanner } = this;
|
|
544
|
-
|
|
545
|
-
if (scanner.peek() !== '&') {
|
|
546
|
-
return false;
|
|
547
|
-
}
|
|
548
|
-
|
|
549
|
-
scanner.advance();
|
|
550
|
-
|
|
551
|
-
let ref = scanner.consumeMatchFn(syntax.isReferenceChar);
|
|
552
|
-
|
|
553
|
-
if (scanner.consume() !== ';') {
|
|
554
|
-
this.error('Unterminated reference (a reference must end with `;`)');
|
|
555
|
-
}
|
|
556
|
-
|
|
557
|
-
let parsedValue;
|
|
558
|
-
|
|
559
|
-
if (ref[0] === '#') {
|
|
560
|
-
// This is a character reference.
|
|
561
|
-
let codePoint = ref[1] === 'x'
|
|
562
|
-
? parseInt(ref.slice(2), 16) // Hex codepoint.
|
|
563
|
-
: parseInt(ref.slice(1), 10); // Decimal codepoint.
|
|
564
|
-
|
|
565
|
-
if (isNaN(codePoint)) {
|
|
566
|
-
this.error('Invalid character reference');
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
parsedValue = String.fromCodePoint(codePoint);
|
|
570
|
-
|
|
571
|
-
if (!syntax.isXmlChar(parsedValue)) {
|
|
572
|
-
this.error('Character reference resolves to an invalid character');
|
|
573
|
-
}
|
|
574
|
-
} else {
|
|
575
|
-
// This is an entity reference.
|
|
576
|
-
parsedValue = syntax.predefinedEntities[ref];
|
|
577
|
-
|
|
578
|
-
if (parsedValue === undefined) {
|
|
579
|
-
let {
|
|
580
|
-
ignoreUndefinedEntities,
|
|
581
|
-
resolveUndefinedEntity
|
|
582
|
-
} = this.options;
|
|
583
|
-
|
|
584
|
-
let wrappedRef = `&${ref};`; // for backcompat with <= 2.x
|
|
585
|
-
|
|
586
|
-
if (resolveUndefinedEntity) {
|
|
587
|
-
let resolvedValue = resolveUndefinedEntity(wrappedRef);
|
|
588
|
-
|
|
589
|
-
if (resolvedValue !== null && resolvedValue !== undefined) {
|
|
590
|
-
let type = typeof resolvedValue;
|
|
591
|
-
|
|
592
|
-
if (type !== 'string') {
|
|
593
|
-
throw new TypeError(`\`resolveUndefinedEntity()\` must return a string, \`null\`, or \`undefined\`, but returned a value of type ${type}`);
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
return resolvedValue;
|
|
597
|
-
}
|
|
598
|
-
}
|
|
599
|
-
|
|
600
|
-
if (ignoreUndefinedEntities) {
|
|
601
|
-
return wrappedRef;
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
scanner.reset(-wrappedRef.length);
|
|
605
|
-
this.error(`Named entity isn't defined: ${wrappedRef}`);
|
|
606
|
-
}
|
|
607
|
-
}
|
|
608
|
-
|
|
609
|
-
return parsedValue;
|
|
610
|
-
}
|
|
611
|
-
|
|
612
|
-
/**
|
|
613
|
-
Consumes a `SystemLiteral` if possible.
|
|
614
|
-
|
|
615
|
-
A `SystemLiteral` is similar to an attribute value, but allows the characters
|
|
616
|
-
`<` and `&` and doesn't replace references.
|
|
617
|
-
|
|
618
|
-
@returns {string|false}
|
|
619
|
-
Value of the `SystemLiteral` minus quotes, or `false` if nothing was
|
|
620
|
-
consumed. An empty string indicates that a `SystemLiteral` was consumed but
|
|
621
|
-
was empty.
|
|
622
|
-
|
|
623
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
|
|
624
|
-
*/
|
|
625
|
-
consumeSystemLiteral() {
|
|
626
|
-
let { scanner } = this;
|
|
627
|
-
let quote = scanner.consumeStringFast('"') || scanner.consumeStringFast("'");
|
|
628
|
-
|
|
629
|
-
if (!quote) {
|
|
630
|
-
return false;
|
|
631
|
-
}
|
|
632
|
-
|
|
633
|
-
let value = scanner.consumeUntilString(quote);
|
|
634
|
-
this.validateChars(value);
|
|
635
|
-
|
|
636
|
-
if (!scanner.consumeStringFast(quote)) {
|
|
637
|
-
this.error('Missing end quote');
|
|
638
|
-
}
|
|
639
|
-
|
|
640
|
-
return value;
|
|
641
|
-
}
|
|
642
|
-
|
|
643
|
-
/**
|
|
644
|
-
Consumes one or more whitespace characters if possible.
|
|
645
|
-
|
|
646
|
-
@returns {boolean}
|
|
647
|
-
Whether any whitespace characters were consumed.
|
|
648
|
-
|
|
649
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#white
|
|
650
|
-
*/
|
|
651
|
-
consumeWhitespace() {
|
|
652
|
-
return Boolean(this.scanner.consumeMatchFn(syntax.isWhitespace));
|
|
653
|
-
}
|
|
654
|
-
|
|
655
|
-
/**
|
|
656
|
-
Consumes an XML declaration if possible.
|
|
657
|
-
|
|
658
|
-
@returns {boolean}
|
|
659
|
-
Whether an XML declaration was consumed.
|
|
660
|
-
|
|
661
|
-
@see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
|
|
662
|
-
*/
|
|
663
|
-
consumeXmlDeclaration() {
|
|
664
|
-
let { scanner } = this;
|
|
665
|
-
|
|
666
|
-
if (!scanner.consumeStringFast('<?xml')) {
|
|
667
|
-
return false;
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
if (!this.consumeWhitespace()) {
|
|
671
|
-
this.error('Invalid XML declaration');
|
|
672
|
-
}
|
|
673
|
-
|
|
674
|
-
let version = Boolean(scanner.consumeStringFast('version'))
|
|
675
|
-
&& this.consumeEqual()
|
|
676
|
-
&& this.consumeSystemLiteral();
|
|
677
|
-
|
|
678
|
-
if (version === false) {
|
|
679
|
-
this.error('XML version is missing or invalid');
|
|
680
|
-
} else if (!/^1\.[0-9]+$/.test(version)) {
|
|
681
|
-
this.error('Invalid character in version number');
|
|
682
|
-
}
|
|
683
|
-
|
|
684
|
-
if (this.consumeWhitespace()) {
|
|
685
|
-
let encoding = Boolean(scanner.consumeStringFast('encoding'))
|
|
686
|
-
&& this.consumeEqual()
|
|
687
|
-
&& this.consumeSystemLiteral();
|
|
688
|
-
|
|
689
|
-
if (encoding) {
|
|
690
|
-
this.consumeWhitespace();
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
let standalone = Boolean(scanner.consumeStringFast('standalone'))
|
|
694
|
-
&& this.consumeEqual()
|
|
695
|
-
&& this.consumeSystemLiteral();
|
|
696
|
-
|
|
697
|
-
if (standalone) {
|
|
698
|
-
if (standalone !== 'yes' && standalone !== 'no') {
|
|
699
|
-
this.error('Only "yes" and "no" are permitted as values of `standalone`');
|
|
700
|
-
}
|
|
701
|
-
|
|
702
|
-
this.consumeWhitespace();
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
|
|
706
|
-
if (!scanner.consumeStringFast('?>')) {
|
|
707
|
-
this.error('Invalid or unclosed XML declaration');
|
|
708
|
-
}
|
|
709
|
-
|
|
710
|
-
return true;
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
/**
|
|
714
|
-
Throws an error at the current scanner position.
|
|
715
|
-
|
|
716
|
-
@param {string} message
|
|
717
|
-
*/
|
|
718
|
-
error(message) {
|
|
719
|
-
let { charIndex, string: xml } = this.scanner;
|
|
720
|
-
let column = 1;
|
|
721
|
-
let excerpt = '';
|
|
722
|
-
let line = 1;
|
|
723
|
-
|
|
724
|
-
// Find the line and column where the error occurred.
|
|
725
|
-
for (let i = 0; i < charIndex; ++i) {
|
|
726
|
-
let char = xml[i];
|
|
727
|
-
|
|
728
|
-
if (char === '\n') {
|
|
729
|
-
column = 1;
|
|
730
|
-
excerpt = '';
|
|
731
|
-
line += 1;
|
|
732
|
-
} else {
|
|
733
|
-
column += 1;
|
|
734
|
-
excerpt += char;
|
|
735
|
-
}
|
|
736
|
-
}
|
|
737
|
-
|
|
738
|
-
let eol = xml.indexOf('\n', charIndex);
|
|
739
|
-
|
|
740
|
-
excerpt += eol === -1
|
|
741
|
-
? xml.slice(charIndex)
|
|
742
|
-
: xml.slice(charIndex, eol);
|
|
743
|
-
|
|
744
|
-
let excerptStart = 0;
|
|
745
|
-
|
|
746
|
-
// Keep the excerpt below 50 chars, but always keep the error position in
|
|
747
|
-
// view.
|
|
748
|
-
if (excerpt.length > 50) {
|
|
749
|
-
if (column < 40) {
|
|
750
|
-
excerpt = excerpt.slice(0, 50);
|
|
751
|
-
} else {
|
|
752
|
-
excerptStart = column - 20;
|
|
753
|
-
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
754
|
-
}
|
|
755
|
-
}
|
|
756
|
-
|
|
757
|
-
let err = new Error(
|
|
758
|
-
`${message} (line ${line}, column ${column})\n`
|
|
759
|
-
+ ` ${excerpt}\n`
|
|
760
|
-
+ ' '.repeat(column - excerptStart + 1) + '^\n'
|
|
761
|
-
);
|
|
762
|
-
|
|
763
|
-
Object.assign(err, {
|
|
764
|
-
column,
|
|
765
|
-
excerpt,
|
|
766
|
-
line,
|
|
767
|
-
pos: charIndex
|
|
768
|
-
});
|
|
769
|
-
|
|
770
|
-
throw err;
|
|
771
|
-
}
|
|
772
|
-
|
|
773
|
-
/**
|
|
774
|
-
Throws an invalid character error if any character in the given _string_ isn't
|
|
775
|
-
a valid XML character.
|
|
776
|
-
|
|
777
|
-
@param {string} string
|
|
778
|
-
*/
|
|
779
|
-
validateChars(string) {
|
|
780
|
-
let charIndex = 0;
|
|
781
|
-
|
|
782
|
-
for (let char of string) {
|
|
783
|
-
if (syntax.isNotXmlChar(char)) {
|
|
784
|
-
this.scanner.reset(-([ ...string ].length - charIndex));
|
|
785
|
-
this.error('Invalid character');
|
|
786
|
-
}
|
|
787
|
-
|
|
788
|
-
charIndex += 1;
|
|
789
|
-
}
|
|
790
|
-
}
|
|
791
|
-
}
|
|
792
|
-
|
|
793
|
-
module.exports = Parser;
|
|
794
|
-
|
|
795
|
-
// -- Private Functions --------------------------------------------------------
|
|
796
|
-
|
|
797
|
-
/**
Prepares an XML string for scanning: drops a single leading byte order mark
(if there is one) and converts every CRLF pair and lone CR into an LF.

@param {string} xml
@returns {string}
*/
function normalizeXmlString(xml) {
  const withoutBom = xml[0] === '\uFEFF'
    ? xml.slice(1)
    : xml;

  return withoutBom.replace(/\r\n?/g, '\n');
}
|
|
811
|
-
|
|
812
|
-
/** @typedef {import('./XmlNode')} XmlNode */
|