@rgrove/parse-xml 4.0.1 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -25
- package/dist/browser.js +642 -223
- package/dist/browser.js.map +4 -4
- package/dist/global.min.js +9 -8
- package/dist/global.min.js.map +4 -4
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/lib/Parser.d.ts +49 -6
- package/dist/lib/Parser.d.ts.map +1 -1
- package/dist/lib/Parser.js +133 -102
- package/dist/lib/Parser.js.map +1 -1
- package/dist/lib/StringScanner.d.ts +5 -5
- package/dist/lib/StringScanner.d.ts.map +1 -1
- package/dist/lib/StringScanner.js +9 -9
- package/dist/lib/StringScanner.js.map +1 -1
- package/dist/lib/XmlDeclaration.d.ts +30 -0
- package/dist/lib/XmlDeclaration.d.ts.map +1 -0
- package/dist/lib/XmlDeclaration.js +36 -0
- package/dist/lib/XmlDeclaration.js.map +1 -0
- package/dist/lib/XmlDocument.d.ts +4 -2
- package/dist/lib/XmlDocument.d.ts.map +1 -1
- package/dist/lib/XmlDocument.js.map +1 -1
- package/dist/lib/XmlDocumentType.d.ts +37 -0
- package/dist/lib/XmlDocumentType.d.ts.map +1 -0
- package/dist/lib/XmlDocumentType.js +39 -0
- package/dist/lib/XmlDocumentType.js.map +1 -0
- package/dist/lib/XmlError.d.ts +24 -0
- package/dist/lib/XmlError.d.ts.map +1 -0
- package/dist/lib/XmlError.js +52 -0
- package/dist/lib/XmlError.js.map +1 -0
- package/dist/lib/XmlNode.d.ts +20 -1
- package/dist/lib/XmlNode.d.ts.map +1 -1
- package/dist/lib/XmlNode.js +28 -3
- package/dist/lib/XmlNode.js.map +1 -1
- package/dist/lib/syntax.d.ts.map +1 -1
- package/dist/lib/syntax.js +1 -1
- package/dist/lib/syntax.js.map +1 -1
- package/dist/lib/types.d.ts +2 -2
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +20 -18
- package/src/index.ts +3 -0
- package/src/lib/Parser.ts +195 -118
- package/src/lib/StringScanner.ts +10 -10
- package/src/lib/XmlDeclaration.ts +58 -0
- package/src/lib/XmlDocument.ts +4 -2
- package/src/lib/XmlDocumentType.ts +67 -0
- package/src/lib/XmlError.ts +80 -0
- package/src/lib/XmlNode.ts +33 -3
- package/src/lib/syntax.ts +1 -1
package/dist/browser.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
/*! @rgrove/parse-xml v4.0
|
|
1
|
+
/*! @rgrove/parse-xml v4.1.0 | ISC License | Copyright Ryan Grove */
|
|
2
2
|
"use strict";
|
|
3
3
|
var __defProp = Object.defineProperty;
|
|
4
4
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -23,8 +23,11 @@ var src_exports = {};
|
|
|
23
23
|
__export(src_exports, {
|
|
24
24
|
XmlCdata: () => XmlCdata,
|
|
25
25
|
XmlComment: () => XmlComment,
|
|
26
|
+
XmlDeclaration: () => XmlDeclaration,
|
|
26
27
|
XmlDocument: () => XmlDocument,
|
|
28
|
+
XmlDocumentType: () => XmlDocumentType,
|
|
27
29
|
XmlElement: () => XmlElement,
|
|
30
|
+
XmlError: () => XmlError,
|
|
28
31
|
XmlNode: () => XmlNode,
|
|
29
32
|
XmlProcessingInstruction: () => XmlProcessingInstruction,
|
|
30
33
|
XmlText: () => XmlText,
|
|
@@ -37,125 +40,200 @@ var emptyString = "";
|
|
|
37
40
|
var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
|
|
38
41
|
var StringScanner = class {
|
|
39
42
|
constructor(string) {
|
|
40
|
-
this.
|
|
43
|
+
this.k = this.q(string, true);
|
|
41
44
|
this.d = 0;
|
|
42
45
|
this.length = string.length;
|
|
43
|
-
this.
|
|
44
|
-
this.
|
|
45
|
-
if (this.
|
|
46
|
+
this.n = this.k !== this.length;
|
|
47
|
+
this.m = string;
|
|
48
|
+
if (this.n) {
|
|
46
49
|
let charsToBytes = [];
|
|
47
|
-
for (let byteIndex = 0, charIndex = 0; charIndex < this.
|
|
50
|
+
for (let byteIndex = 0, charIndex = 0; charIndex < this.k; ++charIndex) {
|
|
48
51
|
charsToBytes[charIndex] = byteIndex;
|
|
49
52
|
byteIndex += string.codePointAt(byteIndex) > 65535 ? 2 : 1;
|
|
50
53
|
}
|
|
51
|
-
this.
|
|
54
|
+
this.y = charsToBytes;
|
|
52
55
|
}
|
|
53
56
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
57
|
+
/**
|
|
58
|
+
* Whether the current character index is at the end of the input string.
|
|
59
|
+
*/
|
|
60
|
+
get z() {
|
|
61
|
+
return this.d >= this.k;
|
|
62
|
+
}
|
|
63
|
+
// -- Protected Methods ------------------------------------------------------
|
|
64
|
+
/**
|
|
65
|
+
* Returns the number of characters in the given string, which may differ from
|
|
66
|
+
* the byte length if the string contains multibyte characters.
|
|
67
|
+
*/
|
|
68
|
+
q(string, multiByteSafe = this.n) {
|
|
62
69
|
return multiByteSafe ? string.replace(surrogatePair, "_").length : string.length;
|
|
63
70
|
}
|
|
64
|
-
|
|
65
|
-
|
|
71
|
+
// -- Public Methods ---------------------------------------------------------
|
|
72
|
+
/**
|
|
73
|
+
* Advances the scanner by the given number of characters, stopping if the end
|
|
74
|
+
* of the string is reached.
|
|
75
|
+
*/
|
|
76
|
+
g(count = 1) {
|
|
77
|
+
this.d = Math.min(this.k, this.d + count);
|
|
66
78
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
79
|
+
/**
|
|
80
|
+
* Returns the byte index of the given character index in the string. The two
|
|
81
|
+
* may differ in strings that contain multibyte characters.
|
|
82
|
+
*/
|
|
83
|
+
i(charIndex = this.d) {
|
|
84
|
+
var _a;
|
|
85
|
+
return this.n ? (_a = this.y[charIndex]) != null ? _a : Infinity : charIndex;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Consumes and returns the given number of characters if possible, advancing
|
|
89
|
+
* the scanner and stopping if the end of the string is reached.
|
|
90
|
+
*
|
|
91
|
+
* If no characters could be consumed, an empty string will be returned.
|
|
92
|
+
*/
|
|
93
|
+
F(count = 1) {
|
|
94
|
+
let chars = this.h(count);
|
|
95
|
+
this.g(count);
|
|
70
96
|
return chars;
|
|
71
97
|
}
|
|
72
|
-
|
|
98
|
+
/**
|
|
99
|
+
* Consumes a match for the given sticky regex, advances the scanner, updates
|
|
100
|
+
* the `lastIndex` property of the regex, and returns the matching string.
|
|
101
|
+
*
|
|
102
|
+
* The regex must have a sticky flag ("y") so that its `lastIndex` prop can be
|
|
103
|
+
* used to anchor the match at the current scanner position.
|
|
104
|
+
*
|
|
105
|
+
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
106
|
+
*/
|
|
107
|
+
G(regex) {
|
|
73
108
|
if (!regex.sticky) {
|
|
74
109
|
throw new Error('`regex` must have a sticky flag ("y")');
|
|
75
110
|
}
|
|
76
|
-
regex.lastIndex = this.
|
|
77
|
-
let result = regex.exec(this.
|
|
111
|
+
regex.lastIndex = this.i();
|
|
112
|
+
let result = regex.exec(this.m);
|
|
78
113
|
if (result === null || result.length === 0) {
|
|
79
114
|
return emptyString;
|
|
80
115
|
}
|
|
81
116
|
let match = result[0];
|
|
82
|
-
this.
|
|
117
|
+
this.g(this.q(match));
|
|
83
118
|
return match;
|
|
84
119
|
}
|
|
85
|
-
|
|
120
|
+
/**
|
|
121
|
+
* Consumes and returns all characters for which the given function returns a
|
|
122
|
+
* truthy value, stopping on the first falsy return value or if the end of the
|
|
123
|
+
* input is reached.
|
|
124
|
+
*/
|
|
125
|
+
v(fn) {
|
|
86
126
|
let char;
|
|
87
127
|
let match = emptyString;
|
|
88
|
-
while ((char = this.
|
|
128
|
+
while ((char = this.h()) && fn(char)) {
|
|
89
129
|
match += char;
|
|
90
|
-
this.
|
|
130
|
+
this.g();
|
|
91
131
|
}
|
|
92
132
|
return match;
|
|
93
133
|
}
|
|
94
|
-
|
|
134
|
+
/**
|
|
135
|
+
* Consumes the given string if it exists at the current character index, and
|
|
136
|
+
* advances the scanner.
|
|
137
|
+
*
|
|
138
|
+
* If the given string doesn't exist at the current character index, an empty
|
|
139
|
+
* string will be returned and the scanner will not be advanced.
|
|
140
|
+
*/
|
|
141
|
+
Q(stringToConsume) {
|
|
95
142
|
if (this.b(stringToConsume)) {
|
|
96
143
|
return stringToConsume;
|
|
97
144
|
}
|
|
98
|
-
if (this.
|
|
145
|
+
if (this.n) {
|
|
99
146
|
let { length } = stringToConsume;
|
|
100
|
-
let charLengthToMatch = this.
|
|
101
|
-
if (charLengthToMatch !== length && stringToConsume === this.
|
|
102
|
-
this.
|
|
147
|
+
let charLengthToMatch = this.q(stringToConsume);
|
|
148
|
+
if (charLengthToMatch !== length && stringToConsume === this.h(charLengthToMatch)) {
|
|
149
|
+
this.g(charLengthToMatch);
|
|
103
150
|
return stringToConsume;
|
|
104
151
|
}
|
|
105
152
|
}
|
|
106
153
|
return emptyString;
|
|
107
154
|
}
|
|
155
|
+
/**
|
|
156
|
+
* Does the same thing as `consumeString()`, but doesn't support consuming
|
|
157
|
+
* multibyte characters. This can be faster if you only need to match single
|
|
158
|
+
* byte characters.
|
|
159
|
+
*/
|
|
108
160
|
b(stringToConsume) {
|
|
109
161
|
let { length } = stringToConsume;
|
|
110
|
-
if (this.
|
|
111
|
-
this.
|
|
162
|
+
if (this.h(length) === stringToConsume) {
|
|
163
|
+
this.g(length);
|
|
112
164
|
return stringToConsume;
|
|
113
165
|
}
|
|
114
166
|
return emptyString;
|
|
115
167
|
}
|
|
116
|
-
|
|
117
|
-
|
|
168
|
+
/**
|
|
169
|
+
* Consumes characters until the given global regex is matched, advancing the
|
|
170
|
+
* scanner up to (but not beyond) the beginning of the match. If the regex
|
|
171
|
+
* doesn't match, nothing will be consumed.
|
|
172
|
+
*
|
|
173
|
+
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
174
|
+
*/
|
|
175
|
+
A(regex) {
|
|
176
|
+
let restOfString = this.m.slice(this.i());
|
|
118
177
|
let matchByteIndex = restOfString.search(regex);
|
|
119
178
|
if (matchByteIndex <= 0) {
|
|
120
179
|
return emptyString;
|
|
121
180
|
}
|
|
122
181
|
let result = restOfString.slice(0, matchByteIndex);
|
|
123
|
-
this.
|
|
182
|
+
this.g(this.q(result));
|
|
124
183
|
return result;
|
|
125
184
|
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
185
|
+
/**
|
|
186
|
+
* Consumes characters until the given string is found, advancing the scanner
|
|
187
|
+
* up to (but not beyond) that point. If the string is never found, nothing
|
|
188
|
+
* will be consumed.
|
|
189
|
+
*
|
|
190
|
+
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
191
|
+
*/
|
|
192
|
+
t(searchString) {
|
|
193
|
+
let { m: string } = this;
|
|
194
|
+
let byteIndex = this.i();
|
|
129
195
|
let matchByteIndex = string.indexOf(searchString, byteIndex);
|
|
130
196
|
if (matchByteIndex <= 0) {
|
|
131
197
|
return emptyString;
|
|
132
198
|
}
|
|
133
199
|
let result = string.slice(byteIndex, matchByteIndex);
|
|
134
|
-
this.
|
|
200
|
+
this.g(this.q(result));
|
|
135
201
|
return result;
|
|
136
202
|
}
|
|
137
|
-
|
|
138
|
-
|
|
203
|
+
/**
|
|
204
|
+
* Returns the given number of characters starting at the current character
|
|
205
|
+
* index, without advancing the scanner and without exceeding the end of the
|
|
206
|
+
* input string.
|
|
207
|
+
*/
|
|
208
|
+
h(count = 1) {
|
|
209
|
+
let { d: charIndex, n: multiByteMode, m: string } = this;
|
|
139
210
|
if (multiByteMode) {
|
|
140
|
-
if (charIndex >= this.
|
|
211
|
+
if (charIndex >= this.k) {
|
|
141
212
|
return emptyString;
|
|
142
213
|
}
|
|
143
214
|
return string.slice(
|
|
144
|
-
this.
|
|
145
|
-
this.
|
|
215
|
+
this.i(charIndex),
|
|
216
|
+
this.i(charIndex + count)
|
|
146
217
|
);
|
|
147
218
|
}
|
|
148
219
|
return string.slice(charIndex, charIndex + count);
|
|
149
220
|
}
|
|
221
|
+
/**
|
|
222
|
+
* Resets the scanner position to the given character _index_, or to the start
|
|
223
|
+
* of the input string if no index is given.
|
|
224
|
+
*
|
|
225
|
+
* If _index_ is negative, the scanner position will be moved backward by that
|
|
226
|
+
* many characters, stopping if the beginning of the string is reached.
|
|
227
|
+
*/
|
|
150
228
|
o(index = 0) {
|
|
151
|
-
this.d = index >= 0 ? Math.min(this.
|
|
229
|
+
this.d = index >= 0 ? Math.min(this.k, index) : Math.max(0, this.d + index);
|
|
152
230
|
}
|
|
153
231
|
};
|
|
154
232
|
|
|
155
233
|
// src/lib/syntax.ts
|
|
156
234
|
var attValueCharDoubleQuote = /[^"&<]+/y;
|
|
157
235
|
var attValueCharSingleQuote = /[^'&<]+/y;
|
|
158
|
-
var attValueNormalizedWhitespace =
|
|
236
|
+
var attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
|
|
159
237
|
var endCharData = /<|&|]]>/;
|
|
160
238
|
var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.create(null), {
|
|
161
239
|
amp: "&",
|
|
@@ -186,24 +264,70 @@ function getCodePoint(char) {
|
|
|
186
264
|
}
|
|
187
265
|
|
|
188
266
|
// src/lib/XmlNode.ts
|
|
189
|
-
var
|
|
267
|
+
var _XmlNode = class {
|
|
190
268
|
constructor() {
|
|
269
|
+
/**
|
|
270
|
+
* Parent node of this node, or `null` if this node has no parent.
|
|
271
|
+
*/
|
|
191
272
|
this.parent = null;
|
|
192
|
-
|
|
273
|
+
/**
|
|
274
|
+
* Starting byte offset of this node in the original XML string, or `-1` if
|
|
275
|
+
* the offset is unknown.
|
|
276
|
+
*/
|
|
277
|
+
this.start = -1;
|
|
278
|
+
/**
|
|
279
|
+
* Ending byte offset of this node in the original XML string, or `-1` if the
|
|
280
|
+
* offset is unknown.
|
|
281
|
+
*/
|
|
282
|
+
this.end = -1;
|
|
283
|
+
}
|
|
284
|
+
/**
|
|
285
|
+
* Document that contains this node, or `null` if this node is not associated
|
|
286
|
+
* with a document.
|
|
287
|
+
*/
|
|
193
288
|
get document() {
|
|
194
289
|
var _a, _b;
|
|
195
290
|
return (_b = (_a = this.parent) == null ? void 0 : _a.document) != null ? _b : null;
|
|
196
291
|
}
|
|
292
|
+
/**
|
|
293
|
+
* Whether this node is the root node of the document (also known as the
|
|
294
|
+
* document element).
|
|
295
|
+
*/
|
|
197
296
|
get isRootNode() {
|
|
198
|
-
return this.parent !== null && this.parent === this.document;
|
|
199
|
-
}
|
|
297
|
+
return this.parent !== null && this.parent === this.document && this.type === _XmlNode.TYPE_ELEMENT;
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Whether whitespace should be preserved in the content of this element and
|
|
301
|
+
* its children.
|
|
302
|
+
*
|
|
303
|
+
* This is influenced by the value of the special `xml:space` attribute, and
|
|
304
|
+
* will be `true` for any node whose `xml:space` attribute is set to
|
|
305
|
+
* "preserve". If a node has no such attribute, it will inherit the value of
|
|
306
|
+
* the nearest ancestor that does (if any).
|
|
307
|
+
*
|
|
308
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
|
|
309
|
+
*/
|
|
200
310
|
get preserveWhitespace() {
|
|
201
311
|
var _a;
|
|
202
|
-
return
|
|
203
|
-
}
|
|
312
|
+
return !!((_a = this.parent) == null ? void 0 : _a.preserveWhitespace);
|
|
313
|
+
}
|
|
314
|
+
/**
|
|
315
|
+
* Type of this node.
|
|
316
|
+
*
|
|
317
|
+
* The value of this property is a string that matches one of the static
|
|
318
|
+
* `TYPE_*` properties on the `XmlNode` class (e.g. `TYPE_ELEMENT`,
|
|
319
|
+
* `TYPE_TEXT`, etc.).
|
|
320
|
+
*
|
|
321
|
+
* The `XmlNode` class itself is a base class and doesn't have its own type
|
|
322
|
+
* name.
|
|
323
|
+
*/
|
|
204
324
|
get type() {
|
|
205
325
|
return "";
|
|
206
326
|
}
|
|
327
|
+
/**
|
|
328
|
+
* Returns a JSON-serializable object representing this node, minus properties
|
|
329
|
+
* that could result in circular references.
|
|
330
|
+
*/
|
|
207
331
|
toJSON() {
|
|
208
332
|
let json = {
|
|
209
333
|
type: this.type
|
|
@@ -214,15 +338,46 @@ var XmlNode = class {
|
|
|
214
338
|
if (this.preserveWhitespace) {
|
|
215
339
|
json.preserveWhitespace = true;
|
|
216
340
|
}
|
|
341
|
+
if (this.start !== -1) {
|
|
342
|
+
json.start = this.start;
|
|
343
|
+
json.end = this.end;
|
|
344
|
+
}
|
|
217
345
|
return json;
|
|
218
346
|
}
|
|
219
347
|
};
|
|
348
|
+
var XmlNode = _XmlNode;
|
|
349
|
+
/**
|
|
350
|
+
* Type value for an `XmlCdata` node.
|
|
351
|
+
*/
|
|
220
352
|
XmlNode.TYPE_CDATA = "cdata";
|
|
353
|
+
/**
|
|
354
|
+
* Type value for an `XmlComment` node.
|
|
355
|
+
*/
|
|
221
356
|
XmlNode.TYPE_COMMENT = "comment";
|
|
357
|
+
/**
|
|
358
|
+
* Type value for an `XmlDocument` node.
|
|
359
|
+
*/
|
|
222
360
|
XmlNode.TYPE_DOCUMENT = "document";
|
|
361
|
+
/**
|
|
362
|
+
* Type value for an `XmlDocumentType` node.
|
|
363
|
+
*/
|
|
364
|
+
XmlNode.TYPE_DOCUMENT_TYPE = "doctype";
|
|
365
|
+
/**
|
|
366
|
+
* Type value for an `XmlElement` node.
|
|
367
|
+
*/
|
|
223
368
|
XmlNode.TYPE_ELEMENT = "element";
|
|
369
|
+
/**
|
|
370
|
+
* Type value for an `XmlProcessingInstruction` node.
|
|
371
|
+
*/
|
|
224
372
|
XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
|
|
373
|
+
/**
|
|
374
|
+
* Type value for an `XmlText` node.
|
|
375
|
+
*/
|
|
225
376
|
XmlNode.TYPE_TEXT = "text";
|
|
377
|
+
/**
|
|
378
|
+
* Type value for an `XmlDeclaration` node.
|
|
379
|
+
*/
|
|
380
|
+
XmlNode.TYPE_XML_DECLARATION = "xmldecl";
|
|
226
381
|
|
|
227
382
|
// src/lib/XmlText.ts
|
|
228
383
|
var XmlText = class extends XmlNode {
|
|
@@ -263,6 +418,29 @@ var XmlComment = class extends XmlNode {
|
|
|
263
418
|
}
|
|
264
419
|
};
|
|
265
420
|
|
|
421
|
+
// src/lib/XmlDeclaration.ts
|
|
422
|
+
var XmlDeclaration = class extends XmlNode {
|
|
423
|
+
constructor(version, encoding, standalone) {
|
|
424
|
+
super();
|
|
425
|
+
this.version = version;
|
|
426
|
+
this.encoding = encoding != null ? encoding : null;
|
|
427
|
+
this.standalone = standalone != null ? standalone : null;
|
|
428
|
+
}
|
|
429
|
+
get type() {
|
|
430
|
+
return XmlNode.TYPE_XML_DECLARATION;
|
|
431
|
+
}
|
|
432
|
+
toJSON() {
|
|
433
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
434
|
+
json.version = this.version;
|
|
435
|
+
for (let key of ["encoding", "standalone"]) {
|
|
436
|
+
if (this[key] !== null) {
|
|
437
|
+
json[key] = this[key];
|
|
438
|
+
}
|
|
439
|
+
}
|
|
440
|
+
return json;
|
|
441
|
+
}
|
|
442
|
+
};
|
|
443
|
+
|
|
266
444
|
// src/lib/XmlElement.ts
|
|
267
445
|
var XmlElement = class extends XmlNode {
|
|
268
446
|
constructor(name, attributes = /* @__PURE__ */ Object.create(null), children = []) {
|
|
@@ -271,6 +449,9 @@ var XmlElement = class extends XmlNode {
|
|
|
271
449
|
this.attributes = attributes;
|
|
272
450
|
this.children = children;
|
|
273
451
|
}
|
|
452
|
+
/**
|
|
453
|
+
* Whether this element is empty (meaning it has no children).
|
|
454
|
+
*/
|
|
274
455
|
get isEmpty() {
|
|
275
456
|
return this.children.length === 0;
|
|
276
457
|
}
|
|
@@ -284,6 +465,9 @@ var XmlElement = class extends XmlNode {
|
|
|
284
465
|
}
|
|
285
466
|
return false;
|
|
286
467
|
}
|
|
468
|
+
/**
|
|
469
|
+
* Text content of this element and all its descendants.
|
|
470
|
+
*/
|
|
287
471
|
get text() {
|
|
288
472
|
return this.children.map((child) => "text" in child ? child.text : "").join("");
|
|
289
473
|
}
|
|
@@ -308,6 +492,9 @@ var XmlDocument = class extends XmlNode {
|
|
|
308
492
|
get document() {
|
|
309
493
|
return this;
|
|
310
494
|
}
|
|
495
|
+
/**
|
|
496
|
+
* Root element of this document, or `null` if this document is empty.
|
|
497
|
+
*/
|
|
311
498
|
get root() {
|
|
312
499
|
for (let child of this.children) {
|
|
313
500
|
if (child instanceof XmlElement) {
|
|
@@ -316,6 +503,9 @@ var XmlDocument = class extends XmlNode {
|
|
|
316
503
|
}
|
|
317
504
|
return null;
|
|
318
505
|
}
|
|
506
|
+
/**
|
|
507
|
+
* Text content of this document and all its descendants.
|
|
508
|
+
*/
|
|
319
509
|
get text() {
|
|
320
510
|
return this.children.map((child) => "text" in child ? child.text : "").join("");
|
|
321
511
|
}
|
|
@@ -329,6 +519,71 @@ var XmlDocument = class extends XmlNode {
|
|
|
329
519
|
}
|
|
330
520
|
};
|
|
331
521
|
|
|
522
|
+
// src/lib/XmlDocumentType.ts
|
|
523
|
+
var XmlDocumentType = class extends XmlNode {
|
|
524
|
+
constructor(name, publicId, systemId, internalSubset) {
|
|
525
|
+
super();
|
|
526
|
+
this.name = name;
|
|
527
|
+
this.publicId = publicId != null ? publicId : null;
|
|
528
|
+
this.systemId = systemId != null ? systemId : null;
|
|
529
|
+
this.internalSubset = internalSubset != null ? internalSubset : null;
|
|
530
|
+
}
|
|
531
|
+
get type() {
|
|
532
|
+
return XmlNode.TYPE_DOCUMENT_TYPE;
|
|
533
|
+
}
|
|
534
|
+
toJSON() {
|
|
535
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
536
|
+
json.name = this.name;
|
|
537
|
+
for (let key of ["publicId", "systemId", "internalSubset"]) {
|
|
538
|
+
if (this[key] !== null) {
|
|
539
|
+
json[key] = this[key];
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
return json;
|
|
543
|
+
}
|
|
544
|
+
};
|
|
545
|
+
|
|
546
|
+
// src/lib/XmlError.ts
|
|
547
|
+
var XmlError = class extends Error {
|
|
548
|
+
constructor(message, charIndex, xml) {
|
|
549
|
+
let column = 1;
|
|
550
|
+
let excerpt = "";
|
|
551
|
+
let line = 1;
|
|
552
|
+
for (let i = 0; i < charIndex; ++i) {
|
|
553
|
+
let char = xml[i];
|
|
554
|
+
if (char === "\n") {
|
|
555
|
+
column = 1;
|
|
556
|
+
excerpt = "";
|
|
557
|
+
line += 1;
|
|
558
|
+
} else {
|
|
559
|
+
column += 1;
|
|
560
|
+
excerpt += char;
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
let eol = xml.indexOf("\n", charIndex);
|
|
564
|
+
excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
|
|
565
|
+
let excerptStart = 0;
|
|
566
|
+
if (excerpt.length > 50) {
|
|
567
|
+
if (column < 40) {
|
|
568
|
+
excerpt = excerpt.slice(0, 50);
|
|
569
|
+
} else {
|
|
570
|
+
excerptStart = column - 20;
|
|
571
|
+
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
super(
|
|
575
|
+
`${message} (line ${line}, column ${column})
|
|
576
|
+
${excerpt}
|
|
577
|
+
` + " ".repeat(column - excerptStart + 1) + "^\n"
|
|
578
|
+
);
|
|
579
|
+
this.column = column;
|
|
580
|
+
this.excerpt = excerpt;
|
|
581
|
+
this.line = line;
|
|
582
|
+
this.name = "XmlError";
|
|
583
|
+
this.pos = charIndex;
|
|
584
|
+
}
|
|
585
|
+
};
|
|
586
|
+
|
|
332
587
|
// src/lib/XmlProcessingInstruction.ts
|
|
333
588
|
var XmlProcessingInstruction = class extends XmlNode {
|
|
334
589
|
constructor(name, content = "") {
|
|
@@ -350,45 +605,76 @@ var XmlProcessingInstruction = class extends XmlNode {
|
|
|
350
605
|
// src/lib/Parser.ts
|
|
351
606
|
var emptyString2 = "";
|
|
352
607
|
var Parser = class {
|
|
608
|
+
/**
|
|
609
|
+
* @param xml XML string to parse.
|
|
610
|
+
* @param options Parser options.
|
|
611
|
+
*/
|
|
353
612
|
constructor(xml, options = {}) {
|
|
354
|
-
this.document = new XmlDocument();
|
|
355
|
-
this.
|
|
356
|
-
this.
|
|
357
|
-
this.
|
|
358
|
-
this.
|
|
359
|
-
|
|
613
|
+
let doc = this.document = new XmlDocument();
|
|
614
|
+
let scanner = this.c = new StringScanner(xml);
|
|
615
|
+
this.l = doc;
|
|
616
|
+
this.f = options;
|
|
617
|
+
if (this.f.includeOffsets) {
|
|
618
|
+
doc.start = 0;
|
|
619
|
+
doc.end = xml.length;
|
|
620
|
+
}
|
|
621
|
+
scanner.b("\uFEFF");
|
|
622
|
+
this.H();
|
|
623
|
+
if (!this.B()) {
|
|
360
624
|
throw this.a("Root element is missing or invalid");
|
|
361
625
|
}
|
|
362
|
-
while (this.
|
|
626
|
+
while (this.w()) {
|
|
363
627
|
}
|
|
364
|
-
if (!
|
|
628
|
+
if (!scanner.z) {
|
|
365
629
|
throw this.a("Extra content at the end of the document");
|
|
366
630
|
}
|
|
367
631
|
}
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
632
|
+
/**
|
|
633
|
+
* Adds the given `XmlNode` as a child of `this.currentNode`.
|
|
634
|
+
*/
|
|
635
|
+
j(node, charIndex) {
|
|
636
|
+
node.parent = this.l;
|
|
637
|
+
if (this.f.includeOffsets) {
|
|
638
|
+
node.start = this.c.i(charIndex);
|
|
639
|
+
node.end = this.c.i();
|
|
640
|
+
}
|
|
641
|
+
this.l.children.push(node);
|
|
642
|
+
return true;
|
|
371
643
|
}
|
|
372
|
-
|
|
373
|
-
|
|
644
|
+
/**
|
|
645
|
+
* Adds the given _text_ to the document, either by appending it to a
|
|
646
|
+
* preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
|
|
647
|
+
*/
|
|
648
|
+
x(text, charIndex) {
|
|
649
|
+
let { children } = this.l;
|
|
374
650
|
let { length } = children;
|
|
651
|
+
text = normalizeLineBreaks(text);
|
|
375
652
|
if (length > 0) {
|
|
376
653
|
let prevNode = children[length - 1];
|
|
377
|
-
if (prevNode
|
|
378
|
-
|
|
379
|
-
|
|
654
|
+
if ((prevNode == null ? void 0 : prevNode.type) === XmlNode.TYPE_TEXT) {
|
|
655
|
+
let textNode = prevNode;
|
|
656
|
+
textNode.text += text;
|
|
657
|
+
if (this.f.includeOffsets) {
|
|
658
|
+
textNode.end = this.c.i();
|
|
659
|
+
}
|
|
660
|
+
return true;
|
|
380
661
|
}
|
|
381
662
|
}
|
|
382
|
-
this.
|
|
663
|
+
return this.j(new XmlText(text), charIndex);
|
|
383
664
|
}
|
|
384
|
-
|
|
665
|
+
/**
|
|
666
|
+
* Consumes element attributes.
|
|
667
|
+
*
|
|
668
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
|
|
669
|
+
*/
|
|
670
|
+
I() {
|
|
385
671
|
let attributes = /* @__PURE__ */ Object.create(null);
|
|
386
672
|
while (this.e()) {
|
|
387
|
-
let attrName = this.
|
|
673
|
+
let attrName = this.r();
|
|
388
674
|
if (!attrName) {
|
|
389
675
|
break;
|
|
390
676
|
}
|
|
391
|
-
let attrValue = this.
|
|
677
|
+
let attrValue = this.u() && this.J();
|
|
392
678
|
if (attrValue === false) {
|
|
393
679
|
throw this.a("Attribute value expected");
|
|
394
680
|
}
|
|
@@ -400,7 +686,7 @@ var Parser = class {
|
|
|
400
686
|
}
|
|
401
687
|
attributes[attrName] = attrValue;
|
|
402
688
|
}
|
|
403
|
-
if (this.
|
|
689
|
+
if (this.f.sortAttributes) {
|
|
404
690
|
let attrNames = Object.keys(attributes).sort();
|
|
405
691
|
let sortedAttributes = /* @__PURE__ */ Object.create(null);
|
|
406
692
|
for (let i = 0; i < attrNames.length; ++i) {
|
|
@@ -411,30 +697,40 @@ var Parser = class {
|
|
|
411
697
|
}
|
|
412
698
|
return attributes;
|
|
413
699
|
}
|
|
414
|
-
|
|
700
|
+
/**
|
|
701
|
+
* Consumes an `AttValue` (attribute value) if possible.
|
|
702
|
+
*
|
|
703
|
+
* @returns
|
|
704
|
+
* Contents of the `AttValue` minus quotes, or `false` if nothing was
|
|
705
|
+
* consumed. An empty string indicates that an `AttValue` was consumed but
|
|
706
|
+
* was empty.
|
|
707
|
+
*
|
|
708
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
|
|
709
|
+
*/
|
|
710
|
+
J() {
|
|
415
711
|
let { c: scanner } = this;
|
|
416
|
-
let quote = scanner.
|
|
712
|
+
let quote = scanner.h();
|
|
417
713
|
if (quote !== '"' && quote !== "'") {
|
|
418
714
|
return false;
|
|
419
715
|
}
|
|
420
|
-
scanner.
|
|
716
|
+
scanner.g();
|
|
421
717
|
let chars;
|
|
422
718
|
let isClosed = false;
|
|
423
719
|
let value = emptyString2;
|
|
424
720
|
let regex = quote === '"' ? attValueCharDoubleQuote : attValueCharSingleQuote;
|
|
425
721
|
matchLoop:
|
|
426
|
-
while (!scanner.
|
|
427
|
-
chars = scanner.
|
|
722
|
+
while (!scanner.z) {
|
|
723
|
+
chars = scanner.G(regex);
|
|
428
724
|
if (chars) {
|
|
429
|
-
this.
|
|
725
|
+
this.p(chars);
|
|
430
726
|
value += chars.replace(attValueNormalizedWhitespace, " ");
|
|
431
727
|
}
|
|
432
|
-
switch (scanner.
|
|
728
|
+
switch (scanner.h()) {
|
|
433
729
|
case quote:
|
|
434
730
|
isClosed = true;
|
|
435
731
|
break matchLoop;
|
|
436
732
|
case "&":
|
|
437
|
-
value += this.
|
|
733
|
+
value += this.C();
|
|
438
734
|
continue;
|
|
439
735
|
case "<":
|
|
440
736
|
throw this.a("Unescaped `<` is not allowed in an attribute value");
|
|
@@ -445,105 +741,166 @@ var Parser = class {
|
|
|
445
741
|
if (!isClosed) {
|
|
446
742
|
throw this.a("Unclosed attribute");
|
|
447
743
|
}
|
|
448
|
-
scanner.
|
|
744
|
+
scanner.g();
|
|
449
745
|
return value;
|
|
450
746
|
}
|
|
451
|
-
|
|
747
|
+
/**
|
|
748
|
+
* Consumes a CDATA section if possible.
|
|
749
|
+
*
|
|
750
|
+
* @returns Whether a CDATA section was consumed.
|
|
751
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
|
|
752
|
+
*/
|
|
753
|
+
K() {
|
|
452
754
|
let { c: scanner } = this;
|
|
755
|
+
let startIndex = scanner.d;
|
|
453
756
|
if (!scanner.b("<![CDATA[")) {
|
|
454
757
|
return false;
|
|
455
758
|
}
|
|
456
|
-
let text = scanner.
|
|
457
|
-
this.
|
|
759
|
+
let text = scanner.t("]]>");
|
|
760
|
+
this.p(text);
|
|
458
761
|
if (!scanner.b("]]>")) {
|
|
459
762
|
throw this.a("Unclosed CDATA section");
|
|
460
763
|
}
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
764
|
+
return this.f.preserveCdata ? this.j(new XmlCdata(normalizeLineBreaks(text)), startIndex) : this.x(text, startIndex);
|
|
765
|
+
}
|
|
766
|
+
/**
|
|
767
|
+
* Consumes character data if possible.
|
|
768
|
+
*
|
|
769
|
+
* @returns Whether character data was consumed.
|
|
770
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
|
|
771
|
+
*/
|
|
772
|
+
L() {
|
|
469
773
|
let { c: scanner } = this;
|
|
470
|
-
let
|
|
774
|
+
let startIndex = scanner.d;
|
|
775
|
+
let charData = scanner.A(endCharData);
|
|
471
776
|
if (!charData) {
|
|
472
777
|
return false;
|
|
473
778
|
}
|
|
474
|
-
this.
|
|
475
|
-
if (scanner.
|
|
779
|
+
this.p(charData);
|
|
780
|
+
if (scanner.h(3) === "]]>") {
|
|
476
781
|
throw this.a("Element content may not contain the CDATA section close delimiter `]]>`");
|
|
477
782
|
}
|
|
478
|
-
this.
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
783
|
+
return this.x(charData, startIndex);
|
|
784
|
+
}
|
|
785
|
+
/**
|
|
786
|
+
* Consumes a comment if possible.
|
|
787
|
+
*
|
|
788
|
+
* @returns Whether a comment was consumed.
|
|
789
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
|
|
790
|
+
*/
|
|
791
|
+
D() {
|
|
482
792
|
let { c: scanner } = this;
|
|
793
|
+
let startIndex = scanner.d;
|
|
483
794
|
if (!scanner.b("<!--")) {
|
|
484
795
|
return false;
|
|
485
796
|
}
|
|
486
|
-
let content = scanner.
|
|
487
|
-
this.
|
|
797
|
+
let content = scanner.t("--");
|
|
798
|
+
this.p(content);
|
|
488
799
|
if (!scanner.b("-->")) {
|
|
489
|
-
if (scanner.
|
|
800
|
+
if (scanner.h(2) === "--") {
|
|
490
801
|
throw this.a("The string `--` isn't allowed inside a comment");
|
|
491
802
|
}
|
|
492
803
|
throw this.a("Unclosed comment");
|
|
493
804
|
}
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
805
|
+
return this.f.preserveComments ? this.j(new XmlComment(normalizeLineBreaks(content)), startIndex) : true;
|
|
806
|
+
}
|
|
807
|
+
/**
|
|
808
|
+
* Consumes a reference in a content context if possible.
|
|
809
|
+
*
|
|
810
|
+
* This differs from `consumeReference()` in that a consumed reference will be
|
|
811
|
+
* added to the document as a text node instead of returned.
|
|
812
|
+
*
|
|
813
|
+
* @returns Whether a reference was consumed.
|
|
814
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
|
|
815
|
+
*/
|
|
816
|
+
M() {
|
|
817
|
+
let startIndex = this.c.d;
|
|
818
|
+
let ref = this.C();
|
|
819
|
+
return ref ? this.x(ref, startIndex) : false;
|
|
820
|
+
}
|
|
821
|
+
/**
|
|
822
|
+
* Consumes a doctype declaration if possible.
|
|
823
|
+
*
|
|
824
|
+
* This is a loose implementation since doctype declarations are currently
|
|
825
|
+
* discarded without further parsing.
|
|
826
|
+
*
|
|
827
|
+
* @returns Whether a doctype declaration was consumed.
|
|
828
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
|
|
829
|
+
*/
|
|
830
|
+
N() {
|
|
508
831
|
let { c: scanner } = this;
|
|
509
|
-
|
|
832
|
+
let startIndex = scanner.d;
|
|
833
|
+
if (!scanner.b("<!DOCTYPE")) {
|
|
510
834
|
return false;
|
|
511
835
|
}
|
|
512
|
-
|
|
513
|
-
if (
|
|
514
|
-
|
|
836
|
+
let name = this.e() && this.r();
|
|
837
|
+
if (!name) {
|
|
838
|
+
throw this.a("Expected a name");
|
|
839
|
+
}
|
|
840
|
+
let publicId;
|
|
841
|
+
let systemId;
|
|
842
|
+
if (this.e()) {
|
|
843
|
+
if (scanner.b("PUBLIC")) {
|
|
844
|
+
publicId = this.e() && this.O();
|
|
845
|
+
if (publicId === false) {
|
|
846
|
+
throw this.a("Expected a public identifier");
|
|
847
|
+
}
|
|
848
|
+
this.e();
|
|
849
|
+
}
|
|
850
|
+
if (publicId !== void 0 || scanner.b("SYSTEM")) {
|
|
851
|
+
this.e();
|
|
852
|
+
systemId = this.s();
|
|
853
|
+
if (systemId === false) {
|
|
854
|
+
throw this.a("Expected a system identifier");
|
|
855
|
+
}
|
|
856
|
+
this.e();
|
|
857
|
+
}
|
|
858
|
+
}
|
|
859
|
+
let internalSubset;
|
|
860
|
+
if (scanner.b("[")) {
|
|
861
|
+
internalSubset = scanner.A(/\][\x20\t\r\n]*>/);
|
|
862
|
+
if (!scanner.b("]")) {
|
|
863
|
+
throw this.a("Unclosed internal subset");
|
|
864
|
+
}
|
|
865
|
+
this.e();
|
|
515
866
|
}
|
|
516
867
|
if (!scanner.b(">")) {
|
|
517
868
|
throw this.a("Unclosed doctype declaration");
|
|
518
869
|
}
|
|
519
|
-
return true;
|
|
520
|
-
}
|
|
521
|
-
|
|
870
|
+
return this.f.preserveDocumentType ? this.j(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex) : true;
|
|
871
|
+
}
|
|
872
|
+
/**
|
|
873
|
+
* Consumes an element if possible.
|
|
874
|
+
*
|
|
875
|
+
* @returns Whether an element was consumed.
|
|
876
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
|
|
877
|
+
*/
|
|
878
|
+
B() {
|
|
522
879
|
let { c: scanner } = this;
|
|
523
|
-
let
|
|
880
|
+
let startIndex = scanner.d;
|
|
524
881
|
if (!scanner.b("<")) {
|
|
525
882
|
return false;
|
|
526
883
|
}
|
|
527
|
-
let name = this.
|
|
884
|
+
let name = this.r();
|
|
528
885
|
if (!name) {
|
|
529
|
-
scanner.o(
|
|
886
|
+
scanner.o(startIndex);
|
|
530
887
|
return false;
|
|
531
888
|
}
|
|
532
|
-
let attributes = this.
|
|
533
|
-
let isEmpty =
|
|
889
|
+
let attributes = this.I();
|
|
890
|
+
let isEmpty = !!scanner.b("/>");
|
|
534
891
|
let element = new XmlElement(name, attributes);
|
|
535
|
-
element.parent = this.
|
|
892
|
+
element.parent = this.l;
|
|
536
893
|
if (!isEmpty) {
|
|
537
894
|
if (!scanner.b(">")) {
|
|
538
895
|
throw this.a(`Unclosed start tag for element \`${name}\``);
|
|
539
896
|
}
|
|
540
|
-
this.
|
|
897
|
+
this.l = element;
|
|
541
898
|
do {
|
|
542
|
-
this.
|
|
543
|
-
} while (this.
|
|
899
|
+
this.L();
|
|
900
|
+
} while (this.B() || this.M() || this.K() || this.E() || this.D());
|
|
544
901
|
let endTagMark = scanner.d;
|
|
545
902
|
let endTagName;
|
|
546
|
-
if (!scanner.b("</") || !(endTagName = this.
|
|
903
|
+
if (!scanner.b("</") || !(endTagName = this.r()) || endTagName !== name) {
|
|
547
904
|
scanner.o(endTagMark);
|
|
548
905
|
throw this.a(`Missing end tag for element ${name}`);
|
|
549
906
|
}
|
|
@@ -551,12 +908,17 @@ var Parser = class {
|
|
|
551
908
|
if (!scanner.b(">")) {
|
|
552
909
|
throw this.a(`Unclosed end tag for element ${name}`);
|
|
553
910
|
}
|
|
554
|
-
this.
|
|
911
|
+
this.l = element.parent;
|
|
555
912
|
}
|
|
556
|
-
this.
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
913
|
+
return this.j(element, startIndex);
|
|
914
|
+
}
|
|
915
|
+
/**
|
|
916
|
+
* Consumes an `Eq` production if possible.
|
|
917
|
+
*
|
|
918
|
+
* @returns Whether an `Eq` production was consumed.
|
|
919
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
|
|
920
|
+
*/
|
|
921
|
+
u() {
|
|
560
922
|
this.e();
|
|
561
923
|
if (this.c.b("=")) {
|
|
562
924
|
this.e();
|
|
@@ -564,22 +926,40 @@ var Parser = class {
|
|
|
564
926
|
}
|
|
565
927
|
return false;
|
|
566
928
|
}
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
929
|
+
/**
|
|
930
|
+
* Consumes `Misc` content if possible.
|
|
931
|
+
*
|
|
932
|
+
* @returns Whether anything was consumed.
|
|
933
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
|
|
934
|
+
*/
|
|
935
|
+
w() {
|
|
936
|
+
return this.D() || this.E() || this.e();
|
|
937
|
+
}
|
|
938
|
+
/**
|
|
939
|
+
* Consumes one or more `Name` characters if possible.
|
|
940
|
+
*
|
|
941
|
+
* @returns `Name` characters, or an empty string if none were consumed.
|
|
942
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
|
|
943
|
+
*/
|
|
944
|
+
r() {
|
|
945
|
+
return isNameStartChar(this.c.h()) ? this.c.v(isNameChar) : emptyString2;
|
|
946
|
+
}
|
|
947
|
+
/**
|
|
948
|
+
* Consumes a processing instruction if possible.
|
|
949
|
+
*
|
|
950
|
+
* @returns Whether a processing instruction was consumed.
|
|
951
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
|
|
952
|
+
*/
|
|
953
|
+
E() {
|
|
574
954
|
let { c: scanner } = this;
|
|
575
|
-
let
|
|
955
|
+
let startIndex = scanner.d;
|
|
576
956
|
if (!scanner.b("<?")) {
|
|
577
957
|
return false;
|
|
578
958
|
}
|
|
579
|
-
let name = this.
|
|
959
|
+
let name = this.r();
|
|
580
960
|
if (name) {
|
|
581
961
|
if (name.toLowerCase() === "xml") {
|
|
582
|
-
scanner.o(
|
|
962
|
+
scanner.o(startIndex);
|
|
583
963
|
throw this.a("XML declaration isn't allowed here");
|
|
584
964
|
}
|
|
585
965
|
} else {
|
|
@@ -587,38 +967,73 @@ var Parser = class {
|
|
|
587
967
|
}
|
|
588
968
|
if (!this.e()) {
|
|
589
969
|
if (scanner.b("?>")) {
|
|
590
|
-
this.
|
|
591
|
-
return true;
|
|
970
|
+
return this.j(new XmlProcessingInstruction(name), startIndex);
|
|
592
971
|
}
|
|
593
972
|
throw this.a("Whitespace is required after a processing instruction name");
|
|
594
973
|
}
|
|
595
|
-
let content = scanner.
|
|
596
|
-
this.
|
|
974
|
+
let content = scanner.t("?>");
|
|
975
|
+
this.p(content);
|
|
597
976
|
if (!scanner.b("?>")) {
|
|
598
977
|
throw this.a("Unterminated processing instruction");
|
|
599
978
|
}
|
|
600
|
-
this.
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
979
|
+
return this.j(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
|
|
980
|
+
}
|
|
981
|
+
/**
|
|
982
|
+
* Consumes a prolog if possible.
|
|
983
|
+
*
|
|
984
|
+
* @returns Whether a prolog was consumed.
|
|
985
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
|
|
986
|
+
*/
|
|
987
|
+
H() {
|
|
604
988
|
let { c: scanner } = this;
|
|
605
|
-
let
|
|
606
|
-
this.
|
|
607
|
-
while (this.
|
|
989
|
+
let startIndex = scanner.d;
|
|
990
|
+
this.P();
|
|
991
|
+
while (this.w()) {
|
|
608
992
|
}
|
|
609
|
-
if (this.
|
|
610
|
-
while (this.
|
|
993
|
+
if (this.N()) {
|
|
994
|
+
while (this.w()) {
|
|
611
995
|
}
|
|
612
996
|
}
|
|
613
|
-
return
|
|
997
|
+
return startIndex < scanner.d;
|
|
998
|
+
}
|
|
999
|
+
/**
|
|
1000
|
+
* Consumes a public identifier literal if possible.
|
|
1001
|
+
*
|
|
1002
|
+
* @returns
|
|
1003
|
+
* Value of the public identifier literal minus quotes, or `false` if
|
|
1004
|
+
* nothing was consumed. An empty string indicates that a public id literal
|
|
1005
|
+
* was consumed but was empty.
|
|
1006
|
+
*
|
|
1007
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
|
|
1008
|
+
*/
|
|
1009
|
+
O() {
|
|
1010
|
+
let startIndex = this.c.d;
|
|
1011
|
+
let value = this.s();
|
|
1012
|
+
if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
|
|
1013
|
+
this.c.o(startIndex);
|
|
1014
|
+
throw this.a("Invalid character in public identifier");
|
|
1015
|
+
}
|
|
1016
|
+
return value;
|
|
614
1017
|
}
|
|
615
|
-
|
|
1018
|
+
/**
|
|
1019
|
+
* Consumes a reference if possible.
|
|
1020
|
+
*
|
|
1021
|
+
* This differs from `consumeContentReference()` in that a consumed reference
|
|
1022
|
+
* will be returned rather than added to the document.
|
|
1023
|
+
*
|
|
1024
|
+
* @returns
|
|
1025
|
+
* Parsed reference value, or `false` if nothing was consumed (to
|
|
1026
|
+
* distinguish from a reference that resolves to an empty string).
|
|
1027
|
+
*
|
|
1028
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
|
|
1029
|
+
*/
|
|
1030
|
+
C() {
|
|
616
1031
|
let { c: scanner } = this;
|
|
617
1032
|
if (!scanner.b("&")) {
|
|
618
1033
|
return false;
|
|
619
1034
|
}
|
|
620
|
-
let ref = scanner.
|
|
621
|
-
if (scanner.
|
|
1035
|
+
let ref = scanner.v(isReferenceChar);
|
|
1036
|
+
if (scanner.F() !== ";") {
|
|
622
1037
|
throw this.a("Unterminated reference (a reference must end with `;`)");
|
|
623
1038
|
}
|
|
624
1039
|
let parsedValue;
|
|
@@ -637,7 +1052,7 @@ var Parser = class {
|
|
|
637
1052
|
let {
|
|
638
1053
|
ignoreUndefinedEntities,
|
|
639
1054
|
resolveUndefinedEntity
|
|
640
|
-
} = this.
|
|
1055
|
+
} = this.f;
|
|
641
1056
|
let wrappedRef = `&${ref};`;
|
|
642
1057
|
if (resolveUndefinedEntity) {
|
|
643
1058
|
let resolvedValue = resolveUndefinedEntity(wrappedRef);
|
|
@@ -658,42 +1073,70 @@ var Parser = class {
|
|
|
658
1073
|
}
|
|
659
1074
|
return parsedValue;
|
|
660
1075
|
}
|
|
661
|
-
|
|
1076
|
+
/**
|
|
1077
|
+
* Consumes a `SystemLiteral` if possible.
|
|
1078
|
+
*
|
|
1079
|
+
* A `SystemLiteral` is similar to an attribute value, but allows the
|
|
1080
|
+
* characters `<` and `&` and doesn't replace references.
|
|
1081
|
+
*
|
|
1082
|
+
* @returns
|
|
1083
|
+
* Value of the `SystemLiteral` minus quotes, or `false` if nothing was
|
|
1084
|
+
* consumed. An empty string indicates that a `SystemLiteral` was consumed
|
|
1085
|
+
* but was empty.
|
|
1086
|
+
*
|
|
1087
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
|
|
1088
|
+
*/
|
|
1089
|
+
s() {
|
|
662
1090
|
let { c: scanner } = this;
|
|
663
1091
|
let quote = scanner.b('"') || scanner.b("'");
|
|
664
1092
|
if (!quote) {
|
|
665
1093
|
return false;
|
|
666
1094
|
}
|
|
667
|
-
let value = scanner.
|
|
668
|
-
this.
|
|
1095
|
+
let value = scanner.t(quote);
|
|
1096
|
+
this.p(value);
|
|
669
1097
|
if (!scanner.b(quote)) {
|
|
670
1098
|
throw this.a("Missing end quote");
|
|
671
1099
|
}
|
|
672
1100
|
return value;
|
|
673
1101
|
}
|
|
1102
|
+
/**
|
|
1103
|
+
* Consumes one or more whitespace characters if possible.
|
|
1104
|
+
*
|
|
1105
|
+
* @returns Whether any whitespace characters were consumed.
|
|
1106
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
|
|
1107
|
+
*/
|
|
674
1108
|
e() {
|
|
675
|
-
return
|
|
676
|
-
}
|
|
677
|
-
|
|
1109
|
+
return !!this.c.v(isWhitespace);
|
|
1110
|
+
}
|
|
1111
|
+
/**
|
|
1112
|
+
* Consumes an XML declaration if possible.
|
|
1113
|
+
*
|
|
1114
|
+
* @returns Whether an XML declaration was consumed.
|
|
1115
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
|
|
1116
|
+
*/
|
|
1117
|
+
P() {
|
|
678
1118
|
let { c: scanner } = this;
|
|
1119
|
+
let startIndex = scanner.d;
|
|
679
1120
|
if (!scanner.b("<?xml")) {
|
|
680
1121
|
return false;
|
|
681
1122
|
}
|
|
682
1123
|
if (!this.e()) {
|
|
683
1124
|
throw this.a("Invalid XML declaration");
|
|
684
1125
|
}
|
|
685
|
-
let version =
|
|
1126
|
+
let version = !!scanner.b("version") && this.u() && this.s();
|
|
686
1127
|
if (version === false) {
|
|
687
1128
|
throw this.a("XML version is missing or invalid");
|
|
688
1129
|
} else if (!/^1\.[0-9]+$/.test(version)) {
|
|
689
1130
|
throw this.a("Invalid character in version number");
|
|
690
1131
|
}
|
|
1132
|
+
let encoding;
|
|
1133
|
+
let standalone;
|
|
691
1134
|
if (this.e()) {
|
|
692
|
-
|
|
1135
|
+
encoding = !!scanner.b("encoding") && this.u() && this.s();
|
|
693
1136
|
if (encoding) {
|
|
694
1137
|
this.e();
|
|
695
1138
|
}
|
|
696
|
-
|
|
1139
|
+
standalone = !!scanner.b("standalone") && this.u() && this.s();
|
|
697
1140
|
if (standalone) {
|
|
698
1141
|
if (standalone !== "yes" && standalone !== "no") {
|
|
699
1142
|
throw this.a('Only "yes" and "no" are permitted as values of `standalone`');
|
|
@@ -704,49 +1147,24 @@ var Parser = class {
|
|
|
704
1147
|
if (!scanner.b("?>")) {
|
|
705
1148
|
throw this.a("Invalid or unclosed XML declaration");
|
|
706
1149
|
}
|
|
707
|
-
return
|
|
708
|
-
|
|
1150
|
+
return this.f.preserveXmlDeclaration ? this.j(new XmlDeclaration(
|
|
1151
|
+
version,
|
|
1152
|
+
encoding || void 0,
|
|
1153
|
+
standalone || void 0
|
|
1154
|
+
), startIndex) : true;
|
|
1155
|
+
}
|
|
1156
|
+
/**
|
|
1157
|
+
* Returns an `XmlError` for the current scanner position.
|
|
1158
|
+
*/
|
|
709
1159
|
a(message) {
|
|
710
|
-
let {
|
|
711
|
-
|
|
712
|
-
let excerpt = "";
|
|
713
|
-
let line = 1;
|
|
714
|
-
for (let i = 0; i < charIndex; ++i) {
|
|
715
|
-
let char = xml[i];
|
|
716
|
-
if (char === "\n") {
|
|
717
|
-
column = 1;
|
|
718
|
-
excerpt = "";
|
|
719
|
-
line += 1;
|
|
720
|
-
} else {
|
|
721
|
-
column += 1;
|
|
722
|
-
excerpt += char;
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
let eol = xml.indexOf("\n", charIndex);
|
|
726
|
-
excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
|
|
727
|
-
let excerptStart = 0;
|
|
728
|
-
if (excerpt.length > 50) {
|
|
729
|
-
if (column < 40) {
|
|
730
|
-
excerpt = excerpt.slice(0, 50);
|
|
731
|
-
} else {
|
|
732
|
-
excerptStart = column - 20;
|
|
733
|
-
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
734
|
-
}
|
|
735
|
-
}
|
|
736
|
-
let err = new Error(
|
|
737
|
-
`${message} (line ${line}, column ${column})
|
|
738
|
-
${excerpt}
|
|
739
|
-
` + " ".repeat(column - excerptStart + 1) + "^\n"
|
|
740
|
-
);
|
|
741
|
-
Object.assign(err, {
|
|
742
|
-
column,
|
|
743
|
-
excerpt,
|
|
744
|
-
line,
|
|
745
|
-
pos: charIndex
|
|
746
|
-
});
|
|
747
|
-
return err;
|
|
1160
|
+
let { c: scanner } = this;
|
|
1161
|
+
return new XmlError(message, scanner.d, scanner.m);
|
|
748
1162
|
}
|
|
749
|
-
|
|
1163
|
+
/**
|
|
1164
|
+
* Throws an invalid character error if any character in the given _string_
|
|
1165
|
+
* isn't a valid XML character.
|
|
1166
|
+
*/
|
|
1167
|
+
p(string) {
|
|
750
1168
|
let { length } = string;
|
|
751
1169
|
for (let i = 0; i < length; ++i) {
|
|
752
1170
|
let cp = string.codePointAt(i);
|
|
@@ -760,11 +1178,12 @@ var Parser = class {
|
|
|
760
1178
|
}
|
|
761
1179
|
}
|
|
762
1180
|
};
|
|
763
|
-
function
|
|
764
|
-
|
|
765
|
-
|
|
1181
|
+
function normalizeLineBreaks(text) {
|
|
1182
|
+
let i = 0;
|
|
1183
|
+
while ((i = text.indexOf("\r", i)) !== -1) {
|
|
1184
|
+
text = text[i + 1] === "\n" ? text.slice(0, i) + text.slice(i + 1) : text.slice(0, i) + "\n" + text.slice(i + 1);
|
|
766
1185
|
}
|
|
767
|
-
return
|
|
1186
|
+
return text;
|
|
768
1187
|
}
|
|
769
1188
|
|
|
770
1189
|
// src/index.ts
|