@rgrove/parse-xml 4.0.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -31
- package/dist/browser.js +692 -300
- package/dist/browser.js.map +4 -4
- package/dist/global.min.js +9 -8
- package/dist/global.min.js.map +4 -4
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -2
- package/dist/index.js.map +1 -1
- package/dist/lib/Parser.d.ts +53 -6
- package/dist/lib/Parser.d.ts.map +1 -1
- package/dist/lib/Parser.js +166 -126
- package/dist/lib/Parser.js.map +1 -1
- package/dist/lib/StringScanner.d.ts +15 -21
- package/dist/lib/StringScanner.d.ts.map +1 -1
- package/dist/lib/StringScanner.js +63 -86
- package/dist/lib/StringScanner.js.map +1 -1
- package/dist/lib/XmlDeclaration.d.ts +30 -0
- package/dist/lib/XmlDeclaration.d.ts.map +1 -0
- package/dist/lib/XmlDeclaration.js +36 -0
- package/dist/lib/XmlDeclaration.js.map +1 -0
- package/dist/lib/XmlDocument.d.ts +4 -2
- package/dist/lib/XmlDocument.d.ts.map +1 -1
- package/dist/lib/XmlDocument.js.map +1 -1
- package/dist/lib/XmlDocumentType.d.ts +37 -0
- package/dist/lib/XmlDocumentType.d.ts.map +1 -0
- package/dist/lib/XmlDocumentType.js +39 -0
- package/dist/lib/XmlDocumentType.js.map +1 -0
- package/dist/lib/XmlElement.js.map +1 -1
- package/dist/lib/XmlError.d.ts +24 -0
- package/dist/lib/XmlError.d.ts.map +1 -0
- package/dist/lib/XmlError.js +52 -0
- package/dist/lib/XmlError.js.map +1 -0
- package/dist/lib/XmlNode.d.ts +20 -1
- package/dist/lib/XmlNode.d.ts.map +1 -1
- package/dist/lib/XmlNode.js +28 -3
- package/dist/lib/XmlNode.js.map +1 -1
- package/dist/lib/syntax.d.ts.map +1 -1
- package/dist/lib/syntax.js +18 -23
- package/dist/lib/syntax.js.map +1 -1
- package/dist/lib/types.d.ts +2 -2
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +20 -23
- package/src/index.ts +3 -0
- package/src/lib/Parser.ts +228 -141
- package/src/lib/StringScanner.ts +66 -103
- package/src/lib/XmlDeclaration.ts +58 -0
- package/src/lib/XmlDocument.ts +4 -2
- package/src/lib/XmlDocumentType.ts +67 -0
- package/src/lib/XmlError.ts +80 -0
- package/src/lib/XmlNode.ts +33 -3
- package/src/lib/syntax.ts +12 -18
package/dist/browser.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
/*! @rgrove/parse-xml v4.0
|
|
1
|
+
/*! @rgrove/parse-xml v4.2.0 | ISC License | Copyright Ryan Grove */
|
|
2
2
|
"use strict";
|
|
3
3
|
var __defProp = Object.defineProperty;
|
|
4
4
|
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
@@ -23,8 +23,11 @@ var src_exports = {};
|
|
|
23
23
|
__export(src_exports, {
|
|
24
24
|
XmlCdata: () => XmlCdata,
|
|
25
25
|
XmlComment: () => XmlComment,
|
|
26
|
+
XmlDeclaration: () => XmlDeclaration,
|
|
26
27
|
XmlDocument: () => XmlDocument,
|
|
28
|
+
XmlDocumentType: () => XmlDocumentType,
|
|
27
29
|
XmlElement: () => XmlElement,
|
|
30
|
+
XmlError: () => XmlError,
|
|
28
31
|
XmlNode: () => XmlNode,
|
|
29
32
|
XmlProcessingInstruction: () => XmlProcessingInstruction,
|
|
30
33
|
XmlText: () => XmlText,
|
|
@@ -37,125 +40,168 @@ var emptyString = "";
|
|
|
37
40
|
var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
|
|
38
41
|
var StringScanner = class {
|
|
39
42
|
constructor(string) {
|
|
40
|
-
this.
|
|
43
|
+
this.k = this.u(string, true);
|
|
41
44
|
this.d = 0;
|
|
42
45
|
this.length = string.length;
|
|
43
|
-
this.
|
|
44
|
-
this.
|
|
45
|
-
if (this.
|
|
46
|
+
this.l = this.k !== this.length;
|
|
47
|
+
this.h = string;
|
|
48
|
+
if (this.l) {
|
|
46
49
|
let charsToBytes = [];
|
|
47
|
-
for (let byteIndex = 0, charIndex = 0; charIndex < this.
|
|
50
|
+
for (let byteIndex = 0, charIndex = 0; charIndex < this.k; ++charIndex) {
|
|
48
51
|
charsToBytes[charIndex] = byteIndex;
|
|
49
52
|
byteIndex += string.codePointAt(byteIndex) > 65535 ? 2 : 1;
|
|
50
53
|
}
|
|
51
|
-
this.
|
|
54
|
+
this.A = charsToBytes;
|
|
52
55
|
}
|
|
53
56
|
}
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
57
|
+
/**
|
|
58
|
+
* Whether the current character index is at the end of the input string.
|
|
59
|
+
*/
|
|
60
|
+
get B() {
|
|
61
|
+
return this.d >= this.k;
|
|
62
|
+
}
|
|
63
|
+
// -- Protected Methods ------------------------------------------------------
|
|
64
|
+
/**
|
|
65
|
+
* Returns the number of characters in the given string, which may differ from
|
|
66
|
+
* the byte length if the string contains multibyte characters.
|
|
67
|
+
*/
|
|
68
|
+
u(string, multiByteSafe = this.l) {
|
|
62
69
|
return multiByteSafe ? string.replace(surrogatePair, "_").length : string.length;
|
|
63
70
|
}
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
71
|
+
// -- Public Methods ---------------------------------------------------------
|
|
72
|
+
/**
|
|
73
|
+
* Advances the scanner by the given number of characters, stopping if the end
|
|
74
|
+
* of the string is reached.
|
|
75
|
+
*/
|
|
76
|
+
p(count = 1) {
|
|
77
|
+
this.d = Math.min(this.k, this.d + count);
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Returns the byte index of the given character index in the string. The two
|
|
81
|
+
* may differ in strings that contain multibyte characters.
|
|
82
|
+
*/
|
|
83
|
+
f(charIndex = this.d) {
|
|
84
|
+
var _a;
|
|
85
|
+
return this.l ? (_a = this.A[charIndex]) != null ? _a : Infinity : charIndex;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Consumes and returns the given number of characters if possible, advancing
|
|
89
|
+
* the scanner and stopping if the end of the string is reached.
|
|
90
|
+
*
|
|
91
|
+
* If no characters could be consumed, an empty string will be returned.
|
|
92
|
+
*/
|
|
93
|
+
G(charCount = 1) {
|
|
94
|
+
let chars = this.m(charCount);
|
|
95
|
+
this.p(charCount);
|
|
70
96
|
return chars;
|
|
71
97
|
}
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
this.f(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
let char;
|
|
87
|
-
let match = emptyString;
|
|
88
|
-
while ((char = this.g()) && fn(char)) {
|
|
89
|
-
match += char;
|
|
90
|
-
this.f();
|
|
91
|
-
}
|
|
92
|
-
return match;
|
|
98
|
+
/**
|
|
99
|
+
* Consumes and returns the given number of bytes if possible, advancing the
|
|
100
|
+
* scanner and stopping if the end of the string is reached.
|
|
101
|
+
*
|
|
102
|
+
* It's up to the caller to ensure that the given byte count doesn't split a
|
|
103
|
+
* multibyte character.
|
|
104
|
+
*
|
|
105
|
+
* If no bytes could be consumed, an empty string will be returned.
|
|
106
|
+
*/
|
|
107
|
+
v(byteCount) {
|
|
108
|
+
let byteIndex = this.f();
|
|
109
|
+
let result = this.h.slice(byteIndex, byteIndex + byteCount);
|
|
110
|
+
this.p(this.u(result));
|
|
111
|
+
return result;
|
|
93
112
|
}
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
113
|
+
/**
|
|
114
|
+
* Consumes and returns all characters for which the given function returns
|
|
115
|
+
* `true`, stopping when `false` is returned or the end of the input is
|
|
116
|
+
* reached.
|
|
117
|
+
*/
|
|
118
|
+
w(fn) {
|
|
119
|
+
let { length, l: multiByteMode, h: string } = this;
|
|
120
|
+
let startByteIndex = this.f();
|
|
121
|
+
let endByteIndex = startByteIndex;
|
|
122
|
+
if (multiByteMode) {
|
|
123
|
+
while (endByteIndex < length) {
|
|
124
|
+
let char = string[endByteIndex];
|
|
125
|
+
let isSurrogatePair = char >= "\uD800" && char <= "\uDBFF";
|
|
126
|
+
if (isSurrogatePair) {
|
|
127
|
+
char += string[endByteIndex + 1];
|
|
128
|
+
}
|
|
129
|
+
if (!fn(char)) {
|
|
130
|
+
break;
|
|
131
|
+
}
|
|
132
|
+
endByteIndex += isSurrogatePair ? 2 : 1;
|
|
133
|
+
}
|
|
134
|
+
} else {
|
|
135
|
+
while (endByteIndex < length && fn(string[endByteIndex])) {
|
|
136
|
+
++endByteIndex;
|
|
104
137
|
}
|
|
105
138
|
}
|
|
106
|
-
return
|
|
107
|
-
}
|
|
139
|
+
return this.v(endByteIndex - startByteIndex);
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Consumes the given string if it exists at the current character index, and
|
|
143
|
+
* advances the scanner.
|
|
144
|
+
*
|
|
145
|
+
* If the given string doesn't exist at the current character index, an empty
|
|
146
|
+
* string will be returned and the scanner will not be advanced.
|
|
147
|
+
*/
|
|
108
148
|
b(stringToConsume) {
|
|
109
149
|
let { length } = stringToConsume;
|
|
110
|
-
|
|
111
|
-
|
|
150
|
+
let byteIndex = this.f();
|
|
151
|
+
if (stringToConsume === this.h.slice(byteIndex, byteIndex + length)) {
|
|
152
|
+
this.p(length === 1 ? 1 : this.u(stringToConsume));
|
|
112
153
|
return stringToConsume;
|
|
113
154
|
}
|
|
114
155
|
return emptyString;
|
|
115
156
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
157
|
+
/**
|
|
158
|
+
* Consumes characters until the given global regex is matched, advancing the
|
|
159
|
+
* scanner up to (but not beyond) the beginning of the match. If the regex
|
|
160
|
+
* doesn't match, nothing will be consumed.
|
|
161
|
+
*
|
|
162
|
+
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
163
|
+
*/
|
|
164
|
+
x(regex) {
|
|
165
|
+
let matchByteIndex = this.h.slice(this.f()).search(regex);
|
|
166
|
+
return matchByteIndex > 0 ? this.v(matchByteIndex) : emptyString;
|
|
167
|
+
}
|
|
168
|
+
/**
|
|
169
|
+
* Consumes characters until the given string is found, advancing the scanner
|
|
170
|
+
* up to (but not beyond) that point. If the string is never found, nothing
|
|
171
|
+
* will be consumed.
|
|
172
|
+
*
|
|
173
|
+
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
174
|
+
*/
|
|
175
|
+
s(searchString) {
|
|
176
|
+
let byteIndex = this.f();
|
|
177
|
+
let matchByteIndex = this.h.indexOf(searchString, byteIndex);
|
|
178
|
+
return matchByteIndex > 0 ? this.v(matchByteIndex - byteIndex) : emptyString;
|
|
179
|
+
}
|
|
180
|
+
/**
|
|
181
|
+
* Returns the given number of characters starting at the current character
|
|
182
|
+
* index, without advancing the scanner and without exceeding the end of the
|
|
183
|
+
* input string.
|
|
184
|
+
*/
|
|
185
|
+
m(count = 1) {
|
|
186
|
+
let { d: charIndex, h: string } = this;
|
|
187
|
+
return this.l ? string.slice(this.f(charIndex), this.f(charIndex + count)) : string.slice(charIndex, charIndex + count);
|
|
188
|
+
}
|
|
189
|
+
/**
|
|
190
|
+
* Resets the scanner position to the given character _index_, or to the start
|
|
191
|
+
* of the input string if no index is given.
|
|
192
|
+
*
|
|
193
|
+
* If _index_ is negative, the scanner position will be moved backward by that
|
|
194
|
+
* many characters, stopping if the beginning of the string is reached.
|
|
195
|
+
*/
|
|
196
|
+
n(index = 0) {
|
|
197
|
+
this.d = index >= 0 ? Math.min(this.k, index) : Math.max(0, this.d + index);
|
|
152
198
|
}
|
|
153
199
|
};
|
|
154
200
|
|
|
155
201
|
// src/lib/syntax.ts
|
|
156
|
-
var attValueCharDoubleQuote = /[
|
|
157
|
-
var attValueCharSingleQuote = /[
|
|
158
|
-
var attValueNormalizedWhitespace =
|
|
202
|
+
var attValueCharDoubleQuote = /["&<]/;
|
|
203
|
+
var attValueCharSingleQuote = /['&<]/;
|
|
204
|
+
var attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
|
|
159
205
|
var endCharData = /<|&|]]>/;
|
|
160
206
|
var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.create(null), {
|
|
161
207
|
amp: "&",
|
|
@@ -165,45 +211,88 @@ var predefinedEntities = Object.freeze(Object.assign(/* @__PURE__ */ Object.crea
|
|
|
165
211
|
quot: '"'
|
|
166
212
|
}));
|
|
167
213
|
function isNameChar(char) {
|
|
168
|
-
let cp =
|
|
169
|
-
return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp
|
|
214
|
+
let cp = char.codePointAt(0);
|
|
215
|
+
return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp >= 48 && cp <= 57 || cp === 45 || cp === 46 || cp === 183 || cp >= 768 && cp <= 879 || cp === 8255 || cp === 8256 || isNameStartChar(char, cp);
|
|
170
216
|
}
|
|
171
|
-
function isNameStartChar(char, cp =
|
|
172
|
-
return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp
|
|
217
|
+
function isNameStartChar(char, cp = char.codePointAt(0)) {
|
|
218
|
+
return cp >= 97 && cp <= 122 || cp >= 65 && cp <= 90 || cp === 58 || cp === 95 || cp >= 192 && cp <= 214 || cp >= 216 && cp <= 246 || cp >= 248 && cp <= 767 || cp >= 880 && cp <= 893 || cp >= 895 && cp <= 8191 || cp === 8204 || cp === 8205 || cp >= 8304 && cp <= 8591 || cp >= 11264 && cp <= 12271 || cp >= 12289 && cp <= 55295 || cp >= 63744 && cp <= 64975 || cp >= 65008 && cp <= 65533 || cp >= 65536 && cp <= 983039;
|
|
173
219
|
}
|
|
174
220
|
function isReferenceChar(char) {
|
|
175
221
|
return char === "#" || isNameChar(char);
|
|
176
222
|
}
|
|
177
223
|
function isWhitespace(char) {
|
|
178
|
-
let cp =
|
|
224
|
+
let cp = char.codePointAt(0);
|
|
179
225
|
return cp === 32 || cp === 9 || cp === 10 || cp === 13;
|
|
180
226
|
}
|
|
181
227
|
function isXmlCodePoint(cp) {
|
|
182
|
-
return cp
|
|
183
|
-
}
|
|
184
|
-
function getCodePoint(char) {
|
|
185
|
-
return char.codePointAt(0) || -1;
|
|
228
|
+
return cp >= 32 && cp <= 55295 || cp === 10 || cp === 9 || cp === 13 || cp >= 57344 && cp <= 65533 || cp >= 65536 && cp <= 1114111;
|
|
186
229
|
}
|
|
187
230
|
|
|
188
231
|
// src/lib/XmlNode.ts
|
|
189
|
-
var
|
|
232
|
+
var _XmlNode = class _XmlNode {
|
|
190
233
|
constructor() {
|
|
234
|
+
/**
|
|
235
|
+
* Parent node of this node, or `null` if this node has no parent.
|
|
236
|
+
*/
|
|
191
237
|
this.parent = null;
|
|
192
|
-
|
|
238
|
+
/**
|
|
239
|
+
* Starting byte offset of this node in the original XML string, or `-1` if
|
|
240
|
+
* the offset is unknown.
|
|
241
|
+
*/
|
|
242
|
+
this.start = -1;
|
|
243
|
+
/**
|
|
244
|
+
* Ending byte offset of this node in the original XML string, or `-1` if the
|
|
245
|
+
* offset is unknown.
|
|
246
|
+
*/
|
|
247
|
+
this.end = -1;
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* Document that contains this node, or `null` if this node is not associated
|
|
251
|
+
* with a document.
|
|
252
|
+
*/
|
|
193
253
|
get document() {
|
|
194
254
|
var _a, _b;
|
|
195
255
|
return (_b = (_a = this.parent) == null ? void 0 : _a.document) != null ? _b : null;
|
|
196
256
|
}
|
|
257
|
+
/**
|
|
258
|
+
* Whether this node is the root node of the document (also known as the
|
|
259
|
+
* document element).
|
|
260
|
+
*/
|
|
197
261
|
get isRootNode() {
|
|
198
|
-
return this.parent !== null && this.parent === this.document;
|
|
199
|
-
}
|
|
262
|
+
return this.parent !== null && this.parent === this.document && this.type === _XmlNode.TYPE_ELEMENT;
|
|
263
|
+
}
|
|
264
|
+
/**
|
|
265
|
+
* Whether whitespace should be preserved in the content of this element and
|
|
266
|
+
* its children.
|
|
267
|
+
*
|
|
268
|
+
* This is influenced by the value of the special `xml:space` attribute, and
|
|
269
|
+
* will be `true` for any node whose `xml:space` attribute is set to
|
|
270
|
+
* "preserve". If a node has no such attribute, it will inherit the value of
|
|
271
|
+
* the nearest ancestor that does (if any).
|
|
272
|
+
*
|
|
273
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
|
|
274
|
+
*/
|
|
200
275
|
get preserveWhitespace() {
|
|
201
276
|
var _a;
|
|
202
|
-
return
|
|
203
|
-
}
|
|
277
|
+
return !!((_a = this.parent) == null ? void 0 : _a.preserveWhitespace);
|
|
278
|
+
}
|
|
279
|
+
/**
|
|
280
|
+
* Type of this node.
|
|
281
|
+
*
|
|
282
|
+
* The value of this property is a string that matches one of the static
|
|
283
|
+
* `TYPE_*` properties on the `XmlNode` class (e.g. `TYPE_ELEMENT`,
|
|
284
|
+
* `TYPE_TEXT`, etc.).
|
|
285
|
+
*
|
|
286
|
+
* The `XmlNode` class itself is a base class and doesn't have its own type
|
|
287
|
+
* name.
|
|
288
|
+
*/
|
|
204
289
|
get type() {
|
|
205
290
|
return "";
|
|
206
291
|
}
|
|
292
|
+
/**
|
|
293
|
+
* Returns a JSON-serializable object representing this node, minus properties
|
|
294
|
+
* that could result in circular references.
|
|
295
|
+
*/
|
|
207
296
|
toJSON() {
|
|
208
297
|
let json = {
|
|
209
298
|
type: this.type
|
|
@@ -214,15 +303,46 @@ var XmlNode = class {
|
|
|
214
303
|
if (this.preserveWhitespace) {
|
|
215
304
|
json.preserveWhitespace = true;
|
|
216
305
|
}
|
|
306
|
+
if (this.start !== -1) {
|
|
307
|
+
json.start = this.start;
|
|
308
|
+
json.end = this.end;
|
|
309
|
+
}
|
|
217
310
|
return json;
|
|
218
311
|
}
|
|
219
312
|
};
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
313
|
+
/**
|
|
314
|
+
* Type value for an `XmlCdata` node.
|
|
315
|
+
*/
|
|
316
|
+
_XmlNode.TYPE_CDATA = "cdata";
|
|
317
|
+
/**
|
|
318
|
+
* Type value for an `XmlComment` node.
|
|
319
|
+
*/
|
|
320
|
+
_XmlNode.TYPE_COMMENT = "comment";
|
|
321
|
+
/**
|
|
322
|
+
* Type value for an `XmlDocument` node.
|
|
323
|
+
*/
|
|
324
|
+
_XmlNode.TYPE_DOCUMENT = "document";
|
|
325
|
+
/**
|
|
326
|
+
* Type value for an `XmlDocumentType` node.
|
|
327
|
+
*/
|
|
328
|
+
_XmlNode.TYPE_DOCUMENT_TYPE = "doctype";
|
|
329
|
+
/**
|
|
330
|
+
* Type value for an `XmlElement` node.
|
|
331
|
+
*/
|
|
332
|
+
_XmlNode.TYPE_ELEMENT = "element";
|
|
333
|
+
/**
|
|
334
|
+
* Type value for an `XmlProcessingInstruction` node.
|
|
335
|
+
*/
|
|
336
|
+
_XmlNode.TYPE_PROCESSING_INSTRUCTION = "pi";
|
|
337
|
+
/**
|
|
338
|
+
* Type value for an `XmlText` node.
|
|
339
|
+
*/
|
|
340
|
+
_XmlNode.TYPE_TEXT = "text";
|
|
341
|
+
/**
|
|
342
|
+
* Type value for an `XmlDeclaration` node.
|
|
343
|
+
*/
|
|
344
|
+
_XmlNode.TYPE_XML_DECLARATION = "xmldecl";
|
|
345
|
+
var XmlNode = _XmlNode;
|
|
226
346
|
|
|
227
347
|
// src/lib/XmlText.ts
|
|
228
348
|
var XmlText = class extends XmlNode {
|
|
@@ -263,20 +383,46 @@ var XmlComment = class extends XmlNode {
|
|
|
263
383
|
}
|
|
264
384
|
};
|
|
265
385
|
|
|
386
|
+
// src/lib/XmlDeclaration.ts
|
|
387
|
+
var XmlDeclaration = class extends XmlNode {
|
|
388
|
+
constructor(version, encoding, standalone) {
|
|
389
|
+
super();
|
|
390
|
+
this.version = version;
|
|
391
|
+
this.encoding = encoding != null ? encoding : null;
|
|
392
|
+
this.standalone = standalone != null ? standalone : null;
|
|
393
|
+
}
|
|
394
|
+
get type() {
|
|
395
|
+
return XmlNode.TYPE_XML_DECLARATION;
|
|
396
|
+
}
|
|
397
|
+
toJSON() {
|
|
398
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
399
|
+
json.version = this.version;
|
|
400
|
+
for (let key of ["encoding", "standalone"]) {
|
|
401
|
+
if (this[key] !== null) {
|
|
402
|
+
json[key] = this[key];
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
return json;
|
|
406
|
+
}
|
|
407
|
+
};
|
|
408
|
+
|
|
266
409
|
// src/lib/XmlElement.ts
|
|
267
|
-
var XmlElement = class extends XmlNode {
|
|
410
|
+
var XmlElement = class _XmlElement extends XmlNode {
|
|
268
411
|
constructor(name, attributes = /* @__PURE__ */ Object.create(null), children = []) {
|
|
269
412
|
super();
|
|
270
413
|
this.name = name;
|
|
271
414
|
this.attributes = attributes;
|
|
272
415
|
this.children = children;
|
|
273
416
|
}
|
|
417
|
+
/**
|
|
418
|
+
* Whether this element is empty (meaning it has no children).
|
|
419
|
+
*/
|
|
274
420
|
get isEmpty() {
|
|
275
421
|
return this.children.length === 0;
|
|
276
422
|
}
|
|
277
423
|
get preserveWhitespace() {
|
|
278
424
|
let node = this;
|
|
279
|
-
while (node instanceof
|
|
425
|
+
while (node instanceof _XmlElement) {
|
|
280
426
|
if ("xml:space" in node.attributes) {
|
|
281
427
|
return node.attributes["xml:space"] === "preserve";
|
|
282
428
|
}
|
|
@@ -284,6 +430,9 @@ var XmlElement = class extends XmlNode {
|
|
|
284
430
|
}
|
|
285
431
|
return false;
|
|
286
432
|
}
|
|
433
|
+
/**
|
|
434
|
+
* Text content of this element and all its descendants.
|
|
435
|
+
*/
|
|
287
436
|
get text() {
|
|
288
437
|
return this.children.map((child) => "text" in child ? child.text : "").join("");
|
|
289
438
|
}
|
|
@@ -308,6 +457,9 @@ var XmlDocument = class extends XmlNode {
|
|
|
308
457
|
get document() {
|
|
309
458
|
return this;
|
|
310
459
|
}
|
|
460
|
+
/**
|
|
461
|
+
* Root element of this document, or `null` if this document is empty.
|
|
462
|
+
*/
|
|
311
463
|
get root() {
|
|
312
464
|
for (let child of this.children) {
|
|
313
465
|
if (child instanceof XmlElement) {
|
|
@@ -316,6 +468,9 @@ var XmlDocument = class extends XmlNode {
|
|
|
316
468
|
}
|
|
317
469
|
return null;
|
|
318
470
|
}
|
|
471
|
+
/**
|
|
472
|
+
* Text content of this document and all its descendants.
|
|
473
|
+
*/
|
|
319
474
|
get text() {
|
|
320
475
|
return this.children.map((child) => "text" in child ? child.text : "").join("");
|
|
321
476
|
}
|
|
@@ -329,6 +484,71 @@ var XmlDocument = class extends XmlNode {
|
|
|
329
484
|
}
|
|
330
485
|
};
|
|
331
486
|
|
|
487
|
+
// src/lib/XmlDocumentType.ts
|
|
488
|
+
var XmlDocumentType = class extends XmlNode {
|
|
489
|
+
constructor(name, publicId, systemId, internalSubset) {
|
|
490
|
+
super();
|
|
491
|
+
this.name = name;
|
|
492
|
+
this.publicId = publicId != null ? publicId : null;
|
|
493
|
+
this.systemId = systemId != null ? systemId : null;
|
|
494
|
+
this.internalSubset = internalSubset != null ? internalSubset : null;
|
|
495
|
+
}
|
|
496
|
+
get type() {
|
|
497
|
+
return XmlNode.TYPE_DOCUMENT_TYPE;
|
|
498
|
+
}
|
|
499
|
+
toJSON() {
|
|
500
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
501
|
+
json.name = this.name;
|
|
502
|
+
for (let key of ["publicId", "systemId", "internalSubset"]) {
|
|
503
|
+
if (this[key] !== null) {
|
|
504
|
+
json[key] = this[key];
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
return json;
|
|
508
|
+
}
|
|
509
|
+
};
|
|
510
|
+
|
|
511
|
+
// src/lib/XmlError.ts
|
|
512
|
+
var XmlError = class extends Error {
|
|
513
|
+
constructor(message, charIndex, xml) {
|
|
514
|
+
let column = 1;
|
|
515
|
+
let excerpt = "";
|
|
516
|
+
let line = 1;
|
|
517
|
+
for (let i = 0; i < charIndex; ++i) {
|
|
518
|
+
let char = xml[i];
|
|
519
|
+
if (char === "\n") {
|
|
520
|
+
column = 1;
|
|
521
|
+
excerpt = "";
|
|
522
|
+
line += 1;
|
|
523
|
+
} else {
|
|
524
|
+
column += 1;
|
|
525
|
+
excerpt += char;
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
let eol = xml.indexOf("\n", charIndex);
|
|
529
|
+
excerpt += eol === -1 ? xml.slice(charIndex) : xml.slice(charIndex, eol);
|
|
530
|
+
let excerptStart = 0;
|
|
531
|
+
if (excerpt.length > 50) {
|
|
532
|
+
if (column < 40) {
|
|
533
|
+
excerpt = excerpt.slice(0, 50);
|
|
534
|
+
} else {
|
|
535
|
+
excerptStart = column - 20;
|
|
536
|
+
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
super(
|
|
540
|
+
`${message} (line ${line}, column ${column})
|
|
541
|
+
${excerpt}
|
|
542
|
+
` + " ".repeat(column - excerptStart + 1) + "^\n"
|
|
543
|
+
);
|
|
544
|
+
this.column = column;
|
|
545
|
+
this.excerpt = excerpt;
|
|
546
|
+
this.line = line;
|
|
547
|
+
this.name = "XmlError";
|
|
548
|
+
this.pos = charIndex;
|
|
549
|
+
}
|
|
550
|
+
};
|
|
551
|
+
|
|
332
552
|
// src/lib/XmlProcessingInstruction.ts
|
|
333
553
|
var XmlProcessingInstruction = class extends XmlNode {
|
|
334
554
|
constructor(name, content = "") {
|
|
@@ -350,45 +570,67 @@ var XmlProcessingInstruction = class extends XmlNode {
|
|
|
350
570
|
// src/lib/Parser.ts
|
|
351
571
|
var emptyString2 = "";
|
|
352
572
|
var Parser = class {
|
|
573
|
+
/**
|
|
574
|
+
* @param xml XML string to parse.
|
|
575
|
+
* @param options Parser options.
|
|
576
|
+
*/
|
|
353
577
|
constructor(xml, options = {}) {
|
|
354
|
-
this.document = new XmlDocument();
|
|
355
|
-
this.
|
|
356
|
-
this.
|
|
357
|
-
this.c = new StringScanner(
|
|
358
|
-
this.
|
|
359
|
-
|
|
360
|
-
|
|
578
|
+
let doc = this.document = new XmlDocument();
|
|
579
|
+
this.j = doc;
|
|
580
|
+
this.g = options;
|
|
581
|
+
this.c = new StringScanner(xml);
|
|
582
|
+
if (this.g.includeOffsets) {
|
|
583
|
+
doc.start = 0;
|
|
584
|
+
doc.end = xml.length;
|
|
361
585
|
}
|
|
362
|
-
|
|
586
|
+
this.parse();
|
|
587
|
+
}
|
|
588
|
+
/**
|
|
589
|
+
* Adds the given `XmlNode` as a child of `this.currentNode`.
|
|
590
|
+
*/
|
|
591
|
+
i(node, charIndex) {
|
|
592
|
+
node.parent = this.j;
|
|
593
|
+
if (this.g.includeOffsets) {
|
|
594
|
+
node.start = this.c.f(charIndex);
|
|
595
|
+
node.end = this.c.f();
|
|
363
596
|
}
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
}
|
|
367
|
-
}
|
|
368
|
-
k(node) {
|
|
369
|
-
node.parent = this.i;
|
|
370
|
-
this.i.children.push(node);
|
|
597
|
+
this.j.children.push(node);
|
|
598
|
+
return true;
|
|
371
599
|
}
|
|
372
|
-
|
|
373
|
-
|
|
600
|
+
/**
|
|
601
|
+
* Adds the given _text_ to the document, either by appending it to a
|
|
602
|
+
* preceding `XmlText` node (if possible) or by creating a new `XmlText` node.
|
|
603
|
+
*/
|
|
604
|
+
y(text, charIndex) {
|
|
605
|
+
let { children } = this.j;
|
|
374
606
|
let { length } = children;
|
|
607
|
+
text = normalizeLineBreaks(text);
|
|
375
608
|
if (length > 0) {
|
|
376
609
|
let prevNode = children[length - 1];
|
|
377
|
-
if (prevNode
|
|
378
|
-
|
|
379
|
-
|
|
610
|
+
if ((prevNode == null ? void 0 : prevNode.type) === XmlNode.TYPE_TEXT) {
|
|
611
|
+
let textNode = prevNode;
|
|
612
|
+
textNode.text += text;
|
|
613
|
+
if (this.g.includeOffsets) {
|
|
614
|
+
textNode.end = this.c.f();
|
|
615
|
+
}
|
|
616
|
+
return true;
|
|
380
617
|
}
|
|
381
618
|
}
|
|
382
|
-
this.
|
|
619
|
+
return this.i(new XmlText(text), charIndex);
|
|
383
620
|
}
|
|
384
|
-
|
|
621
|
+
/**
|
|
622
|
+
* Consumes element attributes.
|
|
623
|
+
*
|
|
624
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-starttags
|
|
625
|
+
*/
|
|
626
|
+
H() {
|
|
385
627
|
let attributes = /* @__PURE__ */ Object.create(null);
|
|
386
628
|
while (this.e()) {
|
|
387
629
|
let attrName = this.q();
|
|
388
630
|
if (!attrName) {
|
|
389
631
|
break;
|
|
390
632
|
}
|
|
391
|
-
let attrValue = this.
|
|
633
|
+
let attrValue = this.t() && this.I();
|
|
392
634
|
if (attrValue === false) {
|
|
393
635
|
throw this.a("Attribute value expected");
|
|
394
636
|
}
|
|
@@ -400,7 +642,7 @@ var Parser = class {
|
|
|
400
642
|
}
|
|
401
643
|
attributes[attrName] = attrValue;
|
|
402
644
|
}
|
|
403
|
-
if (this.
|
|
645
|
+
if (this.g.sortAttributes) {
|
|
404
646
|
let attrNames = Object.keys(attributes).sort();
|
|
405
647
|
let sortedAttributes = /* @__PURE__ */ Object.create(null);
|
|
406
648
|
for (let i = 0; i < attrNames.length; ++i) {
|
|
@@ -411,152 +653,227 @@ var Parser = class {
|
|
|
411
653
|
}
|
|
412
654
|
return attributes;
|
|
413
655
|
}
|
|
414
|
-
|
|
656
|
+
/**
|
|
657
|
+
* Consumes an `AttValue` (attribute value) if possible.
|
|
658
|
+
*
|
|
659
|
+
* @returns
|
|
660
|
+
* Contents of the `AttValue` minus quotes, or `false` if nothing was
|
|
661
|
+
* consumed. An empty string indicates that an `AttValue` was consumed but
|
|
662
|
+
* was empty.
|
|
663
|
+
*
|
|
664
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
|
|
665
|
+
*/
|
|
666
|
+
I() {
|
|
415
667
|
let { c: scanner } = this;
|
|
416
|
-
let quote = scanner.
|
|
668
|
+
let quote = scanner.m();
|
|
417
669
|
if (quote !== '"' && quote !== "'") {
|
|
418
670
|
return false;
|
|
419
671
|
}
|
|
420
|
-
scanner.
|
|
672
|
+
scanner.p();
|
|
421
673
|
let chars;
|
|
422
674
|
let isClosed = false;
|
|
423
675
|
let value = emptyString2;
|
|
424
676
|
let regex = quote === '"' ? attValueCharDoubleQuote : attValueCharSingleQuote;
|
|
425
|
-
matchLoop:
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
break matchLoop;
|
|
443
|
-
}
|
|
677
|
+
matchLoop: while (!scanner.B) {
|
|
678
|
+
chars = scanner.x(regex);
|
|
679
|
+
if (chars) {
|
|
680
|
+
this.o(chars);
|
|
681
|
+
value += chars.replace(attValueNormalizedWhitespace, " ");
|
|
682
|
+
}
|
|
683
|
+
switch (scanner.m()) {
|
|
684
|
+
case quote:
|
|
685
|
+
isClosed = true;
|
|
686
|
+
break matchLoop;
|
|
687
|
+
case "&":
|
|
688
|
+
value += this.C();
|
|
689
|
+
continue;
|
|
690
|
+
case "<":
|
|
691
|
+
throw this.a("Unescaped `<` is not allowed in an attribute value");
|
|
692
|
+
default:
|
|
693
|
+
break matchLoop;
|
|
444
694
|
}
|
|
695
|
+
}
|
|
445
696
|
if (!isClosed) {
|
|
446
697
|
throw this.a("Unclosed attribute");
|
|
447
698
|
}
|
|
448
|
-
scanner.
|
|
699
|
+
scanner.p();
|
|
449
700
|
return value;
|
|
450
701
|
}
|
|
451
|
-
|
|
702
|
+
/**
|
|
703
|
+
* Consumes a CDATA section if possible.
|
|
704
|
+
*
|
|
705
|
+
* @returns Whether a CDATA section was consumed.
|
|
706
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-cdata-sect
|
|
707
|
+
*/
|
|
708
|
+
J() {
|
|
452
709
|
let { c: scanner } = this;
|
|
710
|
+
let startIndex = scanner.d;
|
|
453
711
|
if (!scanner.b("<![CDATA[")) {
|
|
454
712
|
return false;
|
|
455
713
|
}
|
|
456
|
-
let text = scanner.
|
|
457
|
-
this.
|
|
714
|
+
let text = scanner.s("]]>");
|
|
715
|
+
this.o(text);
|
|
458
716
|
if (!scanner.b("]]>")) {
|
|
459
717
|
throw this.a("Unclosed CDATA section");
|
|
460
718
|
}
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
719
|
+
return this.g.preserveCdata ? this.i(new XmlCdata(normalizeLineBreaks(text)), startIndex) : this.y(text, startIndex);
|
|
720
|
+
}
|
|
721
|
+
/**
|
|
722
|
+
* Consumes character data if possible.
|
|
723
|
+
*
|
|
724
|
+
* @returns Whether character data was consumed.
|
|
725
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#dt-chardata
|
|
726
|
+
*/
|
|
727
|
+
K() {
|
|
469
728
|
let { c: scanner } = this;
|
|
470
|
-
let
|
|
729
|
+
let startIndex = scanner.d;
|
|
730
|
+
let charData = scanner.x(endCharData);
|
|
471
731
|
if (!charData) {
|
|
472
732
|
return false;
|
|
473
733
|
}
|
|
474
|
-
this.
|
|
475
|
-
if (scanner.
|
|
734
|
+
this.o(charData);
|
|
735
|
+
if (scanner.m(3) === "]]>") {
|
|
476
736
|
throw this.a("Element content may not contain the CDATA section close delimiter `]]>`");
|
|
477
737
|
}
|
|
478
|
-
this.
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
738
|
+
return this.y(charData, startIndex);
|
|
739
|
+
}
|
|
740
|
+
/**
|
|
741
|
+
* Consumes a comment if possible.
|
|
742
|
+
*
|
|
743
|
+
* @returns Whether a comment was consumed.
|
|
744
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Comment
|
|
745
|
+
*/
|
|
746
|
+
D() {
|
|
482
747
|
let { c: scanner } = this;
|
|
748
|
+
let startIndex = scanner.d;
|
|
483
749
|
if (!scanner.b("<!--")) {
|
|
484
750
|
return false;
|
|
485
751
|
}
|
|
486
|
-
let content = scanner.
|
|
487
|
-
this.
|
|
752
|
+
let content = scanner.s("--");
|
|
753
|
+
this.o(content);
|
|
488
754
|
if (!scanner.b("-->")) {
|
|
489
|
-
if (scanner.
|
|
755
|
+
if (scanner.m(2) === "--") {
|
|
490
756
|
throw this.a("The string `--` isn't allowed inside a comment");
|
|
491
757
|
}
|
|
492
758
|
throw this.a("Unclosed comment");
|
|
493
759
|
}
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
return false;
|
|
506
|
-
}
|
|
760
|
+
return this.g.preserveComments ? this.i(new XmlComment(normalizeLineBreaks(content)), startIndex) : true;
|
|
761
|
+
}
|
|
762
|
+
/**
|
|
763
|
+
* Consumes a reference in a content context if possible.
|
|
764
|
+
*
|
|
765
|
+
* This differs from `consumeReference()` in that a consumed reference will be
|
|
766
|
+
* added to the document as a text node instead of returned.
|
|
767
|
+
*
|
|
768
|
+
* @returns Whether a reference was consumed.
|
|
769
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#entproc
|
|
770
|
+
*/
|
|
507
771
|
L() {
|
|
772
|
+
let startIndex = this.c.d;
|
|
773
|
+
let ref = this.C();
|
|
774
|
+
return ref ? this.y(ref, startIndex) : false;
|
|
775
|
+
}
|
|
776
|
+
/**
|
|
777
|
+
* Consumes a doctype declaration if possible.
|
|
778
|
+
*
|
|
779
|
+
* This is a loose implementation since doctype declarations are currently
|
|
780
|
+
* discarded without further parsing.
|
|
781
|
+
*
|
|
782
|
+
* @returns Whether a doctype declaration was consumed.
|
|
783
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#dtd
|
|
784
|
+
*/
|
|
785
|
+
M() {
|
|
508
786
|
let { c: scanner } = this;
|
|
509
|
-
|
|
787
|
+
let startIndex = scanner.d;
|
|
788
|
+
if (!scanner.b("<!DOCTYPE")) {
|
|
510
789
|
return false;
|
|
511
790
|
}
|
|
512
|
-
|
|
513
|
-
if (
|
|
514
|
-
|
|
791
|
+
let name = this.e() && this.q();
|
|
792
|
+
if (!name) {
|
|
793
|
+
throw this.a("Expected a name");
|
|
794
|
+
}
|
|
795
|
+
let publicId;
|
|
796
|
+
let systemId;
|
|
797
|
+
if (this.e()) {
|
|
798
|
+
if (scanner.b("PUBLIC")) {
|
|
799
|
+
publicId = this.e() && this.N();
|
|
800
|
+
if (publicId === false) {
|
|
801
|
+
throw this.a("Expected a public identifier");
|
|
802
|
+
}
|
|
803
|
+
this.e();
|
|
804
|
+
}
|
|
805
|
+
if (publicId !== void 0 || scanner.b("SYSTEM")) {
|
|
806
|
+
this.e();
|
|
807
|
+
systemId = this.r();
|
|
808
|
+
if (systemId === false) {
|
|
809
|
+
throw this.a("Expected a system identifier");
|
|
810
|
+
}
|
|
811
|
+
this.e();
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
let internalSubset;
|
|
815
|
+
if (scanner.b("[")) {
|
|
816
|
+
internalSubset = scanner.x(/\][\x20\t\r\n]*>/);
|
|
817
|
+
if (!scanner.b("]")) {
|
|
818
|
+
throw this.a("Unclosed internal subset");
|
|
819
|
+
}
|
|
820
|
+
this.e();
|
|
515
821
|
}
|
|
516
822
|
if (!scanner.b(">")) {
|
|
517
823
|
throw this.a("Unclosed doctype declaration");
|
|
518
824
|
}
|
|
519
|
-
return true;
|
|
520
|
-
}
|
|
521
|
-
|
|
825
|
+
return this.g.preserveDocumentType ? this.i(new XmlDocumentType(name, publicId, systemId, internalSubset), startIndex) : true;
|
|
826
|
+
}
|
|
827
|
+
/**
|
|
828
|
+
* Consumes an element if possible.
|
|
829
|
+
*
|
|
830
|
+
* @returns Whether an element was consumed.
|
|
831
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-element
|
|
832
|
+
*/
|
|
833
|
+
E() {
|
|
522
834
|
let { c: scanner } = this;
|
|
523
|
-
let
|
|
835
|
+
let startIndex = scanner.d;
|
|
524
836
|
if (!scanner.b("<")) {
|
|
525
837
|
return false;
|
|
526
838
|
}
|
|
527
839
|
let name = this.q();
|
|
528
840
|
if (!name) {
|
|
529
|
-
scanner.
|
|
841
|
+
scanner.n(startIndex);
|
|
530
842
|
return false;
|
|
531
843
|
}
|
|
532
|
-
let attributes = this.
|
|
533
|
-
let isEmpty =
|
|
844
|
+
let attributes = this.H();
|
|
845
|
+
let isEmpty = !!scanner.b("/>");
|
|
534
846
|
let element = new XmlElement(name, attributes);
|
|
535
|
-
element.parent = this.
|
|
847
|
+
element.parent = this.j;
|
|
536
848
|
if (!isEmpty) {
|
|
537
849
|
if (!scanner.b(">")) {
|
|
538
850
|
throw this.a(`Unclosed start tag for element \`${name}\``);
|
|
539
851
|
}
|
|
540
|
-
this.
|
|
852
|
+
this.j = element;
|
|
541
853
|
do {
|
|
542
|
-
this.
|
|
543
|
-
} while (this.
|
|
854
|
+
this.K();
|
|
855
|
+
} while (this.E() || this.L() || this.J() || this.F() || this.D());
|
|
544
856
|
let endTagMark = scanner.d;
|
|
545
857
|
let endTagName;
|
|
546
858
|
if (!scanner.b("</") || !(endTagName = this.q()) || endTagName !== name) {
|
|
547
|
-
scanner.
|
|
859
|
+
scanner.n(endTagMark);
|
|
548
860
|
throw this.a(`Missing end tag for element ${name}`);
|
|
549
861
|
}
|
|
550
862
|
this.e();
|
|
551
863
|
if (!scanner.b(">")) {
|
|
552
864
|
throw this.a(`Unclosed end tag for element ${name}`);
|
|
553
865
|
}
|
|
554
|
-
this.
|
|
866
|
+
this.j = element.parent;
|
|
555
867
|
}
|
|
556
|
-
this.
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
868
|
+
return this.i(element, startIndex);
|
|
869
|
+
}
|
|
870
|
+
/**
|
|
871
|
+
* Consumes an `Eq` production if possible.
|
|
872
|
+
*
|
|
873
|
+
* @returns Whether an `Eq` production was consumed.
|
|
874
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Eq
|
|
875
|
+
*/
|
|
876
|
+
t() {
|
|
560
877
|
this.e();
|
|
561
878
|
if (this.c.b("=")) {
|
|
562
879
|
this.e();
|
|
@@ -564,22 +881,40 @@ var Parser = class {
|
|
|
564
881
|
}
|
|
565
882
|
return false;
|
|
566
883
|
}
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
884
|
+
/**
|
|
885
|
+
* Consumes `Misc` content if possible.
|
|
886
|
+
*
|
|
887
|
+
* @returns Whether anything was consumed.
|
|
888
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Misc
|
|
889
|
+
*/
|
|
890
|
+
z() {
|
|
891
|
+
return this.D() || this.F() || this.e();
|
|
892
|
+
}
|
|
893
|
+
/**
|
|
894
|
+
* Consumes one or more `Name` characters if possible.
|
|
895
|
+
*
|
|
896
|
+
* @returns `Name` characters, or an empty string if none were consumed.
|
|
897
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Name
|
|
898
|
+
*/
|
|
570
899
|
q() {
|
|
571
|
-
return isNameStartChar(this.c.
|
|
572
|
-
}
|
|
573
|
-
|
|
900
|
+
return isNameStartChar(this.c.m()) ? this.c.w(isNameChar) : emptyString2;
|
|
901
|
+
}
|
|
902
|
+
/**
|
|
903
|
+
* Consumes a processing instruction if possible.
|
|
904
|
+
*
|
|
905
|
+
* @returns Whether a processing instruction was consumed.
|
|
906
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-pi
|
|
907
|
+
*/
|
|
908
|
+
F() {
|
|
574
909
|
let { c: scanner } = this;
|
|
575
|
-
let
|
|
910
|
+
let startIndex = scanner.d;
|
|
576
911
|
if (!scanner.b("<?")) {
|
|
577
912
|
return false;
|
|
578
913
|
}
|
|
579
914
|
let name = this.q();
|
|
580
915
|
if (name) {
|
|
581
916
|
if (name.toLowerCase() === "xml") {
|
|
582
|
-
scanner.
|
|
917
|
+
scanner.n(startIndex);
|
|
583
918
|
throw this.a("XML declaration isn't allowed here");
|
|
584
919
|
}
|
|
585
920
|
} else {
|
|
@@ -587,38 +922,73 @@ var Parser = class {
|
|
|
587
922
|
}
|
|
588
923
|
if (!this.e()) {
|
|
589
924
|
if (scanner.b("?>")) {
|
|
590
|
-
this.
|
|
591
|
-
return true;
|
|
925
|
+
return this.i(new XmlProcessingInstruction(name), startIndex);
|
|
592
926
|
}
|
|
593
927
|
throw this.a("Whitespace is required after a processing instruction name");
|
|
594
928
|
}
|
|
595
|
-
let content = scanner.
|
|
596
|
-
this.
|
|
929
|
+
let content = scanner.s("?>");
|
|
930
|
+
this.o(content);
|
|
597
931
|
if (!scanner.b("?>")) {
|
|
598
932
|
throw this.a("Unterminated processing instruction");
|
|
599
933
|
}
|
|
600
|
-
this.
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
934
|
+
return this.i(new XmlProcessingInstruction(name, normalizeLineBreaks(content)), startIndex);
|
|
935
|
+
}
|
|
936
|
+
/**
|
|
937
|
+
* Consumes a prolog if possible.
|
|
938
|
+
*
|
|
939
|
+
* @returns Whether a prolog was consumed.
|
|
940
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-prolog-dtd
|
|
941
|
+
*/
|
|
942
|
+
O() {
|
|
604
943
|
let { c: scanner } = this;
|
|
605
|
-
let
|
|
606
|
-
this.
|
|
607
|
-
while (this.
|
|
944
|
+
let startIndex = scanner.d;
|
|
945
|
+
this.P();
|
|
946
|
+
while (this.z()) {
|
|
608
947
|
}
|
|
609
|
-
if (this.
|
|
610
|
-
while (this.
|
|
948
|
+
if (this.M()) {
|
|
949
|
+
while (this.z()) {
|
|
611
950
|
}
|
|
612
951
|
}
|
|
613
|
-
return
|
|
952
|
+
return startIndex < scanner.d;
|
|
953
|
+
}
|
|
954
|
+
/**
|
|
955
|
+
* Consumes a public identifier literal if possible.
|
|
956
|
+
*
|
|
957
|
+
* @returns
|
|
958
|
+
* Value of the public identifier literal minus quotes, or `false` if
|
|
959
|
+
* nothing was consumed. An empty string indicates that a public id literal
|
|
960
|
+
* was consumed but was empty.
|
|
961
|
+
*
|
|
962
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-PubidLiteral
|
|
963
|
+
*/
|
|
964
|
+
N() {
|
|
965
|
+
let startIndex = this.c.d;
|
|
966
|
+
let value = this.r();
|
|
967
|
+
if (value !== false && !/^[-\x20\r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*$/.test(value)) {
|
|
968
|
+
this.c.n(startIndex);
|
|
969
|
+
throw this.a("Invalid character in public identifier");
|
|
970
|
+
}
|
|
971
|
+
return value;
|
|
614
972
|
}
|
|
615
|
-
|
|
973
|
+
/**
|
|
974
|
+
* Consumes a reference if possible.
|
|
975
|
+
*
|
|
976
|
+
* This differs from `consumeContentReference()` in that a consumed reference
|
|
977
|
+
* will be returned rather than added to the document.
|
|
978
|
+
*
|
|
979
|
+
* @returns
|
|
980
|
+
* Parsed reference value, or `false` if nothing was consumed (to
|
|
981
|
+
* distinguish from a reference that resolves to an empty string).
|
|
982
|
+
*
|
|
983
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Reference
|
|
984
|
+
*/
|
|
985
|
+
C() {
|
|
616
986
|
let { c: scanner } = this;
|
|
617
987
|
if (!scanner.b("&")) {
|
|
618
988
|
return false;
|
|
619
989
|
}
|
|
620
|
-
let ref = scanner.
|
|
621
|
-
if (scanner.
|
|
990
|
+
let ref = scanner.w(isReferenceChar);
|
|
991
|
+
if (scanner.G() !== ";") {
|
|
622
992
|
throw this.a("Unterminated reference (a reference must end with `;`)");
|
|
623
993
|
}
|
|
624
994
|
let parsedValue;
|
|
@@ -637,7 +1007,7 @@ var Parser = class {
|
|
|
637
1007
|
let {
|
|
638
1008
|
ignoreUndefinedEntities,
|
|
639
1009
|
resolveUndefinedEntity
|
|
640
|
-
} = this.
|
|
1010
|
+
} = this.g;
|
|
641
1011
|
let wrappedRef = `&${ref};`;
|
|
642
1012
|
if (resolveUndefinedEntity) {
|
|
643
1013
|
let resolvedValue = resolveUndefinedEntity(wrappedRef);
|
|
@@ -652,48 +1022,79 @@ var Parser = class {
|
|
|
652
1022
|
if (ignoreUndefinedEntities) {
|
|
653
1023
|
return wrappedRef;
|
|
654
1024
|
}
|
|
655
|
-
scanner.
|
|
1025
|
+
scanner.n(-wrappedRef.length);
|
|
656
1026
|
throw this.a(`Named entity isn't defined: ${wrappedRef}`);
|
|
657
1027
|
}
|
|
658
1028
|
}
|
|
659
1029
|
return parsedValue;
|
|
660
1030
|
}
|
|
661
|
-
|
|
1031
|
+
/**
|
|
1032
|
+
* Consumes a `SystemLiteral` if possible.
|
|
1033
|
+
*
|
|
1034
|
+
* A `SystemLiteral` is similar to an attribute value, but allows the
|
|
1035
|
+
* characters `<` and `&` and doesn't replace references.
|
|
1036
|
+
*
|
|
1037
|
+
* @returns
|
|
1038
|
+
* Value of the `SystemLiteral` minus quotes, or `false` if nothing was
|
|
1039
|
+
* consumed. An empty string indicates that a `SystemLiteral` was consumed
|
|
1040
|
+
* but was empty.
|
|
1041
|
+
*
|
|
1042
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-SystemLiteral
|
|
1043
|
+
*/
|
|
1044
|
+
r() {
|
|
662
1045
|
let { c: scanner } = this;
|
|
663
1046
|
let quote = scanner.b('"') || scanner.b("'");
|
|
664
1047
|
if (!quote) {
|
|
665
1048
|
return false;
|
|
666
1049
|
}
|
|
667
|
-
let value = scanner.
|
|
668
|
-
this.
|
|
1050
|
+
let value = scanner.s(quote);
|
|
1051
|
+
this.o(value);
|
|
669
1052
|
if (!scanner.b(quote)) {
|
|
670
1053
|
throw this.a("Missing end quote");
|
|
671
1054
|
}
|
|
672
1055
|
return value;
|
|
673
1056
|
}
|
|
1057
|
+
/**
|
|
1058
|
+
* Consumes one or more whitespace characters if possible.
|
|
1059
|
+
*
|
|
1060
|
+
* @returns Whether any whitespace characters were consumed.
|
|
1061
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
|
|
1062
|
+
*/
|
|
674
1063
|
e() {
|
|
675
|
-
return
|
|
676
|
-
}
|
|
677
|
-
|
|
1064
|
+
return !!this.c.w(isWhitespace);
|
|
1065
|
+
}
|
|
1066
|
+
/**
|
|
1067
|
+
* Consumes an XML declaration if possible.
|
|
1068
|
+
*
|
|
1069
|
+
* @returns Whether an XML declaration was consumed.
|
|
1070
|
+
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-XMLDecl
|
|
1071
|
+
*/
|
|
1072
|
+
P() {
|
|
678
1073
|
let { c: scanner } = this;
|
|
1074
|
+
let startIndex = scanner.d;
|
|
679
1075
|
if (!scanner.b("<?xml")) {
|
|
680
1076
|
return false;
|
|
681
1077
|
}
|
|
682
1078
|
if (!this.e()) {
|
|
683
1079
|
throw this.a("Invalid XML declaration");
|
|
684
1080
|
}
|
|
685
|
-
let version =
|
|
1081
|
+
let version = !!scanner.b("version") && this.t() && this.r();
|
|
686
1082
|
if (version === false) {
|
|
687
1083
|
throw this.a("XML version is missing or invalid");
|
|
688
1084
|
} else if (!/^1\.[0-9]+$/.test(version)) {
|
|
689
1085
|
throw this.a("Invalid character in version number");
|
|
690
1086
|
}
|
|
1087
|
+
let encoding;
|
|
1088
|
+
let standalone;
|
|
691
1089
|
if (this.e()) {
|
|
692
|
-
|
|
1090
|
+
encoding = !!scanner.b("encoding") && this.t() && this.r();
|
|
693
1091
|
if (encoding) {
|
|
1092
|
+
if (!/^[A-Za-z][\w.-]*$/.test(encoding)) {
|
|
1093
|
+
throw this.a("Invalid character in encoding name");
|
|
1094
|
+
}
|
|
694
1095
|
this.e();
|
|
695
1096
|
}
|
|
696
|
-
|
|
1097
|
+
standalone = !!scanner.b("standalone") && this.t() && this.r();
|
|
697
1098
|
if (standalone) {
|
|
698
1099
|
if (standalone !== "yes" && standalone !== "no") {
|
|
699
1100
|
throw this.a('Only "yes" and "no" are permitted as values of `standalone`');
|
|
@@ -704,54 +1105,44 @@ var Parser = class {
|
|
|
704
1105
|
if (!scanner.b("?>")) {
|
|
705
1106
|
throw this.a("Invalid or unclosed XML declaration");
|
|
706
1107
|
}
|
|
707
|
-
return
|
|
708
|
-
|
|
1108
|
+
return this.g.preserveXmlDeclaration ? this.i(new XmlDeclaration(
|
|
1109
|
+
version,
|
|
1110
|
+
encoding || void 0,
|
|
1111
|
+
standalone || void 0
|
|
1112
|
+
), startIndex) : true;
|
|
1113
|
+
}
|
|
1114
|
+
/**
|
|
1115
|
+
* Returns an `XmlError` for the current scanner position.
|
|
1116
|
+
*/
|
|
709
1117
|
a(message) {
|
|
710
|
-
let {
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
column += 1;
|
|
722
|
-
excerpt += char;
|
|
723
|
-
}
|
|
1118
|
+
let { c: scanner } = this;
|
|
1119
|
+
return new XmlError(message, scanner.d, scanner.h);
|
|
1120
|
+
}
|
|
1121
|
+
/**
|
|
1122
|
+
* Parses the XML input.
|
|
1123
|
+
*/
|
|
1124
|
+
parse() {
|
|
1125
|
+
this.c.b("\uFEFF");
|
|
1126
|
+
this.O();
|
|
1127
|
+
if (!this.E()) {
|
|
1128
|
+
throw this.a("Root element is missing or invalid");
|
|
724
1129
|
}
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
if (column < 40) {
|
|
730
|
-
excerpt = excerpt.slice(0, 50);
|
|
731
|
-
} else {
|
|
732
|
-
excerptStart = column - 20;
|
|
733
|
-
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
734
|
-
}
|
|
1130
|
+
while (this.z()) {
|
|
1131
|
+
}
|
|
1132
|
+
if (!this.c.B) {
|
|
1133
|
+
throw this.a("Extra content at the end of the document");
|
|
735
1134
|
}
|
|
736
|
-
let err = new Error(
|
|
737
|
-
`${message} (line ${line}, column ${column})
|
|
738
|
-
${excerpt}
|
|
739
|
-
` + " ".repeat(column - excerptStart + 1) + "^\n"
|
|
740
|
-
);
|
|
741
|
-
Object.assign(err, {
|
|
742
|
-
column,
|
|
743
|
-
excerpt,
|
|
744
|
-
line,
|
|
745
|
-
pos: charIndex
|
|
746
|
-
});
|
|
747
|
-
return err;
|
|
748
1135
|
}
|
|
749
|
-
|
|
1136
|
+
/**
|
|
1137
|
+
* Throws an invalid character error if any character in the given _string_
|
|
1138
|
+
* isn't a valid XML character.
|
|
1139
|
+
*/
|
|
1140
|
+
o(string) {
|
|
750
1141
|
let { length } = string;
|
|
751
1142
|
for (let i = 0; i < length; ++i) {
|
|
752
1143
|
let cp = string.codePointAt(i);
|
|
753
1144
|
if (!isXmlCodePoint(cp)) {
|
|
754
|
-
this.c.
|
|
1145
|
+
this.c.n(-([...string].length - i));
|
|
755
1146
|
throw this.a("Invalid character");
|
|
756
1147
|
}
|
|
757
1148
|
if (cp > 65535) {
|
|
@@ -760,11 +1151,12 @@ var Parser = class {
|
|
|
760
1151
|
}
|
|
761
1152
|
}
|
|
762
1153
|
};
|
|
763
|
-
function
|
|
764
|
-
|
|
765
|
-
|
|
1154
|
+
function normalizeLineBreaks(text) {
|
|
1155
|
+
let i = 0;
|
|
1156
|
+
while ((i = text.indexOf("\r", i)) !== -1) {
|
|
1157
|
+
text = text[i + 1] === "\n" ? text.slice(0, i) + text.slice(i + 1) : text.slice(0, i) + "\n" + text.slice(i + 1);
|
|
766
1158
|
}
|
|
767
|
-
return
|
|
1159
|
+
return text;
|
|
768
1160
|
}
|
|
769
1161
|
|
|
770
1162
|
// src/index.ts
|