@rgrove/parse-xml 4.0.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -31
- package/dist/browser.js +692 -300
- package/dist/browser.js.map +4 -4
- package/dist/global.min.js +9 -8
- package/dist/global.min.js.map +4 -4
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -2
- package/dist/index.js.map +1 -1
- package/dist/lib/Parser.d.ts +53 -6
- package/dist/lib/Parser.d.ts.map +1 -1
- package/dist/lib/Parser.js +166 -126
- package/dist/lib/Parser.js.map +1 -1
- package/dist/lib/StringScanner.d.ts +15 -21
- package/dist/lib/StringScanner.d.ts.map +1 -1
- package/dist/lib/StringScanner.js +63 -86
- package/dist/lib/StringScanner.js.map +1 -1
- package/dist/lib/XmlDeclaration.d.ts +30 -0
- package/dist/lib/XmlDeclaration.d.ts.map +1 -0
- package/dist/lib/XmlDeclaration.js +36 -0
- package/dist/lib/XmlDeclaration.js.map +1 -0
- package/dist/lib/XmlDocument.d.ts +4 -2
- package/dist/lib/XmlDocument.d.ts.map +1 -1
- package/dist/lib/XmlDocument.js.map +1 -1
- package/dist/lib/XmlDocumentType.d.ts +37 -0
- package/dist/lib/XmlDocumentType.d.ts.map +1 -0
- package/dist/lib/XmlDocumentType.js +39 -0
- package/dist/lib/XmlDocumentType.js.map +1 -0
- package/dist/lib/XmlElement.js.map +1 -1
- package/dist/lib/XmlError.d.ts +24 -0
- package/dist/lib/XmlError.d.ts.map +1 -0
- package/dist/lib/XmlError.js +52 -0
- package/dist/lib/XmlError.js.map +1 -0
- package/dist/lib/XmlNode.d.ts +20 -1
- package/dist/lib/XmlNode.d.ts.map +1 -1
- package/dist/lib/XmlNode.js +28 -3
- package/dist/lib/XmlNode.js.map +1 -1
- package/dist/lib/syntax.d.ts.map +1 -1
- package/dist/lib/syntax.js +18 -23
- package/dist/lib/syntax.js.map +1 -1
- package/dist/lib/types.d.ts +2 -2
- package/dist/lib/types.d.ts.map +1 -1
- package/package.json +20 -23
- package/src/index.ts +3 -0
- package/src/lib/Parser.ts +228 -141
- package/src/lib/StringScanner.ts +66 -103
- package/src/lib/XmlDeclaration.ts +58 -0
- package/src/lib/XmlDocument.ts +4 -2
- package/src/lib/XmlDocumentType.ts +67 -0
- package/src/lib/XmlError.ts +80 -0
- package/src/lib/XmlNode.ts +33 -3
- package/src/lib/syntax.ts +12 -18
package/src/lib/StringScanner.ts
CHANGED
|
@@ -42,16 +42,6 @@ export class StringScanner {
|
|
|
42
42
|
|
|
43
43
|
// -- Protected Methods ------------------------------------------------------
|
|
44
44
|
|
|
45
|
-
/**
|
|
46
|
-
* Returns the byte index of the given character index in the string. The two
|
|
47
|
-
* may differ in strings that contain multibyte characters.
|
|
48
|
-
*/
|
|
49
|
-
protected charIndexToByteIndex(charIndex: number = this.charIndex): number {
|
|
50
|
-
return this.multiByteMode
|
|
51
|
-
? (this.charsToBytes as number[])[charIndex] ?? Infinity
|
|
52
|
-
: charIndex;
|
|
53
|
-
}
|
|
54
|
-
|
|
55
45
|
/**
|
|
56
46
|
* Returns the number of characters in the given string, which may differ from
|
|
57
47
|
* the byte length if the string contains multibyte characters.
|
|
@@ -75,60 +65,76 @@ export class StringScanner {
|
|
|
75
65
|
this.charIndex = Math.min(this.charCount, this.charIndex + count);
|
|
76
66
|
}
|
|
77
67
|
|
|
68
|
+
/**
|
|
69
|
+
* Returns the byte index of the given character index in the string. The two
|
|
70
|
+
* may differ in strings that contain multibyte characters.
|
|
71
|
+
*/
|
|
72
|
+
charIndexToByteIndex(charIndex: number = this.charIndex): number {
|
|
73
|
+
return this.multiByteMode
|
|
74
|
+
? (this.charsToBytes as number[])[charIndex] ?? Infinity
|
|
75
|
+
: charIndex;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
78
|
/**
|
|
79
79
|
* Consumes and returns the given number of characters if possible, advancing
|
|
80
80
|
* the scanner and stopping if the end of the string is reached.
|
|
81
81
|
*
|
|
82
82
|
* If no characters could be consumed, an empty string will be returned.
|
|
83
83
|
*/
|
|
84
|
-
consume(
|
|
85
|
-
let chars = this.peek(
|
|
86
|
-
this.advance(
|
|
84
|
+
consume(charCount = 1): string {
|
|
85
|
+
let chars = this.peek(charCount);
|
|
86
|
+
this.advance(charCount);
|
|
87
87
|
return chars;
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
/**
|
|
91
|
-
* Consumes
|
|
92
|
-
*
|
|
91
|
+
* Consumes and returns the given number of bytes if possible, advancing the
|
|
92
|
+
* scanner and stopping if the end of the string is reached.
|
|
93
93
|
*
|
|
94
|
-
*
|
|
95
|
-
*
|
|
94
|
+
* It's up to the caller to ensure that the given byte count doesn't split a
|
|
95
|
+
* multibyte character.
|
|
96
96
|
*
|
|
97
|
-
*
|
|
97
|
+
* If no bytes could be consumed, an empty string will be returned.
|
|
98
98
|
*/
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
regex.lastIndex = this.charIndexToByteIndex();
|
|
105
|
-
|
|
106
|
-
let result = regex.exec(this.string);
|
|
107
|
-
|
|
108
|
-
if (result === null || result.length === 0) {
|
|
109
|
-
return emptyString;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
let match = result[0] as string;
|
|
113
|
-
this.advance(this.charLength(match));
|
|
114
|
-
return match;
|
|
99
|
+
consumeBytes(byteCount: number): string {
|
|
100
|
+
let byteIndex = this.charIndexToByteIndex();
|
|
101
|
+
let result = this.string.slice(byteIndex, byteIndex + byteCount);
|
|
102
|
+
this.advance(this.charLength(result));
|
|
103
|
+
return result;
|
|
115
104
|
}
|
|
116
105
|
|
|
117
106
|
/**
|
|
118
|
-
* Consumes and returns all characters for which the given function returns
|
|
119
|
-
*
|
|
120
|
-
*
|
|
107
|
+
* Consumes and returns all characters for which the given function returns
|
|
108
|
+
* `true`, stopping when `false` is returned or the end of the input is
|
|
109
|
+
* reached.
|
|
121
110
|
*/
|
|
122
111
|
consumeMatchFn(fn: (char: string) => boolean): string {
|
|
123
|
-
let
|
|
124
|
-
let
|
|
112
|
+
let { length, multiByteMode, string } = this;
|
|
113
|
+
let startByteIndex = this.charIndexToByteIndex();
|
|
114
|
+
let endByteIndex = startByteIndex;
|
|
115
|
+
|
|
116
|
+
if (multiByteMode) {
|
|
117
|
+
while (endByteIndex < length) {
|
|
118
|
+
let char = string[endByteIndex] as string;
|
|
119
|
+
let isSurrogatePair = char >= '\uD800' && char <= '\uDBFF';
|
|
120
|
+
|
|
121
|
+
if (isSurrogatePair) {
|
|
122
|
+
char += string[endByteIndex + 1];
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
if (!fn(char)) {
|
|
126
|
+
break;
|
|
127
|
+
}
|
|
125
128
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
+
endByteIndex += isSurrogatePair ? 2 : 1;
|
|
130
|
+
}
|
|
131
|
+
} else {
|
|
132
|
+
while (endByteIndex < length && fn(string[endByteIndex] as string)) {
|
|
133
|
+
++endByteIndex;
|
|
134
|
+
}
|
|
129
135
|
}
|
|
130
136
|
|
|
131
|
-
return
|
|
137
|
+
return this.consumeBytes(endByteIndex - startByteIndex);
|
|
132
138
|
}
|
|
133
139
|
|
|
134
140
|
/**
|
|
@@ -139,35 +145,11 @@ export class StringScanner {
|
|
|
139
145
|
* string will be returned and the scanner will not be advanced.
|
|
140
146
|
*/
|
|
141
147
|
consumeString(stringToConsume: string): string {
|
|
142
|
-
if (this.consumeStringFast(stringToConsume)) {
|
|
143
|
-
return stringToConsume;
|
|
144
|
-
}
|
|
145
|
-
|
|
146
|
-
if (this.multiByteMode) {
|
|
147
|
-
let { length } = stringToConsume;
|
|
148
|
-
let charLengthToMatch = this.charLength(stringToConsume);
|
|
149
|
-
|
|
150
|
-
if (charLengthToMatch !== length
|
|
151
|
-
&& stringToConsume === this.peek(charLengthToMatch)) {
|
|
152
|
-
|
|
153
|
-
this.advance(charLengthToMatch);
|
|
154
|
-
return stringToConsume;
|
|
155
|
-
}
|
|
156
|
-
}
|
|
157
|
-
|
|
158
|
-
return emptyString;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
/**
|
|
162
|
-
* Does the same thing as `consumeString()`, but doesn't support consuming
|
|
163
|
-
* multibyte characters. This can be faster if you only need to match single
|
|
164
|
-
* byte characters.
|
|
165
|
-
*/
|
|
166
|
-
consumeStringFast(stringToConsume: string): string {
|
|
167
148
|
let { length } = stringToConsume;
|
|
149
|
+
let byteIndex = this.charIndexToByteIndex();
|
|
168
150
|
|
|
169
|
-
if (this.
|
|
170
|
-
this.advance(length);
|
|
151
|
+
if (stringToConsume === this.string.slice(byteIndex, byteIndex + length)) {
|
|
152
|
+
this.advance(length === 1 ? 1 : this.charLength(stringToConsume));
|
|
171
153
|
return stringToConsume;
|
|
172
154
|
}
|
|
173
155
|
|
|
@@ -182,16 +164,13 @@ export class StringScanner {
|
|
|
182
164
|
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
183
165
|
*/
|
|
184
166
|
consumeUntilMatch(regex: RegExp): string {
|
|
185
|
-
let
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
if (matchByteIndex <= 0) {
|
|
189
|
-
return emptyString;
|
|
190
|
-
}
|
|
167
|
+
let matchByteIndex = this.string
|
|
168
|
+
.slice(this.charIndexToByteIndex())
|
|
169
|
+
.search(regex);
|
|
191
170
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
171
|
+
return matchByteIndex > 0
|
|
172
|
+
? this.consumeBytes(matchByteIndex)
|
|
173
|
+
: emptyString;
|
|
195
174
|
}
|
|
196
175
|
|
|
197
176
|
/**
|
|
@@ -202,17 +181,12 @@ export class StringScanner {
|
|
|
202
181
|
* Returns the consumed string, or an empty string if nothing was consumed.
|
|
203
182
|
*/
|
|
204
183
|
consumeUntilString(searchString: string): string {
|
|
205
|
-
let { string } = this;
|
|
206
184
|
let byteIndex = this.charIndexToByteIndex();
|
|
207
|
-
let matchByteIndex = string.indexOf(searchString, byteIndex);
|
|
208
|
-
|
|
209
|
-
if (matchByteIndex <= 0) {
|
|
210
|
-
return emptyString;
|
|
211
|
-
}
|
|
185
|
+
let matchByteIndex = this.string.indexOf(searchString, byteIndex);
|
|
212
186
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
187
|
+
return matchByteIndex > 0
|
|
188
|
+
? this.consumeBytes(matchByteIndex - byteIndex)
|
|
189
|
+
: emptyString;
|
|
216
190
|
}
|
|
217
191
|
|
|
218
192
|
/**
|
|
@@ -221,22 +195,11 @@ export class StringScanner {
|
|
|
221
195
|
* input string.
|
|
222
196
|
*/
|
|
223
197
|
peek(count = 1): string {
|
|
224
|
-
let { charIndex,
|
|
225
|
-
|
|
226
|
-
if (multiByteMode) {
|
|
227
|
-
// Inlining this comparison instead of checking `this.isEnd` improves perf
|
|
228
|
-
// slightly since `peek()` is called so frequently.
|
|
229
|
-
if (charIndex >= this.charCount) {
|
|
230
|
-
return emptyString;
|
|
231
|
-
}
|
|
198
|
+
let { charIndex, string } = this;
|
|
232
199
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
);
|
|
237
|
-
}
|
|
238
|
-
|
|
239
|
-
return string.slice(charIndex, charIndex + count);
|
|
200
|
+
return this.multiByteMode
|
|
201
|
+
? string.slice(this.charIndexToByteIndex(charIndex), this.charIndexToByteIndex(charIndex + count))
|
|
202
|
+
: string.slice(charIndex, charIndex + count);
|
|
240
203
|
}
|
|
241
204
|
|
|
242
205
|
/**
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { XmlNode } from './XmlNode.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* An XML declaration within an XML document.
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
*
|
|
8
|
+
* ```xml
|
|
9
|
+
* <?xml version="1.0" encoding="UTF-8"?>
|
|
10
|
+
* ```
|
|
11
|
+
*/
|
|
12
|
+
export class XmlDeclaration extends XmlNode {
|
|
13
|
+
/**
|
|
14
|
+
* Value of the encoding declaration in this XML declaration, or `null` if no
|
|
15
|
+
* encoding declaration was present.
|
|
16
|
+
*/
|
|
17
|
+
encoding: string | null;
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* Value of the standalone declaration in this XML declaration, or `null` if
|
|
21
|
+
* no standalone declaration was present.
|
|
22
|
+
*/
|
|
23
|
+
standalone: 'yes' | 'no' | null;
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Value of the version declaration in this XML declaration.
|
|
27
|
+
*/
|
|
28
|
+
version: string;
|
|
29
|
+
|
|
30
|
+
constructor(
|
|
31
|
+
version: string,
|
|
32
|
+
encoding?: string,
|
|
33
|
+
standalone?: typeof XmlDeclaration.prototype.standalone,
|
|
34
|
+
) {
|
|
35
|
+
super();
|
|
36
|
+
|
|
37
|
+
this.version = version;
|
|
38
|
+
this.encoding = encoding ?? null;
|
|
39
|
+
this.standalone = standalone ?? null;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
override get type() {
|
|
43
|
+
return XmlNode.TYPE_XML_DECLARATION;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
override toJSON() {
|
|
47
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
48
|
+
json.version = this.version;
|
|
49
|
+
|
|
50
|
+
for (let key of ['encoding', 'standalone'] as const) {
|
|
51
|
+
if (this[key] !== null) {
|
|
52
|
+
json[key] = this[key];
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
return json;
|
|
57
|
+
}
|
|
58
|
+
}
|
package/src/lib/XmlDocument.ts
CHANGED
|
@@ -2,6 +2,8 @@ import { XmlElement } from './XmlElement.js';
|
|
|
2
2
|
import { XmlNode } from './XmlNode.js';
|
|
3
3
|
|
|
4
4
|
import type { XmlComment } from './XmlComment.js';
|
|
5
|
+
import type { XmlDeclaration } from './XmlDeclaration.js';
|
|
6
|
+
import type { XmlDocumentType } from './XmlDocumentType.js';
|
|
5
7
|
import type { XmlProcessingInstruction } from './XmlProcessingInstruction.js';
|
|
6
8
|
|
|
7
9
|
/**
|
|
@@ -12,9 +14,9 @@ export class XmlDocument extends XmlNode {
|
|
|
12
14
|
/**
|
|
13
15
|
* Child nodes of this document.
|
|
14
16
|
*/
|
|
15
|
-
readonly children: Array<XmlComment | XmlProcessingInstruction | XmlElement>;
|
|
17
|
+
readonly children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlProcessingInstruction | XmlElement>;
|
|
16
18
|
|
|
17
|
-
constructor(children: Array<XmlComment | XmlElement | XmlProcessingInstruction> = []) {
|
|
19
|
+
constructor(children: Array<XmlComment | XmlDeclaration | XmlDocumentType | XmlElement | XmlProcessingInstruction> = []) {
|
|
18
20
|
super();
|
|
19
21
|
this.children = children;
|
|
20
22
|
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import { XmlNode } from './XmlNode.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* A document type declaration within an XML document.
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
*
|
|
8
|
+
* ```xml
|
|
9
|
+
* <!DOCTYPE kittens [
|
|
10
|
+
* <!ELEMENT kittens (#PCDATA)>
|
|
11
|
+
* ]>
|
|
12
|
+
* ```
|
|
13
|
+
*/
|
|
14
|
+
export class XmlDocumentType extends XmlNode {
|
|
15
|
+
/**
|
|
16
|
+
* Name of the root element described by this document type declaration.
|
|
17
|
+
*/
|
|
18
|
+
name: string;
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Public identifier of the external subset of this document type declaration,
|
|
22
|
+
* or `null` if no public identifier was present.
|
|
23
|
+
*/
|
|
24
|
+
publicId: string | null;
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* System identifier of the external subset of this document type declaration,
|
|
28
|
+
* or `null` if no system identifier was present.
|
|
29
|
+
*/
|
|
30
|
+
systemId: string | null;
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Internal subset of this document type declaration, or `null` if no internal
|
|
34
|
+
* subset was present.
|
|
35
|
+
*/
|
|
36
|
+
internalSubset: string | null;
|
|
37
|
+
|
|
38
|
+
constructor(
|
|
39
|
+
name: string,
|
|
40
|
+
publicId?: string,
|
|
41
|
+
systemId?: string,
|
|
42
|
+
internalSubset?: string,
|
|
43
|
+
) {
|
|
44
|
+
super();
|
|
45
|
+
this.name = name;
|
|
46
|
+
this.publicId = publicId ?? null;
|
|
47
|
+
this.systemId = systemId ?? null;
|
|
48
|
+
this.internalSubset = internalSubset ?? null;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
override get type() {
|
|
52
|
+
return XmlNode.TYPE_DOCUMENT_TYPE;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
override toJSON() {
|
|
56
|
+
let json = XmlNode.prototype.toJSON.call(this);
|
|
57
|
+
json.name = this.name;
|
|
58
|
+
|
|
59
|
+
for (let key of ['publicId', 'systemId', 'internalSubset'] as const) {
|
|
60
|
+
if (this[key] !== null) {
|
|
61
|
+
json[key] = this[key];
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
return json;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* An error that occurred while parsing XML.
|
|
3
|
+
*/
|
|
4
|
+
export class XmlError extends Error {
|
|
5
|
+
/**
|
|
6
|
+
* Character column at which this error occurred (1-based).
|
|
7
|
+
*/
|
|
8
|
+
readonly column: number;
|
|
9
|
+
|
|
10
|
+
/**
|
|
11
|
+
* Short excerpt from the input string that contains the problem.
|
|
12
|
+
*/
|
|
13
|
+
readonly excerpt: string;
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Line number at which this error occurred (1-based).
|
|
17
|
+
*/
|
|
18
|
+
readonly line: number;
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Character position at which this error occurred relative to the beginning
|
|
22
|
+
* of the input (0-based).
|
|
23
|
+
*/
|
|
24
|
+
readonly pos: number;
|
|
25
|
+
|
|
26
|
+
constructor(
|
|
27
|
+
message: string,
|
|
28
|
+
charIndex: number,
|
|
29
|
+
xml: string,
|
|
30
|
+
) {
|
|
31
|
+
let column = 1;
|
|
32
|
+
let excerpt = '';
|
|
33
|
+
let line = 1;
|
|
34
|
+
|
|
35
|
+
// Find the line and column where the error occurred.
|
|
36
|
+
for (let i = 0; i < charIndex; ++i) {
|
|
37
|
+
let char = xml[i];
|
|
38
|
+
|
|
39
|
+
if (char === '\n') {
|
|
40
|
+
column = 1;
|
|
41
|
+
excerpt = '';
|
|
42
|
+
line += 1;
|
|
43
|
+
} else {
|
|
44
|
+
column += 1;
|
|
45
|
+
excerpt += char;
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
let eol = xml.indexOf('\n', charIndex);
|
|
50
|
+
|
|
51
|
+
excerpt += eol === -1
|
|
52
|
+
? xml.slice(charIndex)
|
|
53
|
+
: xml.slice(charIndex, eol);
|
|
54
|
+
|
|
55
|
+
let excerptStart = 0;
|
|
56
|
+
|
|
57
|
+
// Keep the excerpt below 50 chars, but always keep the error position in
|
|
58
|
+
// view.
|
|
59
|
+
if (excerpt.length > 50) {
|
|
60
|
+
if (column < 40) {
|
|
61
|
+
excerpt = excerpt.slice(0, 50);
|
|
62
|
+
} else {
|
|
63
|
+
excerptStart = column - 20;
|
|
64
|
+
excerpt = excerpt.slice(excerptStart, column + 30);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
super(
|
|
69
|
+
`${message} (line ${line}, column ${column})\n`
|
|
70
|
+
+ ` ${excerpt}\n`
|
|
71
|
+
+ ' '.repeat(column - excerptStart + 1) + '^\n',
|
|
72
|
+
);
|
|
73
|
+
|
|
74
|
+
this.column = column;
|
|
75
|
+
this.excerpt = excerpt;
|
|
76
|
+
this.line = line;
|
|
77
|
+
this.name = 'XmlError';
|
|
78
|
+
this.pos = charIndex;
|
|
79
|
+
}
|
|
80
|
+
}
|
package/src/lib/XmlNode.ts
CHANGED
|
@@ -21,6 +21,11 @@ export class XmlNode {
|
|
|
21
21
|
*/
|
|
22
22
|
static readonly TYPE_DOCUMENT = 'document';
|
|
23
23
|
|
|
24
|
+
/**
|
|
25
|
+
* Type value for an `XmlDocumentType` node.
|
|
26
|
+
*/
|
|
27
|
+
static readonly TYPE_DOCUMENT_TYPE = 'doctype';
|
|
28
|
+
|
|
24
29
|
/**
|
|
25
30
|
* Type value for an `XmlElement` node.
|
|
26
31
|
*/
|
|
@@ -36,11 +41,28 @@ export class XmlNode {
|
|
|
36
41
|
*/
|
|
37
42
|
static readonly TYPE_TEXT = 'text';
|
|
38
43
|
|
|
44
|
+
/**
|
|
45
|
+
* Type value for an `XmlDeclaration` node.
|
|
46
|
+
*/
|
|
47
|
+
static readonly TYPE_XML_DECLARATION = 'xmldecl';
|
|
48
|
+
|
|
39
49
|
/**
|
|
40
50
|
* Parent node of this node, or `null` if this node has no parent.
|
|
41
51
|
*/
|
|
42
52
|
parent: XmlDocument | XmlElement | null = null;
|
|
43
53
|
|
|
54
|
+
/**
|
|
55
|
+
* Starting byte offset of this node in the original XML string, or `-1` if
|
|
56
|
+
* the offset is unknown.
|
|
57
|
+
*/
|
|
58
|
+
start = -1;
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Ending byte offset of this node in the original XML string, or `-1` if the
|
|
62
|
+
* offset is unknown.
|
|
63
|
+
*/
|
|
64
|
+
end = -1;
|
|
65
|
+
|
|
44
66
|
/**
|
|
45
67
|
* Document that contains this node, or `null` if this node is not associated
|
|
46
68
|
* with a document.
|
|
@@ -50,10 +72,13 @@ export class XmlNode {
|
|
|
50
72
|
}
|
|
51
73
|
|
|
52
74
|
/**
|
|
53
|
-
* Whether this node is the root node of the document
|
|
75
|
+
* Whether this node is the root node of the document (also known as the
|
|
76
|
+
* document element).
|
|
54
77
|
*/
|
|
55
78
|
get isRootNode(): boolean {
|
|
56
|
-
return this.parent !== null
|
|
79
|
+
return this.parent !== null
|
|
80
|
+
&& this.parent === this.document
|
|
81
|
+
&& this.type === XmlNode.TYPE_ELEMENT;
|
|
57
82
|
}
|
|
58
83
|
|
|
59
84
|
/**
|
|
@@ -68,7 +93,7 @@ export class XmlNode {
|
|
|
68
93
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#sec-white-space
|
|
69
94
|
*/
|
|
70
95
|
get preserveWhitespace(): boolean {
|
|
71
|
-
return
|
|
96
|
+
return !!this.parent?.preserveWhitespace;
|
|
72
97
|
}
|
|
73
98
|
|
|
74
99
|
/**
|
|
@@ -102,6 +127,11 @@ export class XmlNode {
|
|
|
102
127
|
json.preserveWhitespace = true;
|
|
103
128
|
}
|
|
104
129
|
|
|
130
|
+
if (this.start !== -1) {
|
|
131
|
+
json.start = this.start;
|
|
132
|
+
json.end = this.end;
|
|
133
|
+
}
|
|
134
|
+
|
|
105
135
|
return json;
|
|
106
136
|
}
|
|
107
137
|
}
|
package/src/lib/syntax.ts
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*
|
|
5
5
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
|
|
6
6
|
*/
|
|
7
|
-
export const attValueCharDoubleQuote = /[
|
|
7
|
+
export const attValueCharDoubleQuote = /["&<]/;
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* Regular expression that matches one or more `AttValue` characters in a
|
|
@@ -12,7 +12,7 @@ export const attValueCharDoubleQuote = /[^"&<]+/y;
|
|
|
12
12
|
*
|
|
13
13
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-AttValue
|
|
14
14
|
*/
|
|
15
|
-
export const attValueCharSingleQuote = /[
|
|
15
|
+
export const attValueCharSingleQuote = /['&<]/;
|
|
16
16
|
|
|
17
17
|
/**
|
|
18
18
|
* Regular expression that matches a whitespace character that should be
|
|
@@ -20,7 +20,7 @@ export const attValueCharSingleQuote = /[^'&<]+/y;
|
|
|
20
20
|
*
|
|
21
21
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#AVNormalize
|
|
22
22
|
*/
|
|
23
|
-
export const attValueNormalizedWhitespace =
|
|
23
|
+
export const attValueNormalizedWhitespace = /\r\n|[\n\r\t]/g;
|
|
24
24
|
|
|
25
25
|
/**
|
|
26
26
|
* Regular expression that matches one or more characters that signal the end of
|
|
@@ -49,7 +49,7 @@ export const predefinedEntities: Readonly<{[name: string]: string;}> = Object.fr
|
|
|
49
49
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameChar
|
|
50
50
|
*/
|
|
51
51
|
export function isNameChar(char: string): boolean {
|
|
52
|
-
let cp =
|
|
52
|
+
let cp = char.codePointAt(0) as number;
|
|
53
53
|
|
|
54
54
|
// Including the most common NameStartChars here improves performance
|
|
55
55
|
// slightly.
|
|
@@ -60,7 +60,8 @@ export function isNameChar(char: string): boolean {
|
|
|
60
60
|
|| cp === 0x2E // .
|
|
61
61
|
|| cp === 0xB7
|
|
62
62
|
|| (cp >= 0x300 && cp <= 0x36F)
|
|
63
|
-
||
|
|
63
|
+
|| cp === 0x203F
|
|
64
|
+
|| cp === 0x2040
|
|
64
65
|
|| isNameStartChar(char, cp);
|
|
65
66
|
}
|
|
66
67
|
|
|
@@ -69,7 +70,7 @@ export function isNameChar(char: string): boolean {
|
|
|
69
70
|
*
|
|
70
71
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-NameStartChar
|
|
71
72
|
*/
|
|
72
|
-
export function isNameStartChar(char: string, cp =
|
|
73
|
+
export function isNameStartChar(char: string, cp = char.codePointAt(0) as number): boolean {
|
|
73
74
|
return (cp >= 0x61 && cp <= 0x7A) // a-z
|
|
74
75
|
|| (cp >= 0x41 && cp <= 0x5A) // A-Z
|
|
75
76
|
|| cp === 0x3A // :
|
|
@@ -79,7 +80,8 @@ export function isNameStartChar(char: string, cp = getCodePoint(char)): boolean
|
|
|
79
80
|
|| (cp >= 0xF8 && cp <= 0x2FF)
|
|
80
81
|
|| (cp >= 0x370 && cp <= 0x37D)
|
|
81
82
|
|| (cp >= 0x37F && cp <= 0x1FFF)
|
|
82
|
-
||
|
|
83
|
+
|| cp === 0x200C
|
|
84
|
+
|| cp === 0x200D
|
|
83
85
|
|| (cp >= 0x2070 && cp <= 0x218F)
|
|
84
86
|
|| (cp >= 0x2C00 && cp <= 0x2FEF)
|
|
85
87
|
|| (cp >= 0x3001 && cp <= 0xD7FF)
|
|
@@ -104,7 +106,7 @@ export function isReferenceChar(char: string): boolean {
|
|
|
104
106
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#white
|
|
105
107
|
*/
|
|
106
108
|
export function isWhitespace(char: string): boolean {
|
|
107
|
-
let cp =
|
|
109
|
+
let cp = char.codePointAt(0);
|
|
108
110
|
|
|
109
111
|
return cp === 0x20
|
|
110
112
|
|| cp === 0x9
|
|
@@ -119,18 +121,10 @@ export function isWhitespace(char: string): boolean {
|
|
|
119
121
|
* @see https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char
|
|
120
122
|
*/
|
|
121
123
|
export function isXmlCodePoint(cp: number): boolean {
|
|
122
|
-
return cp
|
|
124
|
+
return (cp >= 0x20 && cp <= 0xD7FF)
|
|
123
125
|
|| cp === 0xA
|
|
126
|
+
|| cp === 0x9
|
|
124
127
|
|| cp === 0xD
|
|
125
|
-
|| (cp >= 0x20 && cp <= 0xD7FF)
|
|
126
128
|
|| (cp >= 0xE000 && cp <= 0xFFFD)
|
|
127
129
|
|| (cp >= 0x10000 && cp <= 0x10FFFF);
|
|
128
130
|
}
|
|
129
|
-
|
|
130
|
-
/**
|
|
131
|
-
* Returns the Unicode code point value of the given character, or `-1` if
|
|
132
|
-
* _char_ is empty.
|
|
133
|
-
*/
|
|
134
|
-
function getCodePoint(char: string): number {
|
|
135
|
-
return char.codePointAt(0) || -1;
|
|
136
|
-
}
|