@xmldom/xmldom 0.9.0-beta.8 → 0.9.0-beta.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -1
- package/index.d.ts +361 -21
- package/lib/conventions.js +66 -65
- package/lib/dom-parser.js +140 -81
- package/lib/dom.js +32 -29
- package/lib/grammar.js +510 -0
- package/lib/index.js +15 -1
- package/lib/sax.js +286 -106
- package/package.json +3 -4
- package/readme.md +31 -42
package/lib/grammar.js
ADDED
|
@@ -0,0 +1,510 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Probes the unicode support of regular expressions in the runtime.
 *
 * The probe constructs an expression with the `u` flag for a single astral code point
 * and checks that the match covers both UTF-16 code units (length 2).
 * If the runtime does not accept the flag `u` or the probe fails in any other way
 * (including a thrown error), `false` is returned,
 * so that character classes without unicode handling can be used instead.
 *
 * @param {typeof RegExp} [RegExpImpl=RegExp] for testing: the RegExp class;
 *        anything that is not a function is replaced by the global `RegExp`
 * @returns {boolean} `true` only if the probe matched the complete surrogate pair
 * @see https://node.green/#ES2015-syntax-RegExp--y--and--u--flags
 */
function detectUnicodeSupport(RegExpImpl) {
	var Impl = typeof RegExpImpl === 'function' ? RegExpImpl : RegExp;
	var supported = false;
	try {
		// eslint-disable-next-line es5/no-unicode-regex,es5/no-unicode-code-point-escape
		var found = new Impl('\u{1d306}', 'u').exec('𝌆');
		if (found) {
			supported = found[0].length === 2;
		}
	} catch (error) {
		// deliberately ignored: every failure means "no unicode support"
	}
	return supported;
}
|
|
23
|
+
// Evaluated once when the module is loaded, using the global `RegExp`.
// Decides whether `chars_without` and `reg` add the `u` flag
// and whether the astral ranges are appended to `Char` and `NameStartChar`.
var UNICODE_SUPPORT = detectUnicodeSupport();
|
|
24
|
+
|
|
25
|
+
/**
 * Extracts the content of a character class from the source of a RegExp:
 * everything between the leading `[` and the last `]`,
 * which also drops any quantifier trailing the class.
 *
 * @param {RegExp} regexp an expression whose source starts with `[`
 * @returns {string} the part of the source between the first character and the last `]`
 * @throws {Error} if the source does not start with `[`
 */
function chars(regexp) {
	var source = regexp.source;
	if (source[0] === '[') {
		return source.slice(1, source.lastIndexOf(']'));
	}
	throw new Error(regexp + ' can not be used with chars');
}
|
|
35
|
+
|
|
36
|
+
/**
 * Creates a new character list regular expression,
 * by removing `search` from the source of `regexp`.
 *
 * Only the first occurrence of `search` in the source is removed.
 * The flags of `regexp` are not carried over:
 * the result has the flag `u` when UNICODE_SUPPORT is truthy and no flags otherwise.
 *
 * @param {RegExp} regexp an expression whose source starts with `[`
 * @param {string} search the character(s) to remove, must be a non-empty string;
 *        `-` can only be removed when it is the first character of the class
 * @returns {RegExp}
 * @throws {Error} if the source of `regexp` does not start with `[`,
 *         if `search` is empty or not a string,
 *         if `search` is not part of the source,
 *         or if `search` is `-` and it is not located right after the leading `[`
 */
function chars_without(regexp, search) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error('/' + source + '/ can not be used with chars_without');
	}
	if (!search || typeof search !== 'string') {
		throw new Error(JSON.stringify(search) + ' is not a valid search');
	}
	var position = source.indexOf(search);
	if (position === -1) {
		throw new Error('"' + search + '" is not in /' + source + '/');
	}
	// a `-` anywhere else could be part of a range, removing it would change the meaning of the class
	if (search === '-' && position !== 1) {
		throw new Error('"' + search + '" is not at the first position of /' + source + '/');
	}
	return new RegExp(source.replace(search, ''), UNICODE_SUPPORT ? 'u' : '');
}
|
|
58
|
+
|
|
59
|
+
/**
 * Combines regular expressions and strings into a single regular expression
 * by concatenating them in the given order:
 * strings are used as they are, every other argument contributes its `source`.
 * The result has the flags `mu` when UNICODE_SUPPORT is truthy and `m` otherwise.
 *
 * When called without a `this` value, a bare `'|'` argument is rejected:
 * alternations have to be wrapped in a group, which is what `regg` is for.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error} if one of the arguments is the string `'|'` while `this` is `undefined`
 */
function reg(args) {
	var context = this;
	var sources = [];
	for (var index = 0; index < arguments.length; index++) {
		var part = arguments[index];
		if (typeof part !== 'string') {
			sources.push(part.source);
			continue;
		}
		if (context === undefined && part === '|') {
			throw new Error('use regg instead of reg to wrap expressions with `|`!');
		}
		sources.push(part);
	}
	return new RegExp(sources.join(''), UNICODE_SUPPORT ? 'mu' : 'm');
}
|
|
80
|
+
|
|
81
|
+
/**
 * Like `reg`, but wraps the combined expression in `(?:` and `)`
 * to create a non-capturing group.
 * Since `reg` is invoked with `regg` as its `this` value,
 * `'|'` is accepted as an argument here.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error} if no argument is provided
 */
function regg(args) {
	if (arguments.length === 0) {
		throw new Error('no parameters provided');
	}
	var parts = ['(?:'];
	for (var index = 0; index < arguments.length; index++) {
		parts.push(arguments[index]);
	}
	parts.push(')');
	return reg.apply(regg, parts);
}
|
|
92
|
+
|
|
93
|
+
// /**
|
|
94
|
+
// * Append ^ to the beginning of the expression.
|
|
95
|
+
// * @param {...(RegExp | string)[]} args
|
|
96
|
+
// * @returns {RegExp}
|
|
97
|
+
// */
|
|
98
|
+
// function reg_start(args) {
|
|
99
|
+
// if (arguments.length === 0) {
|
|
100
|
+
// throw new Error('no parameters provided');
|
|
101
|
+
// }
|
|
102
|
+
// return reg.apply(reg_start, ['^'].concat(Array.prototype.slice.call(arguments)));
|
|
103
|
+
// }
|
|
104
|
+
|
|
105
|
+
// https://www.w3.org/TR/xml/#document
|
|
106
|
+
// `[1] document ::= prolog element Misc*`
|
|
107
|
+
// https://www.w3.org/TR/xml11/#NT-document
|
|
108
|
+
// `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`
|
|
109
|
+
|
|
110
|
+
// https://www.w3.org/TR/xml/#NT-Char
|
|
111
|
+
// any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
|
|
112
|
+
// `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
|
|
113
|
+
// https://www.w3.org/TR/xml11/#NT-Char
|
|
114
|
+
// `[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
|
|
115
|
+
// https://www.w3.org/TR/xml11/#NT-RestrictedChar
|
|
116
|
+
// `[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]`
|
|
117
|
+
// https://www.w3.org/TR/xml11/#charsets
|
|
118
|
+
// `-` (\x2D) is listed first and left out of the \x20-\uD7FF range (\x20-\x2C, \x2E-...),
// since `chars_without(Char, '-')` only accepts `-` directly after the leading `[`.
var Char = /[-\x09\x0A\x0D\x20-\x2C\x2E-\uD7FF\uE000-\uFFFD]/; // without \u{10000}-\u{10FFFF}, appended below if UNICODE_SUPPORT
|
|
119
|
+
if (UNICODE_SUPPORT) {
|
|
120
|
+
// eslint-disable-next-line es5/no-unicode-code-point-escape
|
|
121
|
+
Char = reg('[', chars(Char), '\\u{10000}-\\u{10FFFF}', ']');
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
var _SChar = /[\x20\x09\x0D\x0A]/;
|
|
125
|
+
var SChar_s = chars(_SChar);
|
|
126
|
+
// https://www.w3.org/TR/xml11/#NT-S
|
|
127
|
+
// `[3] S ::= (#x20 | #x9 | #xD | #xA)+`
|
|
128
|
+
var S = reg(_SChar, '+');
|
|
129
|
+
// optional whitespace described as `S?` in the grammar,
|
|
130
|
+
// simplified to 0-n occurrences of the character class
|
|
131
|
+
// instead of 0-1 occurrences of a non-capturing group around S
|
|
132
|
+
var S_OPT = reg(_SChar, '*');
|
|
133
|
+
|
|
134
|
+
// https://www.w3.org/TR/xml11/#NT-NameStartChar
|
|
135
|
+
// `[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]`
|
|
136
|
+
var NameStartChar =
|
|
137
|
+
/[:_a-zA-Z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; // without \u10000-\uEFFFF
|
|
138
|
+
if (UNICODE_SUPPORT) {
|
|
139
|
+
// eslint-disable-next-line es5/no-unicode-code-point-escape
|
|
140
|
+
NameStartChar = reg('[', chars(NameStartChar), '\\u{10000}-\\u{10FFFF}', ']');
|
|
141
|
+
}
|
|
142
|
+
var NameStartChar_s = chars(NameStartChar);
|
|
143
|
+
|
|
144
|
+
// https://www.w3.org/TR/xml11/#NT-NameChar
|
|
145
|
+
// `[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]`
|
|
146
|
+
var NameChar = reg('[', NameStartChar_s, chars(/[-.0-9\xB7]/), chars(/[\u0300-\u036F\u203F-\u2040]/), ']');
|
|
147
|
+
// https://www.w3.org/TR/xml11/#NT-Name
|
|
148
|
+
// `[5] Name ::= NameStartChar (NameChar)*`
|
|
149
|
+
var Name = reg(NameStartChar, NameChar, '*');
|
|
150
|
+
/*
|
|
151
|
+
https://www.w3.org/TR/xml11/#NT-Names
|
|
152
|
+
`[6] Names ::= Name (#x20 Name)*`
|
|
153
|
+
*/
|
|
154
|
+
|
|
155
|
+
// https://www.w3.org/TR/xml11/#NT-Nmtoken
|
|
156
|
+
// `[7] Nmtoken ::= (NameChar)+`
|
|
157
|
+
var Nmtoken = reg(NameChar, '+');
|
|
158
|
+
/*
|
|
159
|
+
https://www.w3.org/TR/xml11/#NT-Nmtokens
|
|
160
|
+
`[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
|
|
161
|
+
var Nmtokens = reg(Nmtoken, regg(/\x20/, Nmtoken), '*');
|
|
162
|
+
*/
|
|
163
|
+
|
|
164
|
+
// https://www.w3.org/TR/xml11/#NT-EntityRef
|
|
165
|
+
// `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
|
|
166
|
+
var EntityRef = reg('&', Name, ';');
|
|
167
|
+
// https://www.w3.org/TR/xml11/#NT-CharRef
|
|
168
|
+
// `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
|
|
169
|
+
var CharRef = regg(/&#[0-9]+;|&#x[0-9a-fA-F]+;/);
|
|
170
|
+
|
|
171
|
+
/*
|
|
172
|
+
https://www.w3.org/TR/xml11/#NT-Reference
|
|
173
|
+
- `[67] Reference ::= EntityRef | CharRef`
|
|
174
|
+
- `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
|
|
175
|
+
- `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
|
|
176
|
+
*/
|
|
177
|
+
var Reference = regg(EntityRef, '|', CharRef);
|
|
178
|
+
|
|
179
|
+
// https://www.w3.org/TR/xml11/#NT-PEReference
|
|
180
|
+
// `[69] PEReference ::= '%' Name ';'`
|
|
181
|
+
// [VC: Entity Declared] [WFC: No Recursion] [WFC: In DTD]
|
|
182
|
+
var PEReference = reg('%', Name, ';');
|
|
183
|
+
|
|
184
|
+
// https://www.w3.org/TR/xml11/#NT-EntityValue
|
|
185
|
+
// `[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"`
|
|
186
|
+
var EntityValue = regg(
|
|
187
|
+
reg('"', regg(/[^%&"]/, '|', PEReference, '|', Reference), '*', '"'),
|
|
188
|
+
'|',
|
|
189
|
+
reg("'", regg(/[^%&']/, '|', PEReference, '|', Reference), '*', "'")
|
|
190
|
+
);
|
|
191
|
+
|
|
192
|
+
// https://www.w3.org/TR/xml11/#NT-AttValue
|
|
193
|
+
// `[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"`
|
|
194
|
+
var AttValue = regg('"', regg(/[^<&"]/, '|', Reference), '*', '"', '|', "'", regg(/[^<&']/, '|', Reference), '*', "'");
|
|
195
|
+
|
|
196
|
+
// https://www.w3.org/TR/xml-names/#ns-decl
|
|
197
|
+
// https://www.w3.org/TR/xml-names/#ns-qualnames
|
|
198
|
+
// NameStartChar without ":"
|
|
199
|
+
var NCNameStartChar = chars_without(NameStartChar, ':');
|
|
200
|
+
// https://www.w3.org/TR/xml-names/#orphans
|
|
201
|
+
// `[5] NCNameChar ::= NameChar - ':'`
|
|
202
|
+
// An XML NameChar, minus the ":"
|
|
203
|
+
var NCNameChar = chars_without(NameChar, ':');
|
|
204
|
+
// https://www.w3.org/TR/xml-names/#NT-NCName
|
|
205
|
+
// `[4] NCName ::= Name - (Char* ':' Char*)`
|
|
206
|
+
// An XML Name, minus the ":"
|
|
207
|
+
var NCName = reg(NCNameStartChar, NCNameChar, '*');
|
|
208
|
+
|
|
209
|
+
/**
|
|
210
|
+
https://www.w3.org/TR/xml-names/#ns-qualnames
|
|
211
|
+
|
|
212
|
+
```
|
|
213
|
+
[7] QName ::= PrefixedName | UnprefixedName
|
|
214
|
+
=== (NCName ':' NCName) | NCName
|
|
215
|
+
=== NCName (':' NCName)?
|
|
216
|
+
[8] PrefixedName ::= Prefix ':' LocalPart
|
|
217
|
+
=== NCName ':' NCName
|
|
218
|
+
[9] UnprefixedName ::= LocalPart
|
|
219
|
+
=== NCName
|
|
220
|
+
[10] Prefix ::= NCName
|
|
221
|
+
[11] LocalPart ::= NCName
|
|
222
|
+
```
|
|
223
|
+
*/
|
|
224
|
+
var QName = reg(NCName, regg(':', NCName), '?');
|
|
225
|
+
var QName_exact = reg('^', QName, '$');
|
|
226
|
+
var QName_group = reg('(', QName, ')');
|
|
227
|
+
|
|
228
|
+
// https://www.w3.org/TR/xml11/#NT-SystemLiteral
|
|
229
|
+
// `[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")`
|
|
230
|
+
var SystemLiteral = regg(/"[^"]*"|'[^']*'/);
|
|
231
|
+
|
|
232
|
+
/*
|
|
233
|
+
https://www.w3.org/TR/xml11/#NT-PI
|
|
234
|
+
```
|
|
235
|
+
[17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
|
|
236
|
+
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
|
237
|
+
```
|
|
238
|
+
target /xml/i is not excluded!
|
|
239
|
+
*/
|
|
240
|
+
var PI = reg(/^<\?/, '(', Name, ')', regg(S, '(', Char, '*?)'), '?', /\?>/);
|
|
241
|
+
|
|
242
|
+
// https://www.w3.org/TR/xml11/#NT-PubidChar
|
|
243
|
+
// `[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]`
|
|
244
|
+
var PubidChar = /[\x20\x0D\x0Aa-zA-Z0-9-'()+,./:=?;!*#@$_%]/;
|
|
245
|
+
|
|
246
|
+
// https://www.w3.org/TR/xml11/#NT-PubidLiteral
|
|
247
|
+
// `[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"`
|
|
248
|
+
var PubidLiteral = regg('"', PubidChar, '*"', '|', "'", chars_without(PubidChar, "'"), "*'");
|
|
249
|
+
|
|
250
|
+
// https://www.w3.org/TR/xml11/#NT-CharData
|
|
251
|
+
// `[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
|
|
252
|
+
|
|
253
|
+
var COMMENT_START = '<!--';
|
|
254
|
+
var COMMENT_END = '-->';
|
|
255
|
+
// https://www.w3.org/TR/xml11/#NT-Comment
|
|
256
|
+
// `[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'`
|
|
257
|
+
var Comment = reg(COMMENT_START, regg(chars_without(Char, '-'), '|', reg('-', chars_without(Char, '-'))), '*', COMMENT_END);
|
|
258
|
+
|
|
259
|
+
var PCDATA = '#PCDATA';
|
|
260
|
+
// https://www.w3.org/TR/xml11/#NT-Mixed
|
|
261
|
+
// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
|
|
262
|
+
// https://www.w3.org/TR/xml-names/#NT-Mixed
|
|
263
|
+
// `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
|
|
264
|
+
// [VC: Proper Group/PE Nesting] [VC: No Duplicate Types]
|
|
265
|
+
var Mixed = regg(
|
|
266
|
+
reg(/\(/, S_OPT, PCDATA, regg(S_OPT, /\|/, S_OPT, QName), '*', S_OPT, /\)\*/),
|
|
267
|
+
'|',
|
|
268
|
+
reg(/\(/, S_OPT, PCDATA, S_OPT, /\)/)
|
|
269
|
+
);
|
|
270
|
+
|
|
271
|
+
var _children_quantity = /[?*+]?/;
|
|
272
|
+
/*
|
|
273
|
+
`[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'` [VC: Proper Group/PE Nesting]
|
|
274
|
+
`[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'` [VC: Proper Group/PE Nesting]
|
|
275
|
+
simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
|
|
276
|
+
var _choice_or_seq = reg('[', NameChar_s, SChar_s, chars(_children_quantity), '()|,]*');
|
|
277
|
+
```
|
|
278
|
+
[48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
|
|
279
|
+
=== (Name | '(' S? cp ( S? '|' S? cp )+ S? ')' | '(' S? cp ( S? ',' S? cp )* S? ')') ('?' | '*' | '+')?
|
|
280
|
+
!== (Name | [_choice_or_seq]*) ('?' | '*' | '+')?
|
|
281
|
+
```
|
|
282
|
+
simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
|
|
283
|
+
var cp = reg(regg(Name, '|', _choice_or_seq), _children_quantity);
|
|
284
|
+
*/
|
|
285
|
+
/*
|
|
286
|
+
Inefficient regular expression (High)
|
|
287
|
+
This part of the regular expression may cause exponential backtracking on strings starting with '(|' and containing many repetitions of '|'.
|
|
288
|
+
https://github.com/xmldom/xmldom/security/code-scanning/91
|
|
289
|
+
var choice = regg(/\(/, S_OPT, cp, regg(S_OPT, /\|/, S_OPT, cp), '+', S_OPT, /\)/);
|
|
290
|
+
*/
|
|
291
|
+
/*
|
|
292
|
+
Inefficient regular expression (High)
|
|
293
|
+
This part of the regular expression may cause exponential backtracking on strings starting with '(,' and containing many repetitions of ','.
|
|
294
|
+
https://github.com/xmldom/xmldom/security/code-scanning/92
|
|
295
|
+
var seq = regg(/\(/, S_OPT, cp, regg(S_OPT, /,/, S_OPT, cp), '*', S_OPT, /\)/);
|
|
296
|
+
*/
|
|
297
|
+
|
|
298
|
+
// `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
|
|
299
|
+
// simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
|
|
300
|
+
var children = reg(/\([^>]+\)/, _children_quantity /*regg(choice, '|', seq), _children_quantity*/);
|
|
301
|
+
|
|
302
|
+
// https://www.w3.org/TR/xml11/#NT-contentspec
|
|
303
|
+
// `[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children`
|
|
304
|
+
var contentspec = regg('EMPTY', '|', 'ANY', '|', Mixed, '|', children);
|
|
305
|
+
|
|
306
|
+
var ELEMENTDECL_START = '<!ELEMENT';
|
|
307
|
+
// https://www.w3.org/TR/xml11/#NT-elementdecl
|
|
308
|
+
// `[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'`
|
|
309
|
+
// https://www.w3.org/TR/xml-names/#NT-elementdecl
|
|
310
|
+
// `[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'`
|
|
311
|
+
// because of https://www.w3.org/TR/xml11/#NT-PEReference
|
|
312
|
+
// since xmldom is not supporting replacements of PEReferences in the DTD
|
|
313
|
+
// this also supports PEReference in the possible places
|
|
314
|
+
var elementdecl = reg(ELEMENTDECL_START, S, regg(QName, '|', PEReference), S, regg(contentspec, '|', PEReference), S_OPT, '>');
|
|
315
|
+
|
|
316
|
+
// https://www.w3.org/TR/xml11/#NT-NotationType
|
|
317
|
+
// `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
|
|
318
|
+
// [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]
|
|
319
|
+
var NotationType = reg('NOTATION', S, /\(/, S_OPT, Name, regg(S_OPT, /\|/, S_OPT, Name), '*', S_OPT, /\)/);
|
|
320
|
+
// https://www.w3.org/TR/xml11/#NT-Enumeration
|
|
321
|
+
// `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
|
|
322
|
+
// [VC: Enumeration] [VC: No Duplicate Tokens]
|
|
323
|
+
var Enumeration = reg(/\(/, S_OPT, Nmtoken, regg(S_OPT, /\|/, S_OPT, Nmtoken), '*', S_OPT, /\)/);
|
|
324
|
+
|
|
325
|
+
// https://www.w3.org/TR/xml11/#NT-EnumeratedType
|
|
326
|
+
// `[57] EnumeratedType ::= NotationType | Enumeration`
|
|
327
|
+
var EnumeratedType = regg(NotationType, '|', Enumeration);
|
|
328
|
+
|
|
329
|
+
/*
|
|
330
|
+
```
|
|
331
|
+
[55] StringType ::= 'CDATA'
|
|
332
|
+
[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
|
|
333
|
+
| 'IDREF' [VC: IDREF]
|
|
334
|
+
| 'IDREFS' [VC: IDREF]
|
|
335
|
+
| 'ENTITY' [VC: Entity Name]
|
|
336
|
+
| 'ENTITIES' [VC: Entity Name]
|
|
337
|
+
| 'NMTOKEN' [VC: Name Token]
|
|
338
|
+
| 'NMTOKENS' [VC: Name Token]
|
|
339
|
+
[54] AttType ::= StringType | TokenizedType | EnumeratedType
|
|
340
|
+
```*/
|
|
341
|
+
var AttType = regg(/CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/, '|', EnumeratedType);
|
|
342
|
+
|
|
343
|
+
// `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
|
|
344
|
+
// [WFC: No < in Attribute Values] [WFC: No External Entity References]
|
|
345
|
+
// [VC: Fixed Attribute Default] [VC: Required Attribute] [VC: Attribute Default Value Syntactically Correct]
|
|
346
|
+
var DefaultDecl = regg(/#REQUIRED|#IMPLIED/, '|', regg(regg('#FIXED', S), '?', AttValue));
|
|
347
|
+
|
|
348
|
+
// https://www.w3.org/TR/xml11/#NT-AttDef
|
|
349
|
+
// [53] AttDef ::= S Name S AttType S DefaultDecl
|
|
350
|
+
// https://www.w3.org/TR/xml-names/#NT-AttDef
|
|
351
|
+
// [1] NSAttName ::= PrefixedAttName | DefaultAttName
|
|
352
|
+
// [2] PrefixedAttName ::= 'xmlns:' NCName [NSC: Reserved Prefixes and Namespace Names]
|
|
353
|
+
// [3] DefaultAttName ::= 'xmlns'
|
|
354
|
+
// [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
|
|
355
|
+
// === S Name S AttType S DefaultDecl
|
|
356
|
+
// xmldom is not distinguishing between QName and NSAttName on this level
|
|
357
|
+
// to support XML without namespaces in DTD we can not restrict it to QName
|
|
358
|
+
var AttDef = regg(S, Name, S, AttType, S, DefaultDecl);
|
|
359
|
+
|
|
360
|
+
var ATTLIST_DECL_START = '<!ATTLIST';
|
|
361
|
+
// https://www.w3.org/TR/xml11/#NT-AttlistDecl
|
|
362
|
+
// `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
|
|
363
|
+
// https://www.w3.org/TR/xml-names/#NT-AttlistDecl
|
|
364
|
+
// `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
|
|
365
|
+
// to support XML without namespaces in DTD we can not restrict it to QName
|
|
366
|
+
var AttlistDecl = reg(ATTLIST_DECL_START, S, Name, AttDef, '*', S_OPT, '>');
|
|
367
|
+
|
|
368
|
+
var SYSTEM = 'SYSTEM';
|
|
369
|
+
var PUBLIC = 'PUBLIC';
|
|
370
|
+
// https://www.w3.org/TR/xml11/#NT-ExternalID
|
|
371
|
+
// `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
|
|
372
|
+
var ExternalID = regg(regg(SYSTEM, S, SystemLiteral), '|', regg(PUBLIC, S, PubidLiteral, S, SystemLiteral));
|
|
373
|
+
var ExternalID_match = reg(
|
|
374
|
+
'^',
|
|
375
|
+
regg(
|
|
376
|
+
regg(SYSTEM, S, '(?<SystemLiteralOnly>', SystemLiteral, ')'),
|
|
377
|
+
'|',
|
|
378
|
+
regg(PUBLIC, S, '(?<PubidLiteral>', PubidLiteral, ')', S, '(?<SystemLiteral>', SystemLiteral, ')')
|
|
379
|
+
)
|
|
380
|
+
);
|
|
381
|
+
|
|
382
|
+
// https://www.w3.org/TR/xml11/#NT-NDataDecl
|
|
383
|
+
// `[76] NDataDecl ::= S 'NDATA' S Name` [VC: Notation Declared]
|
|
384
|
+
var NDataDecl = regg(S, 'NDATA', S, Name);
|
|
385
|
+
|
|
386
|
+
// https://www.w3.org/TR/xml11/#NT-EntityDef
|
|
387
|
+
// `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
|
|
388
|
+
var EntityDef = regg(EntityValue, '|', regg(ExternalID, NDataDecl, '?'));
|
|
389
|
+
|
|
390
|
+
var ENTITY_DECL_START = '<!ENTITY';
|
|
391
|
+
// https://www.w3.org/TR/xml11/#NT-GEDecl
|
|
392
|
+
// `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
|
|
393
|
+
var GEDecl = reg(ENTITY_DECL_START, S, Name, S, EntityDef, S_OPT, '>');
|
|
394
|
+
// https://www.w3.org/TR/xml11/#NT-PEDef
|
|
395
|
+
// `[74] PEDef ::= EntityValue | ExternalID`
|
|
396
|
+
var PEDef = regg(EntityValue, '|', ExternalID);
|
|
397
|
+
// https://www.w3.org/TR/xml11/#NT-PEDecl
|
|
398
|
+
// `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
|
|
399
|
+
var PEDecl = reg(ENTITY_DECL_START, S, '%', S, Name, S, PEDef, S_OPT, '>');
|
|
400
|
+
// https://www.w3.org/TR/xml11/#NT-EntityDecl
|
|
401
|
+
// `[70] EntityDecl ::= GEDecl | PEDecl`
|
|
402
|
+
var EntityDecl = regg(GEDecl, '|', PEDecl);
|
|
403
|
+
|
|
404
|
+
// https://www.w3.org/TR/xml11/#NT-PublicID
|
|
405
|
+
// `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
|
|
406
|
+
var PublicID = reg(PUBLIC, S, PubidLiteral);
|
|
407
|
+
// https://www.w3.org/TR/xml11/#NT-NotationDecl
|
|
408
|
+
// `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'` [VC: Unique Notation Name]
|
|
409
|
+
var NotationDecl = reg('<!NOTATION', S, Name, S, regg(ExternalID, '|', PublicID), S_OPT, '>');
|
|
410
|
+
|
|
411
|
+
// https://www.w3.org/TR/xml11/#NT-Eq
|
|
412
|
+
// `[25] Eq ::= S? '=' S?`
|
|
413
|
+
var Eq = reg(S_OPT, '=', S_OPT);
|
|
414
|
+
// https://www.w3.org/TR/xml/#NT-VersionNum
|
|
415
|
+
// `[26] VersionNum ::= '1.' [0-9]+`
|
|
416
|
+
// https://www.w3.org/TR/xml11/#NT-VersionNum
|
|
417
|
+
// `[26] VersionNum ::= '1.1'`
|
|
418
|
+
var VersionNum = /1[.]\d+/;
|
|
419
|
+
// https://www.w3.org/TR/xml11/#NT-VersionInfo
|
|
420
|
+
// `[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')`
|
|
421
|
+
var VersionInfo = reg(S, 'version', Eq, regg("'", VersionNum, "'", '|', '"', VersionNum, '"'));
|
|
422
|
+
// https://www.w3.org/TR/xml11/#NT-EncName
|
|
423
|
+
// `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
|
|
424
|
+
var EncName = /[A-Za-z][-A-Za-z0-9._]*/;
|
|
425
|
+
// https://www.w3.org/TR/xml11/#NT-EncDecl
|
|
426
|
+
// `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
|
|
427
|
+
var EncodingDecl = regg(S, 'encoding', Eq, regg('"', EncName, '"', '|', "'", EncName, "'"));
|
|
428
|
+
// https://www.w3.org/TR/xml11/#NT-SDDecl
|
|
429
|
+
// `[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))`
|
|
430
|
+
var SDDecl = regg(S, 'standalone', Eq, regg("'", regg('yes', '|', 'no'), "'", '|', '"', regg('yes', '|', 'no'), '"'));
|
|
431
|
+
// https://www.w3.org/TR/xml11/#NT-XMLDecl
|
|
432
|
+
// [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
|
|
433
|
+
var XMLDecl = reg(/^<\?xml/, VersionInfo, EncodingDecl, '?', SDDecl, '?', S_OPT, /\?>/);
|
|
434
|
+
|
|
435
|
+
/*
|
|
436
|
+
https://www.w3.org/TR/xml/#NT-markupdecl
|
|
437
|
+
https://www.w3.org/TR/xml11/#NT-markupdecl
|
|
438
|
+
`[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment`
|
|
439
|
+
var markupdecl = regg(elementdecl, '|', AttlistDecl, '|', EntityDecl, '|', NotationDecl, '|', PI_unsafe, '|', Comment);
|
|
440
|
+
*/
|
|
441
|
+
/*
|
|
442
|
+
https://www.w3.org/TR/xml-names/#NT-doctypedecl
|
|
443
|
+
`[28a] DeclSep ::= PEReference | S`
|
|
444
|
+
https://www.w3.org/TR/xml11/#NT-intSubset
|
|
445
|
+
```
|
|
446
|
+
[28b] intSubset ::= (markupdecl | DeclSep)*
|
|
447
|
+
=== (markupdecl | PEReference | S)*
|
|
448
|
+
```
|
|
449
|
+
[WFC: PE Between Declarations]
|
|
450
|
+
var intSubset = reg(regg(markupdecl, '|', PEReference, '|', S), '*');
|
|
451
|
+
*/
|
|
452
|
+
var DOCTYPE_DECL_START = '<!DOCTYPE';
|
|
453
|
+
/*
|
|
454
|
+
https://www.w3.org/TR/xml11/#NT-doctypedecl
|
|
455
|
+
`[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'`
|
|
456
|
+
https://www.w3.org/TR/xml-names/#NT-doctypedecl
|
|
457
|
+
`[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'`
|
|
458
|
+
var doctypedecl = reg('<!DOCTYPE', S, Name, regg(S, ExternalID), '?', S_OPT, regg(/\[/, intSubset, /]/, S_OPT), '?', '>');
|
|
459
|
+
*/
|
|
460
|
+
|
|
461
|
+
var CDATA_START = '<![CDATA[';
|
|
462
|
+
var CDATA_END = ']]>';
|
|
463
|
+
var CDStart = /<!\[CDATA\[/;
|
|
464
|
+
var CDEnd = /\]\]>/;
|
|
465
|
+
// Differs from production `[20] CData`: this expression lazily matches `Char`s
// and already includes the terminating `CDEnd`,
// which is why `CDSect` is composed of `CDStart` and `CData` only.
var CData = reg(Char, '*?', CDEnd);
|
|
466
|
+
/*
|
|
467
|
+
https://www.w3.org/TR/xml/#dt-cdsection
|
|
468
|
+
`[18] CDSect ::= CDStart CData CDEnd`
|
|
469
|
+
`[19] CDStart ::= '<![CDATA['`
|
|
470
|
+
`[20] CData ::= (Char* - (Char* ']]>' Char*))`
|
|
471
|
+
`[21] CDEnd ::= ']]>'`
|
|
472
|
+
*/
|
|
473
|
+
var CDSect = reg(CDStart, CData);
|
|
474
|
+
|
|
475
|
+
// unit tested
|
|
476
|
+
exports.chars = chars;
|
|
477
|
+
exports.chars_without = chars_without;
|
|
478
|
+
exports.detectUnicodeSupport = detectUnicodeSupport;
|
|
479
|
+
exports.reg = reg;
|
|
480
|
+
exports.regg = regg;
|
|
481
|
+
exports.AttlistDecl = AttlistDecl;
|
|
482
|
+
exports.CDATA_START = CDATA_START;
|
|
483
|
+
exports.CDATA_END = CDATA_END;
|
|
484
|
+
exports.CDSect = CDSect;
|
|
485
|
+
exports.Char = Char;
|
|
486
|
+
exports.Comment = Comment;
|
|
487
|
+
exports.COMMENT_START = COMMENT_START;
|
|
488
|
+
exports.COMMENT_END = COMMENT_END;
|
|
489
|
+
exports.DOCTYPE_DECL_START = DOCTYPE_DECL_START;
|
|
490
|
+
exports.elementdecl = elementdecl;
|
|
491
|
+
exports.EntityDecl = EntityDecl;
|
|
492
|
+
exports.EntityValue = EntityValue;
|
|
493
|
+
exports.ExternalID = ExternalID;
|
|
494
|
+
exports.ExternalID_match = ExternalID_match;
|
|
495
|
+
exports.Name = Name;
|
|
496
|
+
exports.NotationDecl = NotationDecl;
|
|
497
|
+
exports.PEReference = PEReference;
|
|
498
|
+
exports.PI = PI;
|
|
499
|
+
exports.PUBLIC = PUBLIC;
|
|
500
|
+
exports.PubidLiteral = PubidLiteral;
|
|
501
|
+
exports.QName = QName;
|
|
502
|
+
exports.QName_exact = QName_exact;
|
|
503
|
+
exports.QName_group = QName_group;
|
|
504
|
+
exports.S = S;
|
|
505
|
+
exports.SChar_s = SChar_s;
|
|
506
|
+
exports.S_OPT = S_OPT;
|
|
507
|
+
exports.SYSTEM = SYSTEM;
|
|
508
|
+
exports.SystemLiteral = SystemLiteral;
|
|
509
|
+
exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
|
|
510
|
+
exports.XMLDecl = XMLDecl;
|
package/lib/index.js
CHANGED
|
@@ -1,6 +1,20 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
var conventions = require('./conventions');
|
|
4
|
+
exports.assign = conventions.assign;
|
|
5
|
+
exports.hasDefaultHTMLNamespace = conventions.hasDefaultHTMLNamespace;
|
|
6
|
+
exports.isHTMLMimeType = conventions.isHTMLMimeType;
|
|
7
|
+
exports.isValidMimeType = conventions.isValidMimeType;
|
|
8
|
+
exports.MIME_TYPE = conventions.MIME_TYPE;
|
|
9
|
+
exports.NAMESPACE = conventions.NAMESPACE;
|
|
10
|
+
exports.ParseError = conventions.ParseError;
|
|
11
|
+
|
|
3
12
|
var dom = require('./dom');
|
|
13
|
+
exports.DOMException = dom.DOMException;
|
|
4
14
|
exports.DOMImplementation = dom.DOMImplementation;
|
|
5
15
|
exports.XMLSerializer = dom.XMLSerializer;
|
|
6
|
-
|
|
16
|
+
|
|
17
|
+
var domParser = require('./dom-parser');
|
|
18
|
+
exports.DOMParser = domParser.DOMParser;
|
|
19
|
+
exports.onErrorStopParsing = domParser.onErrorStopParsing;
|
|
20
|
+
exports.onWarningStopParsing = domParser.onWarningStopParsing;
|