@xmldom/xmldom 0.9.0-beta.8 → 0.9.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js ADDED
@@ -0,0 +1,510 @@
1
+ 'use strict';
2
+
3
/**
 * Detects whether regular expressions in the runtime support the `u` (unicode) flag.
 * Should the runtime not accept the flag `u`,
 * character classes without unicode handling have to be used.
 *
 * The probe character U+1D306 is spelled as a surrogate pair escape (`\uD834\uDF06`)
 * instead of the ES2015 code point escape: the resulting string is identical,
 * but the file still parses on runtimes without ES2015 support, where the detection
 * is expected to return `false` rather than fail with a SyntaxError while loading.
 *
 * @param {typeof RegExp} [RegExpImpl=RegExp] for testing: the RegExp class;
 *        anything that is not a function is replaced by the global `RegExp`
 * @returns {boolean} `true` only if a `u` flagged expression could be created
 *          and it matched the whole surrogate pair (2 UTF-16 code units)
 * @see https://node.green/#ES2015-syntax-RegExp--y--and--u--flags
 */
function detectUnicodeSupport(RegExpImpl) {
	// U+1D306, a character outside of the Basic Multilingual Plane
	var astral = '\uD834\uDF06';
	try {
		if (typeof RegExpImpl !== 'function') {
			RegExpImpl = RegExp;
		}
		// eslint-disable-next-line es5/no-unicode-regex
		var match = new RegExpImpl(astral, 'u').exec(astral);
		return !!match && match[0].length === 2;
	} catch (error) {
		// deliberate best effort: any failure to construct or run the expression
		// is treated as "not supported"
	}
	return false;
}
23
+ var UNICODE_SUPPORT = detectUnicodeSupport();
24
+
25
/**
 * Extracts the content of a character class, so it can be embedded into another one:
 * the leading `[` is dropped, as well as the last `]` and everything following it
 * (e.g. a trailing quantifier).
 *
 * @param {RegExp} regexp an expression whose source starts with `[`
 * @returns {string} the source between the first `[` and the last `]`
 * @throws {Error} if the source of `regexp` does not start with `[`
 */
function chars(regexp) {
	var source = regexp.source;
	if (source.charAt(0) !== '[') {
		throw new Error(regexp + ' can not be used with chars');
	}
	var closingBracket = source.lastIndexOf(']');
	return source.slice(1, closingBracket);
}
35
+
36
/**
 * Creates a new character class regular expression,
 * by removing the first occurrence of `search` from the source of `regexp`.
 * The result is created with the `u` flag when `UNICODE_SUPPORT` is truthy,
 * and without any flags otherwise.
 *
 * Removing `-` is only allowed when it is the first character of the class,
 * since a `-` at any other position could be part of a range.
 *
 * @param {RegExp} regexp an expression whose source starts with `[`
 * @param {string} search the character(s) to remove, must be a non-empty string
 * @returns {RegExp}
 * @throws {Error} if the source of `regexp` does not start with `[`,
 *         if `search` is not a non-empty string,
 *         if `search` is not part of the source,
 *         or if `search` is `-` and not at the first position of the class
 */
function chars_without(regexp, search) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error('/' + source + '/ can not be used with chars_without');
	}
	if (!search || typeof search !== 'string') {
		throw new Error(JSON.stringify(search) + ' is not a valid search');
	}
	var index = source.indexOf(search);
	if (index === -1) {
		throw new Error('"' + search + '" is not in /' + source + '/');
	}
	// index 0 is the `[`, so index 1 is the first position inside of the class
	if (search === '-' && index !== 1) {
		throw new Error('"' + search + '" is not at the first position of /' + source + '/');
	}
	// a string pattern makes `replace` remove the first occurrence only
	return new RegExp(source.replace(search, ''), UNICODE_SUPPORT ? 'u' : '');
}
58
+
59
/**
 * Combines strings and regular expressions into a single regular expression,
 * by joining the strings as they are and the `source` of each regular expression.
 * The result always has the `m` flag, and additionally the `u` flag
 * when `UNICODE_SUPPORT` is truthy.
 *
 * When invoked without a `this` value (a plain call in strict mode),
 * a string argument that is exactly `|` is rejected:
 * alternations have to be wrapped into a group by using `regg`.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error} if called without a `this` value and one of the arguments is the string `|`
 */
function reg(args) {
	var context = this;
	var parts = Array.prototype.slice.call(arguments);
	var sources = [];
	for (var i = 0; i < parts.length; i++) {
		var part = parts[i];
		if (typeof part !== 'string') {
			sources.push(part.source);
			continue;
		}
		if (context === undefined && part === '|') {
			throw new Error('use regg instead of reg to wrap expressions with `|`!');
		}
		sources.push(part);
	}
	return new RegExp(sources.join(''), UNICODE_SUPPORT ? 'mu' : 'm');
}
80
+
81
/**
 * Like `reg`, but wraps the expression in `(?:` and `)` to create a non-capturing group.
 * `reg` is invoked with `regg` as its `this` value,
 * which is what allows `|` to be passed as an argument.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error} if no arguments are provided
 */
function regg(args) {
	if (arguments.length === 0) {
		throw new Error('no parameters provided');
	}
	var wrapped = ['(?:'];
	wrapped.push.apply(wrapped, arguments);
	wrapped.push(')');
	return reg.apply(regg, wrapped);
}
92
+
93
+ // /**
94
+ // * Append ^ to the beginning of the expression.
95
+ // * @param {...(RegExp | string)[]} args
96
+ // * @returns {RegExp}
97
+ // */
98
+ // function reg_start(args) {
99
+ // if (arguments.length === 0) {
100
+ // throw new Error('no parameters provided');
101
+ // }
102
+ // return reg.apply(reg_start, ['^'].concat(Array.prototype.slice.call(arguments)));
103
+ // }
104
+
105
+ // https://www.w3.org/TR/xml/#document
106
+ // `[1] document ::= prolog element Misc*`
107
+ // https://www.w3.org/TR/xml11/#NT-document
108
+ // `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`
109
+
110
+ // https://www.w3.org/TR/xml/#NT-Char
111
+ // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
112
+ // `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
113
+ // https://www.w3.org/TR/xml11/#NT-Char
114
+ // `[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
115
+ // https://www.w3.org/TR/xml11/#NT-RestrictedChar
116
+ // `[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]`
117
+ // https://www.w3.org/TR/xml11/#charsets
118
+ var Char = /[-\x09\x0A\x0D\x20-\x2C\x2E-\uD7FF\uE000-\uFFFD]/; // BMP part only: \u{10000}-\u{10FFFF} is appended below when the `u` flag is supported; `-` is listed first (and left out of the \x20-\uD7FF range) so `chars_without(Char, '-')` can remove it
119
+ if (UNICODE_SUPPORT) {
120
+ // eslint-disable-next-line es5/no-unicode-code-point-escape
121
+ Char = reg('[', chars(Char), '\\u{10000}-\\u{10FFFF}', ']');
122
+ }
123
+
124
+ var _SChar = /[\x20\x09\x0D\x0A]/;
125
+ var SChar_s = chars(_SChar);
126
+ // https://www.w3.org/TR/xml11/#NT-S
127
+ // `[3] S ::= (#x20 | #x9 | #xD | #xA)+`
128
+ var S = reg(_SChar, '+');
129
+ // optional whitespace described as `S?` in the grammar,
130
+ // simplified to 0-n occurrences of the character class
131
+ // instead of 0-1 occurrences of a non-capturing group around S
132
+ var S_OPT = reg(_SChar, '*');
133
+
134
// https://www.w3.org/TR/xml11/#NT-NameStartChar
// `[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]`
// The literal covers the part of the production inside of the Basic Multilingual Plane.
// #x37E is not part of the production (it jumps from #x37D to #x37F),
// so the range starting at #x370 is split into two ranges.
var NameStartChar =
	/[:_a-zA-Z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; // without \u{10000}-\u{EFFFF}
if (UNICODE_SUPPORT) {
	// the supplementary range of this production ends at #xEFFFF (unlike `Char`, which ends at #x10FFFF)
	// eslint-disable-next-line es5/no-unicode-code-point-escape
	NameStartChar = reg('[', chars(NameStartChar), '\\u{10000}-\\u{EFFFF}', ']');
}
// the content of the character class, to be embedded into other character classes
var NameStartChar_s = chars(NameStartChar);
143
+
144
+ // https://www.w3.org/TR/xml11/#NT-NameChar
145
+ // `[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]`
146
+ var NameChar = reg('[', NameStartChar_s, chars(/[-.0-9\xB7]/), chars(/[\u0300-\u036F\u203F-\u2040]/), ']');
147
+ // https://www.w3.org/TR/xml11/#NT-Name
148
+ // `[5] Name ::= NameStartChar (NameChar)*`
149
+ var Name = reg(NameStartChar, NameChar, '*');
150
+ /*
151
+ https://www.w3.org/TR/xml11/#NT-Names
152
+ `[6] Names ::= Name (#x20 Name)*`
153
+ */
154
+
155
+ // https://www.w3.org/TR/xml11/#NT-Nmtoken
156
+ // `[7] Nmtoken ::= (NameChar)+`
157
+ var Nmtoken = reg(NameChar, '+');
158
+ /*
159
+ https://www.w3.org/TR/xml11/#NT-Nmtokens
160
+ `[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
161
+ var Nmtokens = reg(Nmtoken, regg(/\x20/, Nmtoken), '*');
162
+ */
163
+
164
+ // https://www.w3.org/TR/xml11/#NT-EntityRef
165
+ // `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
166
+ var EntityRef = reg('&', Name, ';');
167
+ // https://www.w3.org/TR/xml11/#NT-CharRef
168
+ // `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
169
+ var CharRef = regg(/&#[0-9]+;|&#x[0-9a-fA-F]+;/);
170
+
171
+ /*
172
+ https://www.w3.org/TR/xml11/#NT-Reference
173
+ - `[67] Reference ::= EntityRef | CharRef`
174
+ - `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
175
+ - `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
176
+ */
177
+ var Reference = regg(EntityRef, '|', CharRef);
178
+
179
+ // https://www.w3.org/TR/xml11/#NT-PEReference
180
+ // `[69] PEReference ::= '%' Name ';'`
181
+ // [VC: Entity Declared] [WFC: No Recursion] [WFC: In DTD]
182
+ var PEReference = reg('%', Name, ';');
183
+
184
+ // https://www.w3.org/TR/xml11/#NT-EntityValue
185
+ // `[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"`
186
+ var EntityValue = regg(
187
+ reg('"', regg(/[^%&"]/, '|', PEReference, '|', Reference), '*', '"'),
188
+ '|',
189
+ reg("'", regg(/[^%&']/, '|', PEReference, '|', Reference), '*', "'")
190
+ );
191
+
192
+ // https://www.w3.org/TR/xml11/#NT-AttValue
193
+ // `[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"`
194
+ var AttValue = regg('"', regg(/[^<&"]/, '|', Reference), '*', '"', '|', "'", regg(/[^<&']/, '|', Reference), '*', "'");
195
+
196
+ // https://www.w3.org/TR/xml-names/#ns-decl
197
+ // https://www.w3.org/TR/xml-names/#ns-qualnames
198
+ // NameStartChar without ":"
199
+ var NCNameStartChar = chars_without(NameStartChar, ':');
200
+ // https://www.w3.org/TR/xml-names/#orphans
201
+ // `[5] NCNameChar ::= NameChar - ':'`
202
+ // An XML NameChar, minus the ":"
203
+ var NCNameChar = chars_without(NameChar, ':');
204
+ // https://www.w3.org/TR/xml-names/#NT-NCName
205
+ // `[4] NCName ::= Name - (Char* ':' Char*)`
206
+ // An XML Name, minus the ":"
207
+ var NCName = reg(NCNameStartChar, NCNameChar, '*');
208
+
209
+ /**
210
+ https://www.w3.org/TR/xml-names/#ns-qualnames
211
+
212
+ ```
213
+ [7] QName ::= PrefixedName | UnprefixedName
214
+ === (NCName ':' NCName) | NCName
215
+ === NCName (':' NCName)?
216
+ [8] PrefixedName ::= Prefix ':' LocalPart
217
+ === NCName ':' NCName
218
+ [9] UnprefixedName ::= LocalPart
219
+ === NCName
220
+ [10] Prefix ::= NCName
221
+ [11] LocalPart ::= NCName
222
+ ```
223
+ */
224
+ var QName = reg(NCName, regg(':', NCName), '?');
225
+ var QName_exact = reg('^', QName, '$');
226
+ var QName_group = reg('(', QName, ')');
227
+
228
+ // https://www.w3.org/TR/xml11/#NT-SystemLiteral
229
+ // `[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")`
230
+ var SystemLiteral = regg(/"[^"]*"|'[^']*'/);
231
+
232
+ /*
233
+ https://www.w3.org/TR/xml11/#NT-PI
234
+ ```
235
+ [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
236
+ [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
237
+ ```
238
+ target /xml/i is not excluded!
239
+ */
240
+ var PI = reg(/^<\?/, '(', Name, ')', regg(S, '(', Char, '*?)'), '?', /\?>/);
241
+
242
+ // https://www.w3.org/TR/xml11/#NT-PubidChar
243
+ // `[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]`
244
+ var PubidChar = /[\x20\x0D\x0Aa-zA-Z0-9-'()+,./:=?;!*#@$_%]/;
245
+
246
+ // https://www.w3.org/TR/xml11/#NT-PubidLiteral
247
+ // `[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"`
248
+ var PubidLiteral = regg('"', PubidChar, '*"', '|', "'", chars_without(PubidChar, "'"), "*'");
249
+
250
+ // https://www.w3.org/TR/xml11/#NT-CharData
251
+ // `[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
252
+
253
+ var COMMENT_START = '<!--';
254
+ var COMMENT_END = '-->';
255
+ // https://www.w3.org/TR/xml11/#NT-Comment
256
+ // `[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'`
257
+ var Comment = reg(COMMENT_START, regg(chars_without(Char, '-'), '|', reg('-', chars_without(Char, '-'))), '*', COMMENT_END);
258
+
259
+ var PCDATA = '#PCDATA';
260
+ // https://www.w3.org/TR/xml11/#NT-Mixed
261
+ // `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
262
+ // https://www.w3.org/TR/xml-names/#NT-Mixed
263
+ // `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
264
+ // [VC: Proper Group/PE Nesting] [VC: No Duplicate Types]
265
+ var Mixed = regg(
266
+ reg(/\(/, S_OPT, PCDATA, regg(S_OPT, /\|/, S_OPT, QName), '*', S_OPT, /\)\*/),
267
+ '|',
268
+ reg(/\(/, S_OPT, PCDATA, S_OPT, /\)/)
269
+ );
270
+
271
+ var _children_quantity = /[?*+]?/;
272
+ /*
273
+ `[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'` [VC: Proper Group/PE Nesting]
274
+ `[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'` [VC: Proper Group/PE Nesting]
275
+ simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
276
+ var _choice_or_seq = reg('[', NameChar_s, SChar_s, chars(_children_quantity), '()|,]*');
277
+ ```
278
+ [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
279
+ === (Name | '(' S? cp ( S? '|' S? cp )+ S? ')' | '(' S? cp ( S? ',' S? cp )* S? ')') ('?' | '*' | '+')?
280
+ !== (Name | [_choice_or_seq]*) ('?' | '*' | '+')?
281
+ ```
282
+ simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
283
+ var cp = reg(regg(Name, '|', _choice_or_seq), _children_quantity);
284
+ */
285
+ /*
286
+ Inefficient regular expression (High)
287
+ This part of the regular expression may cause exponential backtracking on strings starting with '(|' and containing many repetitions of '|'.
288
+ https://github.com/xmldom/xmldom/security/code-scanning/91
289
+ var choice = regg(/\(/, S_OPT, cp, regg(S_OPT, /\|/, S_OPT, cp), '+', S_OPT, /\)/);
290
+ */
291
+ /*
292
+ Inefficient regular expression (High)
293
+ This part of the regular expression may cause exponential backtracking on strings starting with '(,' and containing many repetitions of ','.
294
+ https://github.com/xmldom/xmldom/security/code-scanning/92
295
+ var seq = regg(/\(/, S_OPT, cp, regg(S_OPT, /,/, S_OPT, cp), '*', S_OPT, /\)/);
296
+ */
297
+
298
+ // `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
299
+ // simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
300
+ var children = reg(/\([^>]+\)/, _children_quantity /*regg(choice, '|', seq), _children_quantity*/);
301
+
302
+ // https://www.w3.org/TR/xml11/#NT-contentspec
303
+ // `[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children`
304
+ var contentspec = regg('EMPTY', '|', 'ANY', '|', Mixed, '|', children);
305
+
306
+ var ELEMENTDECL_START = '<!ELEMENT';
307
+ // https://www.w3.org/TR/xml11/#NT-elementdecl
308
+ // `[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'`
309
+ // https://www.w3.org/TR/xml-names/#NT-elementdecl
310
+ // `[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'`
311
+ // because of https://www.w3.org/TR/xml11/#NT-PEReference
312
+ // since xmldom is not supporting replacements of PEReferences in the DTD
313
+ // this also supports PEReference in the possible places
314
+ var elementdecl = reg(ELEMENTDECL_START, S, regg(QName, '|', PEReference), S, regg(contentspec, '|', PEReference), S_OPT, '>');
315
+
316
+ // https://www.w3.org/TR/xml11/#NT-NotationType
317
+ // `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
318
+ // [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]
319
+ var NotationType = reg('NOTATION', S, /\(/, S_OPT, Name, regg(S_OPT, /\|/, S_OPT, Name), '*', S_OPT, /\)/);
320
+ // https://www.w3.org/TR/xml11/#NT-Enumeration
321
+ // `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
322
+ // [VC: Enumeration] [VC: No Duplicate Tokens]
323
+ var Enumeration = reg(/\(/, S_OPT, Nmtoken, regg(S_OPT, /\|/, S_OPT, Nmtoken), '*', S_OPT, /\)/);
324
+
325
+ // https://www.w3.org/TR/xml11/#NT-EnumeratedType
326
+ // `[57] EnumeratedType ::= NotationType | Enumeration`
327
+ var EnumeratedType = regg(NotationType, '|', Enumeration);
328
+
329
+ /*
330
+ ```
331
+ [55] StringType ::= 'CDATA'
332
+ [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
333
+ | 'IDREF' [VC: IDREF]
334
+ | 'IDREFS' [VC: IDREF]
335
+ | 'ENTITY' [VC: Entity Name]
336
+ | 'ENTITIES' [VC: Entity Name]
337
+ | 'NMTOKEN' [VC: Name Token]
338
+ | 'NMTOKENS' [VC: Name Token]
339
+ [54] AttType ::= StringType | TokenizedType | EnumeratedType
340
+ ```*/
341
+ var AttType = regg(/CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/, '|', EnumeratedType);
342
+
343
+ // `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
344
+ // [WFC: No < in Attribute Values] [WFC: No External Entity References]
345
+ // [VC: Fixed Attribute Default] [VC: Required Attribute] [VC: Attribute Default Value Syntactically Correct]
346
+ var DefaultDecl = regg(/#REQUIRED|#IMPLIED/, '|', regg(regg('#FIXED', S), '?', AttValue));
347
+
348
+ // https://www.w3.org/TR/xml11/#NT-AttDef
349
+ // [53] AttDef ::= S Name S AttType S DefaultDecl
350
+ // https://www.w3.org/TR/xml-names/#NT-AttDef
351
+ // [1] NSAttName ::= PrefixedAttName | DefaultAttName
352
+ // [2] PrefixedAttName ::= 'xmlns:' NCName [NSC: Reserved Prefixes and Namespace Names]
353
+ // [3] DefaultAttName ::= 'xmlns'
354
+ // [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
355
+ // === S Name S AttType S DefaultDecl
356
+ // xmldom is not distinguishing between QName and NSAttName on this level
357
+ // to support XML without namespaces in DTD we can not restrict it to QName
358
+ var AttDef = regg(S, Name, S, AttType, S, DefaultDecl);
359
+
360
+ var ATTLIST_DECL_START = '<!ATTLIST';
361
+ // https://www.w3.org/TR/xml11/#NT-AttlistDecl
362
+ // `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
363
+ // https://www.w3.org/TR/xml-names/#NT-AttlistDecl
364
+ // `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
365
+ // to support XML without namespaces in DTD we can not restrict it to QName
366
+ var AttlistDecl = reg(ATTLIST_DECL_START, S, Name, AttDef, '*', S_OPT, '>');
367
+
368
+ var SYSTEM = 'SYSTEM';
369
+ var PUBLIC = 'PUBLIC';
370
+ // https://www.w3.org/TR/xml11/#NT-ExternalID
371
+ // `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
372
+ var ExternalID = regg(regg(SYSTEM, S, SystemLiteral), '|', regg(PUBLIC, S, PubidLiteral, S, SystemLiteral));
373
+ var ExternalID_match = reg(
374
+ '^',
375
+ regg(
376
+ regg(SYSTEM, S, '(?<SystemLiteralOnly>', SystemLiteral, ')'),
377
+ '|',
378
+ regg(PUBLIC, S, '(?<PubidLiteral>', PubidLiteral, ')', S, '(?<SystemLiteral>', SystemLiteral, ')')
379
+ )
380
+ );
381
+
382
+ // https://www.w3.org/TR/xml11/#NT-NDataDecl
383
+ // `[76] NDataDecl ::= S 'NDATA' S Name` [VC: Notation Declared]
384
+ var NDataDecl = regg(S, 'NDATA', S, Name);
385
+
386
+ // https://www.w3.org/TR/xml11/#NT-EntityDef
387
+ // `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
388
+ var EntityDef = regg(EntityValue, '|', regg(ExternalID, NDataDecl, '?'));
389
+
390
+ var ENTITY_DECL_START = '<!ENTITY';
391
+ // https://www.w3.org/TR/xml11/#NT-GEDecl
392
+ // `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
393
+ var GEDecl = reg(ENTITY_DECL_START, S, Name, S, EntityDef, S_OPT, '>');
394
+ // https://www.w3.org/TR/xml11/#NT-PEDef
395
+ // `[74] PEDef ::= EntityValue | ExternalID`
396
+ var PEDef = regg(EntityValue, '|', ExternalID);
397
+ // https://www.w3.org/TR/xml11/#NT-PEDecl
398
+ // `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
399
+ var PEDecl = reg(ENTITY_DECL_START, S, '%', S, Name, S, PEDef, S_OPT, '>');
400
+ // https://www.w3.org/TR/xml11/#NT-EntityDecl
401
+ // `[70] EntityDecl ::= GEDecl | PEDecl`
402
+ var EntityDecl = regg(GEDecl, '|', PEDecl);
403
+
404
+ // https://www.w3.org/TR/xml11/#NT-PublicID
405
+ // `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
406
+ var PublicID = reg(PUBLIC, S, PubidLiteral);
407
+ // https://www.w3.org/TR/xml11/#NT-NotationDecl
408
+ // `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'` [VC: Unique Notation Name]
409
+ var NotationDecl = reg('<!NOTATION', S, Name, S, regg(ExternalID, '|', PublicID), S_OPT, '>');
410
+
411
+ // https://www.w3.org/TR/xml11/#NT-Eq
412
+ // `[25] Eq ::= S? '=' S?`
413
+ var Eq = reg(S_OPT, '=', S_OPT);
414
+ // https://www.w3.org/TR/xml/#NT-VersionNum
415
+ // `[26] VersionNum ::= '1.' [0-9]+`
416
+ // https://www.w3.org/TR/xml11/#NT-VersionNum
417
+ // `[26] VersionNum ::= '1.1'`
418
+ var VersionNum = /1[.]\d+/;
419
+ // https://www.w3.org/TR/xml11/#NT-VersionInfo
420
+ // `[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')`
421
+ var VersionInfo = reg(S, 'version', Eq, regg("'", VersionNum, "'", '|', '"', VersionNum, '"'));
422
+ // https://www.w3.org/TR/xml11/#NT-EncName
423
+ // `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
424
+ var EncName = /[A-Za-z][-A-Za-z0-9._]*/;
425
+ // https://www.w3.org/TR/xml11/#NT-EncodingDecl
426
+ // `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
427
+ var EncodingDecl = regg(S, 'encoding', Eq, regg('"', EncName, '"', '|', "'", EncName, "'"));
428
+ // https://www.w3.org/TR/xml11/#NT-SDDecl
429
+ // `[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))`
430
+ var SDDecl = regg(S, 'standalone', Eq, regg("'", regg('yes', '|', 'no'), "'", '|', '"', regg('yes', '|', 'no'), '"'));
431
+ // https://www.w3.org/TR/xml11/#NT-XMLDecl
432
+ // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
433
+ var XMLDecl = reg(/^<\?xml/, VersionInfo, EncodingDecl, '?', SDDecl, '?', S_OPT, /\?>/);
434
+
435
+ /*
436
+ https://www.w3.org/TR/xml/#NT-markupdecl
437
+ https://www.w3.org/TR/xml11/#NT-markupdecl
438
+ `[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment`
439
+ var markupdecl = regg(elementdecl, '|', AttlistDecl, '|', EntityDecl, '|', NotationDecl, '|', PI_unsafe, '|', Comment);
440
+ */
441
+ /*
442
+ https://www.w3.org/TR/xml-names/#NT-doctypedecl
443
+ `[28a] DeclSep ::= PEReference | S`
444
+ https://www.w3.org/TR/xml11/#NT-intSubset
445
+ ```
446
+ [28b] intSubset ::= (markupdecl | DeclSep)*
447
+ === (markupdecl | PEReference | S)*
448
+ ```
449
+ [WFC: PE Between Declarations]
450
+ var intSubset = reg(regg(markupdecl, '|', PEReference, '|', S), '*');
451
+ */
452
+ var DOCTYPE_DECL_START = '<!DOCTYPE';
453
+ /*
454
+ https://www.w3.org/TR/xml11/#NT-doctypedecl
455
+ `[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'`
456
+ https://www.w3.org/TR/xml-names/#NT-doctypedecl
457
+ `[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'`
458
+ var doctypedecl = reg('<!DOCTYPE', S, Name, regg(S, ExternalID), '?', S_OPT, regg(/\[/, intSubset, /]/, S_OPT), '?', '>');
459
+ */
460
+
461
+ var CDATA_START = '<![CDATA[';
462
+ var CDATA_END = ']]>';
463
+ var CDStart = /<!\[CDATA\[/;
464
+ var CDEnd = /\]\]>/;
465
+ var CData = reg(Char, '*?', CDEnd);
466
+ /*
467
+ https://www.w3.org/TR/xml/#dt-cdsection
468
+ `[18] CDSect ::= CDStart CData CDEnd`
469
+ `[19] CDStart ::= '<![CDATA['`
470
+ `[20] CData ::= (Char* - (Char* ']]>' Char*))`
471
+ `[21] CDEnd ::= ']]>'`
472
+ */
473
+ var CDSect = reg(CDStart, CData);
474
+
475
+ // unit tested
476
+ exports.chars = chars;
477
+ exports.chars_without = chars_without;
478
+ exports.detectUnicodeSupport = detectUnicodeSupport;
479
+ exports.reg = reg;
480
+ exports.regg = regg;
481
+ exports.AttlistDecl = AttlistDecl;
482
+ exports.CDATA_START = CDATA_START;
483
+ exports.CDATA_END = CDATA_END;
484
+ exports.CDSect = CDSect;
485
+ exports.Char = Char;
486
+ exports.Comment = Comment;
487
+ exports.COMMENT_START = COMMENT_START;
488
+ exports.COMMENT_END = COMMENT_END;
489
+ exports.DOCTYPE_DECL_START = DOCTYPE_DECL_START;
490
+ exports.elementdecl = elementdecl;
491
+ exports.EntityDecl = EntityDecl;
492
+ exports.EntityValue = EntityValue;
493
+ exports.ExternalID = ExternalID;
494
+ exports.ExternalID_match = ExternalID_match;
495
+ exports.Name = Name;
496
+ exports.NotationDecl = NotationDecl;
497
+ exports.PEReference = PEReference;
498
+ exports.PI = PI;
499
+ exports.PUBLIC = PUBLIC;
500
+ exports.PubidLiteral = PubidLiteral;
501
+ exports.QName = QName;
502
+ exports.QName_exact = QName_exact;
503
+ exports.QName_group = QName_group;
504
+ exports.S = S;
505
+ exports.SChar_s = SChar_s;
506
+ exports.S_OPT = S_OPT;
507
+ exports.SYSTEM = SYSTEM;
508
+ exports.SystemLiteral = SystemLiteral;
509
+ exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
510
+ exports.XMLDecl = XMLDecl;
package/lib/index.js CHANGED
@@ -1,6 +1,20 @@
1
1
  'use strict';
2
2
 
3
+ var conventions = require('./conventions');
4
+ exports.assign = conventions.assign;
5
+ exports.hasDefaultHTMLNamespace = conventions.hasDefaultHTMLNamespace;
6
+ exports.isHTMLMimeType = conventions.isHTMLMimeType;
7
+ exports.isValidMimeType = conventions.isValidMimeType;
8
+ exports.MIME_TYPE = conventions.MIME_TYPE;
9
+ exports.NAMESPACE = conventions.NAMESPACE;
10
+ exports.ParseError = conventions.ParseError;
11
+
3
12
  var dom = require('./dom');
13
+ exports.DOMException = dom.DOMException;
4
14
  exports.DOMImplementation = dom.DOMImplementation;
5
15
  exports.XMLSerializer = dom.XMLSerializer;
6
- exports.DOMParser = require('./dom-parser').DOMParser;
16
+
17
+ var domParser = require('./dom-parser');
18
+ exports.DOMParser = domParser.DOMParser;
19
+ exports.onErrorStopParsing = domParser.onErrorStopParsing;
20
+ exports.onWarningStopParsing = domParser.onWarningStopParsing;