@xmldom/xmldom 0.9.0-beta.1 → 0.9.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js ADDED
@@ -0,0 +1,516 @@
1
+ 'use strict';
2
+
3
/**
 * Detects relevant unicode support for regular expressions in the runtime.
 * Should the runtime not accept the flag `u` or unicode code point escapes,
 * character classes without unicode handling will be used.
 *
 * The probe pattern is handed to the constructor as text (`\u{1d306}` with an escaped
 * backslash) instead of being written as an ES2015 string escape:
 * an ES5 parser rejects a `\u{...}` escape inside a string literal while parsing the file,
 * long before the `try`/`catch` below could turn that into a `false` result.
 *
 * @param {typeof RegExp} [RegExpImpl=RegExp]
 * For testing: the RegExp class. Anything that is not a function falls back to `RegExp`.
 * @returns {boolean}
 * `true` if a `u`-flagged expression could be created and it matched the astral character
 * U+1D306 as a whole (2 UTF-16 code units), `false` in every other case,
 * including any error thrown while constructing or executing the expression.
 * @see https://node.green/#ES2015-syntax-RegExp--y--and--u--flags
 */
function detectUnicodeSupport(RegExpImpl) {
	if (typeof RegExpImpl !== 'function') {
		RegExpImpl = RegExp;
	}
	try {
		// '\uD834\uDF06' is the surrogate pair of U+1D306, spelled with ES5 compatible escapes
		var match = new RegExpImpl('\\u{1d306}', 'u').exec('\uD834\uDF06');
		return !!match && match[0].length === 2;
	} catch (error) {
		// deliberate best effort: a runtime that throws here (e.g. on the unknown flag `u`)
		// simply has no usable unicode support
		return false;
	}
}
24
+ var UNICODE_SUPPORT = detectUnicodeSupport();
25
+
26
/**
 * Returns the content of a character class expression:
 * everything between the leading `[` and the last `]` of `regexp.source`,
 * which also drops anything following that last `]` (e.g. a quantifier).
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @returns {string}
 * @throws {Error}
 * If the source of `regexp` does not start with `[`.
 */
function chars(regexp) {
	var pattern = regexp.source;
	if (pattern[0] === '[') {
		var closing = pattern.lastIndexOf(']');
		return pattern.slice(1, closing);
	}
	throw new Error(regexp + ' can not be used with chars');
}
37
+
38
/**
 * Creates a new character list regular expression,
 * by removing the first occurrence of `search` from the source of `regexp`.
 *
 * The returned expression only carries the `u` flag (when `UNICODE_SUPPORT` is truthy)
 * or no flag at all; flags of `regexp` are not copied.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @param {string} search
 * The character(s) to remove. `-` can only be removed from the first position
 * inside the brackets, since anywhere else it would be part of a range.
 * @returns {RegExp}
 * @throws {Error}
 * If the source of `regexp` does not start with `[`,
 * if `search` is not a non-empty string,
 * if `search` is not part of the source,
 * or if `search` is `-` and not the first character after `[`.
 */
function chars_without(regexp, search) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error('/' + source + '/ can not be used with chars_without');
	}
	if (!search || typeof search !== 'string') {
		throw new Error(JSON.stringify(search) + ' is not a valid search');
	}
	var position = source.indexOf(search);
	if (position === -1) {
		throw new Error('"' + search + '" is not in /' + source + '/');
	}
	if (search === '-' && position !== 1) {
		throw new Error('"' + search + '" is not at the first position of /' + source + '/');
	}
	return new RegExp(source.replace(search, ''), UNICODE_SUPPORT ? 'u' : '');
}
62
+
63
/**
 * Combines strings and regular expressions into a single regular expression:
 * strings are used as they are, regular expressions contribute their `source`
 * (their flags are ignored).
 * The result always has the flag `m`, and additionally `u` when `UNICODE_SUPPORT` is truthy.
 *
 * A bare `'|'` part is rejected when called directly, because the alternation would span
 * the whole combined expression; `regg` wraps the parts into a group and is allowed to pass it.
 *
 * @param {...(RegExp | string)} args
 * The parts to concatenate, in order.
 * @returns {RegExp}
 * @throws {Error}
 * If one of the parts is the string `'|'` and the function was not invoked by `regg`.
 * @throws {TypeError}
 * If a part is neither a string nor an object with a string `source`.
 * Such parts used to be silently dropped (or failed with an unrelated property access error).
 */
function reg(args) {
	// The file runs in strict mode, so `this` is `undefined` for a plain `reg(...)` call,
	// while `regg` invokes `reg.apply(regg, ...)` to identify itself.
	var calledDirectly = this === undefined;
	var source = Array.prototype.map
		.call(arguments, function (part, index) {
			if (typeof part === 'string') {
				if (calledDirectly && part === '|') {
					throw new Error('use regg instead of reg to wrap expressions with `|`!');
				}
				return part;
			}
			// duck typing instead of `instanceof`, so expressions from other realms keep working
			if (part && typeof part.source === 'string') {
				return part.source;
			}
			throw new TypeError('reg: argument ' + index + ' must be a string or a RegExp');
		})
		.join('');
	return new RegExp(source, UNICODE_SUPPORT ? 'mu' : 'm');
}
85
+
86
/**
 * Same as `reg`, but the parts are enclosed in `(?:` and `)`,
 * so the combined expression forms a single non-capturing group.
 * `reg` is invoked with `regg` as its `this` value.
 *
 * @param {...(RegExp | string)} args
 * The parts to put into the group, at least one is required.
 * @returns {RegExp}
 * @throws {Error}
 * If it is called without any argument.
 */
function regg(args) {
	var parts = Array.prototype.slice.call(arguments);
	if (parts.length < 1) {
		throw new Error('no parameters provided');
	}
	parts.unshift('(?:');
	parts.push(')');
	return reg.apply(regg, parts);
}
98
+
99
+ // /**
100
+ // * Append ^ to the beginning of the expression.
101
+ // * @param {...(RegExp | string)[]} args
102
+ // * @returns {RegExp}
103
+ // */
104
+ // function reg_start(args) {
105
+ // if (arguments.length === 0) {
106
+ // throw new Error('no parameters provided');
107
+ // }
108
+ // return reg.apply(reg_start, ['^'].concat(Array.prototype.slice.call(arguments)));
109
+ // }
110
+
111
+ // https://www.w3.org/TR/xml/#document
112
+ // `[1] document ::= prolog element Misc*`
113
+ // https://www.w3.org/TR/xml11/#NT-document
114
+ // `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`
115
+
116
+ // https://www.w3.org/TR/xml/#NT-Char
117
+ // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
118
+ // `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
119
+ // https://www.w3.org/TR/xml11/#NT-Char
120
+ // `[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
121
+ // https://www.w3.org/TR/xml11/#NT-RestrictedChar
122
+ // `[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]`
123
+ // https://www.w3.org/TR/xml11/#charsets
124
+ var Char = /[-\x09\x0A\x0D\x20-\x2C\x2E-\uD7FF\uE000-\uFFFD]/; // BMP part only, i.e. without \u{10000}-\u{10FFFF}, which is appended below when the `u` flag is supported
125
+ if (UNICODE_SUPPORT) {
126
+ // eslint-disable-next-line es5/no-unicode-code-point-escape
127
+ Char = reg('[', chars(Char), '\\u{10000}-\\u{10FFFF}', ']');
128
+ }
129
+
130
+ var _SChar = /[\x20\x09\x0D\x0A]/;
131
+ var SChar_s = chars(_SChar);
132
+ // https://www.w3.org/TR/xml11/#NT-S
133
+ // `[3] S ::= (#x20 | #x9 | #xD | #xA)+`
134
+ var S = reg(_SChar, '+');
135
+ // optional whitespace described as `S?` in the grammar,
136
+ // simplified to 0-n occurrences of the character class
137
+ // instead of 0-1 occurrences of a non-capturing group around S
138
+ var S_OPT = reg(_SChar, '*');
139
+
140
// https://www.w3.org/TR/xml11/#NT-NameStartChar
// `[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]`
// The literal below covers the BMP ranges of the production.
// U+037E (GREEK QUESTION MARK) is not a NameStartChar, so the range is split into
// \u0370-\u037D and \u037F-\u1FFF exactly like the production does.
var NameStartChar =
	/[:_a-zA-Z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; // without \u{10000}-\u{EFFFF}
if (UNICODE_SUPPORT) {
	// The astral range can only be expressed with the `u` flag.
	// It ends at #xEFFFF as in the production, not at the last code point #x10FFFF.
	// eslint-disable-next-line es5/no-unicode-code-point-escape
	NameStartChar = reg('[', chars(NameStartChar), '\\u{10000}-\\u{EFFFF}', ']');
}
148
+ var NameStartChar_s = chars(NameStartChar);
149
+
150
+ // https://www.w3.org/TR/xml11/#NT-NameChar
151
+ // `[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]`
152
+ var NameChar = reg('[', NameStartChar_s, chars(/[-.0-9\xB7]/), chars(/[\u0300-\u036F\u203F-\u2040]/), ']');
153
+ // https://www.w3.org/TR/xml11/#NT-Name
154
+ // `[5] Name ::= NameStartChar (NameChar)*`
155
+ var Name = reg(NameStartChar, NameChar, '*');
156
+ /*
157
+ https://www.w3.org/TR/xml11/#NT-Names
158
+ `[6] Names ::= Name (#x20 Name)*`
159
+ */
160
+
161
+ // https://www.w3.org/TR/xml11/#NT-Nmtoken
162
+ // `[7] Nmtoken ::= (NameChar)+`
163
+ var Nmtoken = reg(NameChar, '+');
164
+ /*
165
+ https://www.w3.org/TR/xml11/#NT-Nmtokens
166
+ `[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
167
+ var Nmtokens = reg(Nmtoken, regg(/\x20/, Nmtoken), '*');
168
+ */
169
+
170
+ // https://www.w3.org/TR/xml11/#NT-EntityRef
171
+ // `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
172
+ var EntityRef = reg('&', Name, ';');
173
+ // https://www.w3.org/TR/xml11/#NT-CharRef
174
+ // `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
175
+ var CharRef = regg(/&#[0-9]+;|&#x[0-9a-fA-F]+;/);
176
+
177
+ /*
178
+ https://www.w3.org/TR/xml11/#NT-Reference
179
+ - `[67] Reference ::= EntityRef | CharRef`
180
+ - `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
181
+ - `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
182
+ */
183
+ var Reference = regg(EntityRef, '|', CharRef);
184
+
185
+ // https://www.w3.org/TR/xml11/#NT-PEReference
186
+ // `[69] PEReference ::= '%' Name ';'`
187
+ // [VC: Entity Declared] [WFC: No Recursion] [WFC: In DTD]
188
+ var PEReference = reg('%', Name, ';');
189
+
190
+ // https://www.w3.org/TR/xml11/#NT-EntityValue
191
+ // `[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"`
192
+ var EntityValue = regg(
193
+ reg('"', regg(/[^%&"]/, '|', PEReference, '|', Reference), '*', '"'),
194
+ '|',
195
+ reg("'", regg(/[^%&']/, '|', PEReference, '|', Reference), '*', "'")
196
+ );
197
+
198
+ // https://www.w3.org/TR/xml11/#NT-AttValue
199
+ // `[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"`
200
+ var AttValue = regg('"', regg(/[^<&"]/, '|', Reference), '*', '"', '|', "'", regg(/[^<&']/, '|', Reference), '*', "'");
201
+
202
+ // https://www.w3.org/TR/xml-names/#ns-decl
203
+ // https://www.w3.org/TR/xml-names/#ns-qualnames
204
+ // NameStartChar without ":"
205
+ var NCNameStartChar = chars_without(NameStartChar, ':');
206
+ // https://www.w3.org/TR/xml-names/#orphans
207
+ // `[5] NCNameChar ::= NameChar - ':'`
208
+ // An XML NameChar, minus the ":"
209
+ var NCNameChar = chars_without(NameChar, ':');
210
+ // https://www.w3.org/TR/xml-names/#NT-NCName
211
+ // `[4] NCName ::= Name - (Char* ':' Char*)`
212
+ // An XML Name, minus the ":"
213
+ var NCName = reg(NCNameStartChar, NCNameChar, '*');
214
+
215
+ /**
216
+ https://www.w3.org/TR/xml-names/#ns-qualnames
217
+
218
+ ```
219
+ [7] QName ::= PrefixedName | UnprefixedName
220
+ === (NCName ':' NCName) | NCName
221
+ === NCName (':' NCName)?
222
+ [8] PrefixedName ::= Prefix ':' LocalPart
223
+ === NCName ':' NCName
224
+ [9] UnprefixedName ::= LocalPart
225
+ === NCName
226
+ [10] Prefix ::= NCName
227
+ [11] LocalPart ::= NCName
228
+ ```
229
+ */
230
+ var QName = reg(NCName, regg(':', NCName), '?');
231
+ var QName_exact = reg('^', QName, '$');
232
+ var QName_group = reg('(', QName, ')');
233
+
234
+ // https://www.w3.org/TR/xml11/#NT-SystemLiteral
235
+ // `[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")`
236
+ var SystemLiteral = regg(/"[^"]*"|'[^']*'/);
237
+
238
+ /*
239
+ https://www.w3.org/TR/xml11/#NT-PI
240
+ ```
241
+ [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
242
+ [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
243
+ ```
244
+ target /xml/i is not excluded!
245
+ */
246
+ var PI = reg(/^<\?/, '(', Name, ')', regg(S, '(', Char, '*?)'), '?', /\?>/);
247
+
248
+ // https://www.w3.org/TR/xml11/#NT-PubidChar
249
+ // `[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]`
250
+ var PubidChar = /[\x20\x0D\x0Aa-zA-Z0-9-'()+,./:=?;!*#@$_%]/;
251
+
252
+ // https://www.w3.org/TR/xml11/#NT-PubidLiteral
253
+ // `[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"`
254
+ var PubidLiteral = regg('"', PubidChar, '*"', '|', "'", chars_without(PubidChar, "'"), "*'");
255
+
256
+ // https://www.w3.org/TR/xml11/#NT-CharData
257
+ // `[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
258
+
259
+ var COMMENT_START = '<!--';
260
+ var COMMENT_END = '-->';
261
+ // https://www.w3.org/TR/xml11/#NT-Comment
262
+ // `[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'`
263
+ var Comment = reg(COMMENT_START, regg(chars_without(Char, '-'), '|', reg('-', chars_without(Char, '-'))), '*', COMMENT_END);
264
+
265
+ var PCDATA = '#PCDATA';
266
+ // https://www.w3.org/TR/xml11/#NT-Mixed
267
+ // `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
268
+ // https://www.w3.org/TR/xml-names/#NT-Mixed
269
+ // `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
270
+ // [VC: Proper Group/PE Nesting] [VC: No Duplicate Types]
271
+ var Mixed = regg(
272
+ reg(/\(/, S_OPT, PCDATA, regg(S_OPT, /\|/, S_OPT, QName), '*', S_OPT, /\)\*/),
273
+ '|',
274
+ reg(/\(/, S_OPT, PCDATA, S_OPT, /\)/)
275
+ );
276
+
277
+ var _children_quantity = /[?*+]?/;
278
+ /*
279
+ `[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'` [VC: Proper Group/PE Nesting]
280
+ `[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'` [VC: Proper Group/PE Nesting]
281
+ simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
282
+ var _choice_or_seq = reg('[', NameChar_s, SChar_s, chars(_children_quantity), '()|,]*');
283
+ ```
284
+ [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
285
+ === (Name | '(' S? cp ( S? '|' S? cp )+ S? ')' | '(' S? cp ( S? ',' S? cp )* S? ')') ('?' | '*' | '+')?
286
+ !== (Name | [_choice_or_seq]*) ('?' | '*' | '+')?
287
+ ```
288
+ simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
289
+ var cp = reg(regg(Name, '|', _choice_or_seq), _children_quantity);
290
+ */
291
+ /*
292
+ Inefficient regular expression (High)
293
+ This part of the regular expression may cause exponential backtracking on strings starting with '(|' and containing many repetitions of '|'.
294
+ https://github.com/xmldom/xmldom/security/code-scanning/91
295
+ var choice = regg(/\(/, S_OPT, cp, regg(S_OPT, /\|/, S_OPT, cp), '+', S_OPT, /\)/);
296
+ */
297
+ /*
298
+ Inefficient regular expression (High)
299
+ This part of the regular expression may cause exponential backtracking on strings starting with '(,' and containing many repetitions of ','.
300
+ https://github.com/xmldom/xmldom/security/code-scanning/92
301
+ var seq = regg(/\(/, S_OPT, cp, regg(S_OPT, /,/, S_OPT, cp), '*', S_OPT, /\)/);
302
+ */
303
+
304
+ // `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
305
+ // simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
306
+ var children = reg(/\([^>]+\)/, _children_quantity /*regg(choice, '|', seq), _children_quantity*/);
307
+
308
+ // https://www.w3.org/TR/xml11/#NT-contentspec
309
+ // `[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children`
310
+ var contentspec = regg('EMPTY', '|', 'ANY', '|', Mixed, '|', children);
311
+
312
+ var ELEMENTDECL_START = '<!ELEMENT';
313
+ // https://www.w3.org/TR/xml11/#NT-elementdecl
314
+ // `[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'`
315
+ // https://www.w3.org/TR/xml-names/#NT-elementdecl
316
+ // `[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'`
317
+ // because of https://www.w3.org/TR/xml11/#NT-PEReference
318
+ // since xmldom is not supporting replacements of PEReferences in the DTD
319
+ // this also supports PEReference in the possible places
320
+ var elementdecl = reg(ELEMENTDECL_START, S, regg(QName, '|', PEReference), S, regg(contentspec, '|', PEReference), S_OPT, '>');
321
+
322
+ // https://www.w3.org/TR/xml11/#NT-NotationType
323
+ // `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
324
+ // [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]
325
+ var NotationType = reg('NOTATION', S, /\(/, S_OPT, Name, regg(S_OPT, /\|/, S_OPT, Name), '*', S_OPT, /\)/);
326
+ // https://www.w3.org/TR/xml11/#NT-Enumeration
327
+ // `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
328
+ // [VC: Enumeration] [VC: No Duplicate Tokens]
329
+ var Enumeration = reg(/\(/, S_OPT, Nmtoken, regg(S_OPT, /\|/, S_OPT, Nmtoken), '*', S_OPT, /\)/);
330
+
331
+ // https://www.w3.org/TR/xml11/#NT-EnumeratedType
332
+ // `[57] EnumeratedType ::= NotationType | Enumeration`
333
+ var EnumeratedType = regg(NotationType, '|', Enumeration);
334
+
335
+ /*
336
+ ```
337
+ [55] StringType ::= 'CDATA'
338
+ [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
339
+ | 'IDREF' [VC: IDREF]
340
+ | 'IDREFS' [VC: IDREF]
341
+ | 'ENTITY' [VC: Entity Name]
342
+ | 'ENTITIES' [VC: Entity Name]
343
+ | 'NMTOKEN' [VC: Name Token]
344
+ | 'NMTOKENS' [VC: Name Token]
345
+ [54] AttType ::= StringType | TokenizedType | EnumeratedType
346
+ ```*/
347
+ var AttType = regg(/CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/, '|', EnumeratedType);
348
+
349
+ // `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
350
+ // [WFC: No < in Attribute Values] [WFC: No External Entity References]
351
+ // [VC: Fixed Attribute Default] [VC: Required Attribute] [VC: Attribute Default Value Syntactically Correct]
352
+ var DefaultDecl = regg(/#REQUIRED|#IMPLIED/, '|', regg(regg('#FIXED', S), '?', AttValue));
353
+
354
+ // https://www.w3.org/TR/xml11/#NT-AttDef
355
+ // [53] AttDef ::= S Name S AttType S DefaultDecl
356
+ // https://www.w3.org/TR/xml-names/#NT-AttDef
357
+ // [1] NSAttName ::= PrefixedAttName | DefaultAttName
358
+ // [2] PrefixedAttName ::= 'xmlns:' NCName [NSC: Reserved Prefixes and Namespace Names]
359
+ // [3] DefaultAttName ::= 'xmlns'
360
+ // [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
361
+ // === S Name S AttType S DefaultDecl
362
+ // xmldom is not distinguishing between QName and NSAttName on this level
363
+ // to support XML without namespaces in DTD we can not restrict it to QName
364
+ var AttDef = regg(S, Name, S, AttType, S, DefaultDecl);
365
+
366
+ var ATTLIST_DECL_START = '<!ATTLIST';
367
+ // https://www.w3.org/TR/xml11/#NT-AttlistDecl
368
+ // `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
369
+ // https://www.w3.org/TR/xml-names/#NT-AttlistDecl
370
+ // `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
371
+ // to support XML without namespaces in DTD we can not restrict it to QName
372
+ var AttlistDecl = reg(ATTLIST_DECL_START, S, Name, AttDef, '*', S_OPT, '>');
373
+
374
+ var SYSTEM = 'SYSTEM';
375
+ var PUBLIC = 'PUBLIC';
376
+ // https://www.w3.org/TR/xml11/#NT-ExternalID
377
+ // `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
378
+ var ExternalID = regg(regg(SYSTEM, S, SystemLiteral), '|', regg(PUBLIC, S, PubidLiteral, S, SystemLiteral));
379
+ var ExternalID_match = reg(
380
+ '^',
381
+ regg(
382
+ regg(SYSTEM, S, '(?<SystemLiteralOnly>', SystemLiteral, ')'),
383
+ '|',
384
+ regg(PUBLIC, S, '(?<PubidLiteral>', PubidLiteral, ')', S, '(?<SystemLiteral>', SystemLiteral, ')')
385
+ )
386
+ );
387
+
388
+ // https://www.w3.org/TR/xml11/#NT-NDataDecl
389
+ // `[76] NDataDecl ::= S 'NDATA' S Name` [VC: Notation Declared]
390
+ var NDataDecl = regg(S, 'NDATA', S, Name);
391
+
392
+ // https://www.w3.org/TR/xml11/#NT-EntityDef
393
+ // `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
394
+ var EntityDef = regg(EntityValue, '|', regg(ExternalID, NDataDecl, '?'));
395
+
396
+ var ENTITY_DECL_START = '<!ENTITY';
397
+ // https://www.w3.org/TR/xml11/#NT-GEDecl
398
+ // `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
399
+ var GEDecl = reg(ENTITY_DECL_START, S, Name, S, EntityDef, S_OPT, '>');
400
+ // https://www.w3.org/TR/xml11/#NT-PEDef
401
+ // `[74] PEDef ::= EntityValue | ExternalID`
402
+ var PEDef = regg(EntityValue, '|', ExternalID);
403
+ // https://www.w3.org/TR/xml11/#NT-PEDecl
404
+ // `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
405
+ var PEDecl = reg(ENTITY_DECL_START, S, '%', S, Name, S, PEDef, S_OPT, '>');
406
+ // https://www.w3.org/TR/xml11/#NT-EntityDecl
407
+ // `[70] EntityDecl ::= GEDecl | PEDecl`
408
+ var EntityDecl = regg(GEDecl, '|', PEDecl);
409
+
410
+ // https://www.w3.org/TR/xml11/#NT-PublicID
411
+ // `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
412
+ var PublicID = reg(PUBLIC, S, PubidLiteral);
413
+ // https://www.w3.org/TR/xml11/#NT-NotationDecl
414
+ // `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'` [VC: Unique Notation Name]
415
+ var NotationDecl = reg('<!NOTATION', S, Name, S, regg(ExternalID, '|', PublicID), S_OPT, '>');
416
+
417
+ // https://www.w3.org/TR/xml11/#NT-Eq
418
+ // `[25] Eq ::= S? '=' S?`
419
+ var Eq = reg(S_OPT, '=', S_OPT);
420
+ // https://www.w3.org/TR/xml/#NT-VersionNum
421
+ // `[26] VersionNum ::= '1.' [0-9]+`
422
+ // https://www.w3.org/TR/xml11/#NT-VersionNum
423
+ // `[26] VersionNum ::= '1.1'`
424
+ var VersionNum = /1[.]\d+/;
425
+ // https://www.w3.org/TR/xml11/#NT-VersionInfo
426
+ // `[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')`
427
+ var VersionInfo = reg(S, 'version', Eq, regg("'", VersionNum, "'", '|', '"', VersionNum, '"'));
428
+ // https://www.w3.org/TR/xml11/#NT-EncName
429
+ // `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
430
+ var EncName = /[A-Za-z][-A-Za-z0-9._]*/;
431
+ // https://www.w3.org/TR/xml11/#NT-EncodingDecl
432
+ // `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
433
+ var EncodingDecl = regg(S, 'encoding', Eq, regg('"', EncName, '"', '|', "'", EncName, "'"));
434
+ // https://www.w3.org/TR/xml11/#NT-SDDecl
435
+ // `[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))`
436
+ var SDDecl = regg(S, 'standalone', Eq, regg("'", regg('yes', '|', 'no'), "'", '|', '"', regg('yes', '|', 'no'), '"'));
437
+ // https://www.w3.org/TR/xml11/#NT-XMLDecl
438
+ // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
439
+ var XMLDecl = reg(/^<\?xml/, VersionInfo, EncodingDecl, '?', SDDecl, '?', S_OPT, /\?>/);
440
+
441
+ /*
442
+ https://www.w3.org/TR/xml/#NT-markupdecl
443
+ https://www.w3.org/TR/xml11/#NT-markupdecl
444
+ `[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment`
445
+ var markupdecl = regg(elementdecl, '|', AttlistDecl, '|', EntityDecl, '|', NotationDecl, '|', PI_unsafe, '|', Comment);
446
+ */
447
+ /*
448
+ https://www.w3.org/TR/xml-names/#NT-doctypedecl
449
+ `[28a] DeclSep ::= PEReference | S`
450
+ https://www.w3.org/TR/xml11/#NT-intSubset
451
+ ```
452
+ [28b] intSubset ::= (markupdecl | DeclSep)*
453
+ === (markupdecl | PEReference | S)*
454
+ ```
455
+ [WFC: PE Between Declarations]
456
+ var intSubset = reg(regg(markupdecl, '|', PEReference, '|', S), '*');
457
+ */
458
+ var DOCTYPE_DECL_START = '<!DOCTYPE';
459
+ /*
460
+ https://www.w3.org/TR/xml11/#NT-doctypedecl
461
+ `[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'`
462
+ https://www.w3.org/TR/xml-names/#NT-doctypedecl
463
+ `[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'`
464
+ var doctypedecl = reg('<!DOCTYPE', S, Name, regg(S, ExternalID), '?', S_OPT, regg(/\[/, intSubset, /]/, S_OPT), '?', '>');
465
+ */
466
+
467
+ var CDATA_START = '<![CDATA[';
468
+ var CDATA_END = ']]>';
469
+ var CDStart = /<!\[CDATA\[/;
470
+ var CDEnd = /\]\]>/;
471
+ var CData = reg(Char, '*?', CDEnd);
472
+ /*
473
+ https://www.w3.org/TR/xml/#dt-cdsection
474
+ `[18] CDSect ::= CDStart CData CDEnd`
475
+ `[19] CDStart ::= '<![CDATA['`
476
+ `[20] CData ::= (Char* - (Char* ']]>' Char*))`
477
+ `[21] CDEnd ::= ']]>'`
478
+ */
479
+ var CDSect = reg(CDStart, CData);
480
+
481
+ // unit tested
482
+ exports.chars = chars;
483
+ exports.chars_without = chars_without;
484
+ exports.detectUnicodeSupport = detectUnicodeSupport;
485
+ exports.reg = reg;
486
+ exports.regg = regg;
487
+ exports.AttlistDecl = AttlistDecl;
488
+ exports.CDATA_START = CDATA_START;
489
+ exports.CDATA_END = CDATA_END;
490
+ exports.CDSect = CDSect;
491
+ exports.Char = Char;
492
+ exports.Comment = Comment;
493
+ exports.COMMENT_START = COMMENT_START;
494
+ exports.COMMENT_END = COMMENT_END;
495
+ exports.DOCTYPE_DECL_START = DOCTYPE_DECL_START;
496
+ exports.elementdecl = elementdecl;
497
+ exports.EntityDecl = EntityDecl;
498
+ exports.EntityValue = EntityValue;
499
+ exports.ExternalID = ExternalID;
500
+ exports.ExternalID_match = ExternalID_match;
501
+ exports.Name = Name;
502
+ exports.NotationDecl = NotationDecl;
503
+ exports.PEReference = PEReference;
504
+ exports.PI = PI;
505
+ exports.PUBLIC = PUBLIC;
506
+ exports.PubidLiteral = PubidLiteral;
507
+ exports.QName = QName;
508
+ exports.QName_exact = QName_exact;
509
+ exports.QName_group = QName_group;
510
+ exports.S = S;
511
+ exports.SChar_s = SChar_s;
512
+ exports.S_OPT = S_OPT;
513
+ exports.SYSTEM = SYSTEM;
514
+ exports.SystemLiteral = SystemLiteral;
515
+ exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
516
+ exports.XMLDecl = XMLDecl;
package/lib/index.js CHANGED
@@ -1,6 +1,20 @@
1
- 'use strict'
1
+ 'use strict';
2
2
 
3
- var dom = require('./dom')
4
- exports.DOMImplementation = dom.DOMImplementation
5
- exports.XMLSerializer = dom.XMLSerializer
6
- exports.DOMParser = require('./dom-parser').DOMParser
3
+ var conventions = require('./conventions');
4
+ exports.assign = conventions.assign;
5
+ exports.hasDefaultHTMLNamespace = conventions.hasDefaultHTMLNamespace;
6
+ exports.isHTMLMimeType = conventions.isHTMLMimeType;
7
+ exports.isValidMimeType = conventions.isValidMimeType;
8
+ exports.MIME_TYPE = conventions.MIME_TYPE;
9
+ exports.NAMESPACE = conventions.NAMESPACE;
10
+ exports.ParseError = conventions.ParseError;
11
+
12
+ var dom = require('./dom');
13
+ exports.DOMException = dom.DOMException;
14
+ exports.DOMImplementation = dom.DOMImplementation;
15
+ exports.XMLSerializer = dom.XMLSerializer;
16
+
17
+ var domParser = require('./dom-parser');
18
+ exports.DOMParser = domParser.DOMParser;
19
+ exports.onErrorStopParsing = domParser.onErrorStopParsing;
20
+ exports.onWarningStopParsing = domParser.onWarningStopParsing;