@xmldom/xmldom 0.9.0-beta.1 → 0.9.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/grammar.js ADDED
@@ -0,0 +1,528 @@
1
+ 'use strict';
2
+
3
/**
 * Detects relevant unicode support for regular expressions in the runtime.
 * Should the runtime not accept the flag `u` or unicode ranges,
 * character classes without unicode handling will be used.
 *
 * The probe builds an expression from the single astral character U+1D306 with the `u` flag
 * and checks that it matches that character as one unit of two UTF-16 code units.
 * The character is written as a surrogate pair escape instead of a code point escape,
 * so that this file can still be parsed by a runtime that does not know code point escapes;
 * the resulting string value is the same.
 *
 * @param {typeof RegExp} [RegExpImpl=RegExp]
 * For testing: the RegExp class. Any value that is not a function is replaced by `RegExp`.
 * @returns {boolean}
 * `true` if the expression could be created and matched the whole character, `false` otherwise
 * (including when creating or executing the expression throws).
 * @see https://node.green/#ES2015-syntax-RegExp--y--and--u--flags
 */
function detectUnicodeSupport(RegExpImpl) {
	// U+1D306 as UTF-16 surrogate pair
	var ASTRAL_CHARACTER = '\uD834\uDF06';
	var Impl = typeof RegExpImpl === 'function' ? RegExpImpl : RegExp;
	try {
		// eslint-disable-next-line es5/no-unicode-regex
		var match = new Impl(ASTRAL_CHARACTER, 'u').exec(ASTRAL_CHARACTER);
		return !!match && match[0].length === 2;
	} catch (error) {
		// best effort: any failure while probing is reported as "not supported"
	}
	return false;
}
24
+ var UNICODE_SUPPORT = detectUnicodeSupport();
25
+
26
/**
 * Returns the content of a character class expression:
 * everything between the leading `[` and the last `]` of the source of `regexp`.
 * This removes `[`, `]` and any trailing quantifiers.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @returns {string}
 * The part of the source between the first character and the last `]`.
 * @throws {Error}
 * If `regexp` has no string `source` or the source does not start with `[`.
 */
function chars(regexp) {
	// values without a `source` (strings, null, undefined, ...) get the same descriptive error
	// as expressions that are not a character class, instead of failing on the property access
	var source = regexp === null || regexp === undefined ? undefined : regexp.source;
	if (typeof source !== 'string' || source[0] !== '[') {
		throw new Error(String(regexp) + ' can not be used with chars');
	}
	return source.slice(1, source.lastIndexOf(']'));
}
37
+
38
/**
 * Creates a new character list regular expression,
 * by removing the first occurrence of `search` from the source of `regexp`.
 *
 * The result is created with the flag `u` when `UNICODE_SUPPORT` is truthy, otherwise without flags.
 *
 * @param {RegExp} regexp
 * An expression whose source starts with `[`.
 * @param {string} search
 * The character(s) to remove. Must be a non-empty string that is part of the source.
 * `-` can only be removed when it is the first character inside the brackets.
 * @returns {RegExp}
 * @throws {Error}
 * If the source does not start with `[`, if `search` is empty or not a string,
 * if `search` is not part of the source, or if `search` is `-` but not at the first position.
 */
function chars_without(regexp, search) {
	var source = regexp.source;
	if (source[0] !== '[') {
		throw new Error('/' + source + '/ can not be used with chars_without');
	}
	if (!search || typeof search !== 'string') {
		throw new Error(JSON.stringify(search) + ' is not a valid search');
	}
	var position = source.indexOf(search);
	if (position === -1) {
		throw new Error('"' + search + '" is not in /' + source + '/');
	}
	// index 0 is the opening `[`, so index 1 is the first position inside the brackets
	if (search === '-' && position !== 1) {
		throw new Error('"' + search + '" is not at the first position of /' + source + '/');
	}
	return new RegExp(source.replace(search, ''), UNICODE_SUPPORT ? 'u' : '');
}
62
+
63
/**
 * Combines strings and regular expressions into one regular expression,
 * by concatenating the strings as they are and the `source` of every regular expression.
 *
 * The result is created with the flags `mu` when `UNICODE_SUPPORT` is truthy, otherwise with `m`.
 *
 * When called without a `this` value (a plain call in strict mode),
 * the string `'|'` is rejected as a part, since the alternation would not be wrapped in a group.
 * `regg` calls this function with a `this` value, which disables that check.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error}
 * If called without `this` and one of the parts is the string `'|'`.
 * @throws {TypeError}
 * If a part is neither a string nor provides a string `source`.
 */
function reg(args) {
	var self = this;
	var source = Array.prototype.slice
		.call(arguments)
		.map(function (part, index) {
			if (typeof part === 'string') {
				if (self === undefined && part === '|') {
					throw new Error('use regg instead of reg to wrap expressions with `|`!');
				}
				return part;
			}
			// without this check a part without `source` would be dropped silently by `join`
			if (part === null || part === undefined || typeof part.source !== 'string') {
				throw new TypeError('argument ' + index + ' of reg is neither a string nor a RegExp: ' + String(part));
			}
			return part.source;
		})
		.join('');
	return new RegExp(source, UNICODE_SUPPORT ? 'mu' : 'm');
}
85
+
86
/**
 * Like `reg`, but encloses the combined expression in `(?:` and `)`,
 * which makes it a non-capturing group.
 * `reg` is invoked with this function as its `this` value.
 *
 * @param {...(RegExp | string)[]} args
 * @returns {RegExp}
 * @throws {Error}
 * If no argument is passed.
 */
function regg(args) {
	var parts = Array.prototype.slice.call(arguments);
	if (parts.length === 0) {
		throw new Error('no parameters provided');
	}
	parts.unshift('(?:');
	parts.push(')');
	return reg.apply(regg, parts);
}
98
+
99
+ // /**
100
+ // * Append ^ to the beginning of the expression.
101
+ // * @param {...(RegExp | string)[]} args
102
+ // * @returns {RegExp}
103
+ // */
104
+ // function reg_start(args) {
105
+ // if (arguments.length === 0) {
106
+ // throw new Error('no parameters provided');
107
+ // }
108
+ // return reg.apply(reg_start, ['^'].concat(Array.prototype.slice.call(arguments)));
109
+ // }
110
+
111
+ // https://www.w3.org/TR/xml/#document
112
+ // `[1] document ::= prolog element Misc*`
113
+ // https://www.w3.org/TR/xml11/#NT-document
114
+ // `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`
115
+
116
+ /**
117
+ * A character usually appearing in wrongly converted strings.
118
+ *
119
+ * @type {string}
120
+ * @see https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
121
+ * @see https://nodejs.dev/en/api/v18/buffer/#buffers-and-character-encodings
122
+ * @see https://www.unicode.org/faq/utf_bom.html#BOM
123
+ * @readonly
124
+ */
125
+ var UNICODE_REPLACEMENT_CHARACTER = '\uFFFD';
126
+ // https://www.w3.org/TR/xml/#NT-Char
127
+ // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
128
+ // `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
129
+ // https://www.w3.org/TR/xml11/#NT-Char
130
+ // `[2] Char ::= [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
131
+ // https://www.w3.org/TR/xml11/#NT-RestrictedChar
132
+ // `[2a] RestrictedChar ::= [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]`
133
+ // https://www.w3.org/TR/xml11/#charsets
134
+ var Char = /[-\x09\x0A\x0D\x20-\x2C\x2E-\uD7FF\uE000-\uFFFD]/; // without \u{10000}-\u{10FFFF} (appended below when UNICODE_SUPPORT is set); `-` (\x2D) is listed first instead of inside the range, so chars_without(Char, '-') can remove it
135
+ if (UNICODE_SUPPORT) {
136
+ // eslint-disable-next-line es5/no-unicode-code-point-escape
137
+ Char = reg('[', chars(Char), '\\u{10000}-\\u{10FFFF}', ']');
138
+ }
139
+
140
+ var _SChar = /[\x20\x09\x0D\x0A]/;
141
+ var SChar_s = chars(_SChar);
142
+ // https://www.w3.org/TR/xml11/#NT-S
143
+ // `[3] S ::= (#x20 | #x9 | #xD | #xA)+`
144
+ var S = reg(_SChar, '+');
145
+ // optional whitespace described as `S?` in the grammar,
146
+ // simplified to 0-n occurrences of the character class
147
+ // instead of 0-1 occurrences of a non-capturing group around S
148
+ var S_OPT = reg(_SChar, '*');
149
+
150
+ // https://www.w3.org/TR/xml11/#NT-NameStartChar
151
+ // `[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]`
152
// Part of production [4] that can be expressed without the `u` flag.
// #x37E is not part of the production (`[#x370-#x37D] | [#x37F-#x1FFF]`), so the range is split around it.
var NameStartChar =
	/[:_a-zA-Z\xC0-\xD6\xD8-\xF6\xF8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/; // without \u{10000}-\u{EFFFF}
if (UNICODE_SUPPORT) {
	// the last range of production [4] is `[#x10000-#xEFFFF]`, it does not extend to #x10FFFF like `Char`
	// eslint-disable-next-line es5/no-unicode-code-point-escape
	NameStartChar = reg('[', chars(NameStartChar), '\\u{10000}-\\u{EFFFF}', ']');
}
158
+ var NameStartChar_s = chars(NameStartChar);
159
+
160
+ // https://www.w3.org/TR/xml11/#NT-NameChar
161
+ // `[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]`
162
+ var NameChar = reg('[', NameStartChar_s, chars(/[-.0-9\xB7]/), chars(/[\u0300-\u036F\u203F-\u2040]/), ']');
163
+ // https://www.w3.org/TR/xml11/#NT-Name
164
+ // `[5] Name ::= NameStartChar (NameChar)*`
165
+ var Name = reg(NameStartChar, NameChar, '*');
166
+ /*
167
+ https://www.w3.org/TR/xml11/#NT-Names
168
+ `[6] Names ::= Name (#x20 Name)*`
169
+ */
170
+
171
+ // https://www.w3.org/TR/xml11/#NT-Nmtoken
172
+ // `[7] Nmtoken ::= (NameChar)+`
173
+ var Nmtoken = reg(NameChar, '+');
174
+ /*
175
+ https://www.w3.org/TR/xml11/#NT-Nmtokens
176
+ `[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*`
177
+ var Nmtokens = reg(Nmtoken, regg(/\x20/, Nmtoken), '*');
178
+ */
179
+
180
+ // https://www.w3.org/TR/xml11/#NT-EntityRef
181
+ // `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
182
+ var EntityRef = reg('&', Name, ';');
183
+ // https://www.w3.org/TR/xml11/#NT-CharRef
184
+ // `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
185
+ var CharRef = regg(/&#[0-9]+;|&#x[0-9a-fA-F]+;/);
186
+
187
+ /*
188
+ https://www.w3.org/TR/xml11/#NT-Reference
189
+ - `[67] Reference ::= EntityRef | CharRef`
190
+ - `[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'` [WFC: Legal Character]
191
+ - `[68] EntityRef ::= '&' Name ';'` [WFC: Entity Declared] [VC: Entity Declared] [WFC: Parsed Entity] [WFC: No Recursion]
192
+ */
193
+ var Reference = regg(EntityRef, '|', CharRef);
194
+
195
+ // https://www.w3.org/TR/xml11/#NT-PEReference
196
+ // `[69] PEReference ::= '%' Name ';'`
197
+ // [VC: Entity Declared] [WFC: No Recursion] [WFC: In DTD]
198
+ var PEReference = reg('%', Name, ';');
199
+
200
+ // https://www.w3.org/TR/xml11/#NT-EntityValue
201
+ // `[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' | "'" ([^%&'] | PEReference | Reference)* "'"`
202
+ var EntityValue = regg(
203
+ reg('"', regg(/[^%&"]/, '|', PEReference, '|', Reference), '*', '"'),
204
+ '|',
205
+ reg("'", regg(/[^%&']/, '|', PEReference, '|', Reference), '*', "'")
206
+ );
207
+
208
+ // https://www.w3.org/TR/xml11/#NT-AttValue
209
+ // `[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"`
210
+ var AttValue = regg('"', regg(/[^<&"]/, '|', Reference), '*', '"', '|', "'", regg(/[^<&']/, '|', Reference), '*', "'");
211
+
212
+ // https://www.w3.org/TR/xml-names/#ns-decl
213
+ // https://www.w3.org/TR/xml-names/#ns-qualnames
214
+ // NameStartChar without ":"
215
+ var NCNameStartChar = chars_without(NameStartChar, ':');
216
+ // https://www.w3.org/TR/xml-names/#orphans
217
+ // `[5] NCNameChar ::= NameChar - ':'`
218
+ // An XML NameChar, minus the ":"
219
+ var NCNameChar = chars_without(NameChar, ':');
220
+ // https://www.w3.org/TR/xml-names/#NT-NCName
221
+ // `[4] NCName ::= Name - (Char* ':' Char*)`
222
+ // An XML Name, minus the ":"
223
+ var NCName = reg(NCNameStartChar, NCNameChar, '*');
224
+
225
+ /**
226
+ https://www.w3.org/TR/xml-names/#ns-qualnames
227
+
228
+ ```
229
+ [7] QName ::= PrefixedName | UnprefixedName
230
+ === (NCName ':' NCName) | NCName
231
+ === NCName (':' NCName)?
232
+ [8] PrefixedName ::= Prefix ':' LocalPart
233
+ === NCName ':' NCName
234
+ [9] UnprefixedName ::= LocalPart
235
+ === NCName
236
+ [10] Prefix ::= NCName
237
+ [11] LocalPart ::= NCName
238
+ ```
239
+ */
240
+ var QName = reg(NCName, regg(':', NCName), '?');
241
+ var QName_exact = reg('^', QName, '$');
242
+ var QName_group = reg('(', QName, ')');
243
+
244
+ // https://www.w3.org/TR/xml11/#NT-SystemLiteral
245
+ // `[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")`
246
+ var SystemLiteral = regg(/"[^"]*"|'[^']*'/);
247
+
248
+ /*
249
+ https://www.w3.org/TR/xml11/#NT-PI
250
+ ```
251
+ [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
252
+ [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
253
+ ```
254
+ target /xml/i is not excluded!
255
+ */
256
+ var PI = reg(/^<\?/, '(', Name, ')', regg(S, '(', Char, '*?)'), '?', /\?>/);
257
+
258
+ // https://www.w3.org/TR/xml11/#NT-PubidChar
259
+ // `[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]`
260
+ var PubidChar = /[\x20\x0D\x0Aa-zA-Z0-9-'()+,./:=?;!*#@$_%]/;
261
+
262
+ // https://www.w3.org/TR/xml11/#NT-PubidLiteral
263
+ // `[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"`
264
+ var PubidLiteral = regg('"', PubidChar, '*"', '|', "'", chars_without(PubidChar, "'"), "*'");
265
+
266
+ // https://www.w3.org/TR/xml11/#NT-CharData
267
+ // `[14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)`
268
+
269
+ var COMMENT_START = '<!--';
270
+ var COMMENT_END = '-->';
271
+ // https://www.w3.org/TR/xml11/#NT-Comment
272
+ // `[15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'`
273
+ var Comment = reg(COMMENT_START, regg(chars_without(Char, '-'), '|', reg('-', chars_without(Char, '-'))), '*', COMMENT_END);
274
+
275
+ var PCDATA = '#PCDATA';
276
+ // https://www.w3.org/TR/xml11/#NT-Mixed
277
+ // `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')'`
278
+ // https://www.w3.org/TR/xml-names/#NT-Mixed
279
+ // `[51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? QName)* S? ')*' | '(' S? '#PCDATA' S? ')'`
280
+ // [VC: Proper Group/PE Nesting] [VC: No Duplicate Types]
281
+ var Mixed = regg(
282
+ reg(/\(/, S_OPT, PCDATA, regg(S_OPT, /\|/, S_OPT, QName), '*', S_OPT, /\)\*/),
283
+ '|',
284
+ reg(/\(/, S_OPT, PCDATA, S_OPT, /\)/)
285
+ );
286
+
287
+ var _children_quantity = /[?*+]?/;
288
+ /*
289
+ `[49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'` [VC: Proper Group/PE Nesting]
290
+ `[50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'` [VC: Proper Group/PE Nesting]
291
+ simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
292
+ var _choice_or_seq = reg('[', NameChar_s, SChar_s, chars(_children_quantity), '()|,]*');
293
+ ```
294
+ [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
295
+ === (Name | '(' S? cp ( S? '|' S? cp )+ S? ')' | '(' S? cp ( S? ',' S? cp )* S? ')') ('?' | '*' | '+')?
296
+ !== (Name | [_choice_or_seq]*) ('?' | '*' | '+')?
297
+ ```
298
+ simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
299
+ var cp = reg(regg(Name, '|', _choice_or_seq), _children_quantity);
300
+ */
301
+ /*
302
+ Inefficient regular expression (High)
303
+ This part of the regular expression may cause exponential backtracking on strings starting with '(|' and containing many repetitions of '|'.
304
+ https://github.com/xmldom/xmldom/security/code-scanning/91
305
+ var choice = regg(/\(/, S_OPT, cp, regg(S_OPT, /\|/, S_OPT, cp), '+', S_OPT, /\)/);
306
+ */
307
+ /*
308
+ Inefficient regular expression (High)
309
+ This part of the regular expression may cause exponential backtracking on strings starting with '(,' and containing many repetitions of ','.
310
+ https://github.com/xmldom/xmldom/security/code-scanning/92
311
+ var seq = regg(/\(/, S_OPT, cp, regg(S_OPT, /,/, S_OPT, cp), '*', S_OPT, /\)/);
312
+ */
313
+
314
+ // `[47] children ::= (choice | seq) ('?' | '*' | '+')?`
315
+ // simplification to solve circular referencing, but doesn't check validity constraint "Proper Group/PE Nesting"
316
+ var children = reg(/\([^>]+\)/, _children_quantity /*regg(choice, '|', seq), _children_quantity*/);
317
+
318
+ // https://www.w3.org/TR/xml11/#NT-contentspec
319
+ // `[46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children`
320
+ var contentspec = regg('EMPTY', '|', 'ANY', '|', Mixed, '|', children);
321
+
322
+ var ELEMENTDECL_START = '<!ELEMENT';
323
+ // https://www.w3.org/TR/xml11/#NT-elementdecl
324
+ // `[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'`
325
+ // https://www.w3.org/TR/xml-names/#NT-elementdecl
326
+ // `[17] elementdecl ::= '<!ELEMENT' S QName S contentspec S? '>'`
327
+ // xmldom does not replace PEReferences (https://www.w3.org/TR/xml11/#NT-PEReference) in the DTD,
328
+ // so this expression also accepts a PEReference
329
+ // in place of the name and in place of the contentspec.
330
+ var elementdecl = reg(ELEMENTDECL_START, S, regg(QName, '|', PEReference), S, regg(contentspec, '|', PEReference), S_OPT, '>');
331
+
332
+ // https://www.w3.org/TR/xml11/#NT-NotationType
333
+ // `[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'`
334
+ // [VC: Notation Attributes] [VC: One Notation Per Element Type] [VC: No Notation on Empty Element] [VC: No Duplicate Tokens]
335
+ var NotationType = reg('NOTATION', S, /\(/, S_OPT, Name, regg(S_OPT, /\|/, S_OPT, Name), '*', S_OPT, /\)/);
336
+ // https://www.w3.org/TR/xml11/#NT-Enumeration
337
+ // `[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'`
338
+ // [VC: Enumeration] [VC: No Duplicate Tokens]
339
+ var Enumeration = reg(/\(/, S_OPT, Nmtoken, regg(S_OPT, /\|/, S_OPT, Nmtoken), '*', S_OPT, /\)/);
340
+
341
+ // https://www.w3.org/TR/xml11/#NT-EnumeratedType
342
+ // `[57] EnumeratedType ::= NotationType | Enumeration`
343
+ var EnumeratedType = regg(NotationType, '|', Enumeration);
344
+
345
+ /*
346
+ ```
347
+ [55] StringType ::= 'CDATA'
348
+ [56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default]
349
+ | 'IDREF' [VC: IDREF]
350
+ | 'IDREFS' [VC: IDREF]
351
+ | 'ENTITY' [VC: Entity Name]
352
+ | 'ENTITIES' [VC: Entity Name]
353
+ | 'NMTOKEN' [VC: Name Token]
354
+ | 'NMTOKENS' [VC: Name Token]
355
+ [54] AttType ::= StringType | TokenizedType | EnumeratedType
356
+ ```*/
357
+ var AttType = regg(/CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS/, '|', EnumeratedType);
358
+
359
+ // `[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)`
360
+ // [WFC: No < in Attribute Values] [WFC: No External Entity References]
361
+ // [VC: Fixed Attribute Default] [VC: Required Attribute] [VC: Attribute Default Value Syntactically Correct]
362
+ var DefaultDecl = regg(/#REQUIRED|#IMPLIED/, '|', regg(regg('#FIXED', S), '?', AttValue));
363
+
364
+ // https://www.w3.org/TR/xml11/#NT-AttDef
365
+ // [53] AttDef ::= S Name S AttType S DefaultDecl
366
+ // https://www.w3.org/TR/xml-names/#NT-AttDef
367
+ // [1] NSAttName ::= PrefixedAttName | DefaultAttName
368
+ // [2] PrefixedAttName ::= 'xmlns:' NCName [NSC: Reserved Prefixes and Namespace Names]
369
+ // [3] DefaultAttName ::= 'xmlns'
370
+ // [21] AttDef ::= S (QName | NSAttName) S AttType S DefaultDecl
371
+ // === S Name S AttType S DefaultDecl
372
+ // xmldom is not distinguishing between QName and NSAttName on this level
373
+ // to support XML without namespaces in DTD we can not restrict it to QName
374
+ var AttDef = regg(S, Name, S, AttType, S, DefaultDecl);
375
+
376
+ var ATTLIST_DECL_START = '<!ATTLIST';
377
+ // https://www.w3.org/TR/xml11/#NT-AttlistDecl
378
+ // `[52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'`
379
+ // https://www.w3.org/TR/xml-names/#NT-AttlistDecl
380
+ // `[20] AttlistDecl ::= '<!ATTLIST' S QName AttDef* S? '>'`
381
+ // to support XML without namespaces in DTD we can not restrict it to QName
382
+ var AttlistDecl = reg(ATTLIST_DECL_START, S, Name, AttDef, '*', S_OPT, '>');
383
+
384
+ var SYSTEM = 'SYSTEM';
385
+ var PUBLIC = 'PUBLIC';
386
+ // https://www.w3.org/TR/xml11/#NT-ExternalID
387
+ // `[75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral`
388
+ var ExternalID = regg(regg(SYSTEM, S, SystemLiteral), '|', regg(PUBLIC, S, PubidLiteral, S, SystemLiteral));
389
+ var ExternalID_match = reg(
390
+ '^',
391
+ regg(
392
+ regg(SYSTEM, S, '(?<SystemLiteralOnly>', SystemLiteral, ')'),
393
+ '|',
394
+ regg(PUBLIC, S, '(?<PubidLiteral>', PubidLiteral, ')', S, '(?<SystemLiteral>', SystemLiteral, ')')
395
+ )
396
+ );
397
+
398
+ // https://www.w3.org/TR/xml11/#NT-NDataDecl
399
+ // `[76] NDataDecl ::= S 'NDATA' S Name` [VC: Notation Declared]
400
+ var NDataDecl = regg(S, 'NDATA', S, Name);
401
+
402
+ // https://www.w3.org/TR/xml11/#NT-EntityDef
403
+ // `[73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)`
404
+ var EntityDef = regg(EntityValue, '|', regg(ExternalID, NDataDecl, '?'));
405
+
406
+ var ENTITY_DECL_START = '<!ENTITY';
407
+ // https://www.w3.org/TR/xml11/#NT-GEDecl
408
+ // `[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'`
409
+ var GEDecl = reg(ENTITY_DECL_START, S, Name, S, EntityDef, S_OPT, '>');
410
+ // https://www.w3.org/TR/xml11/#NT-PEDef
411
+ // `[74] PEDef ::= EntityValue | ExternalID`
412
+ var PEDef = regg(EntityValue, '|', ExternalID);
413
+ // https://www.w3.org/TR/xml11/#NT-PEDecl
414
+ // `[72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'`
415
+ var PEDecl = reg(ENTITY_DECL_START, S, '%', S, Name, S, PEDef, S_OPT, '>');
416
+ // https://www.w3.org/TR/xml11/#NT-EntityDecl
417
+ // `[70] EntityDecl ::= GEDecl | PEDecl`
418
+ var EntityDecl = regg(GEDecl, '|', PEDecl);
419
+
420
+ // https://www.w3.org/TR/xml11/#NT-PublicID
421
+ // `[83] PublicID ::= 'PUBLIC' S PubidLiteral`
422
+ var PublicID = reg(PUBLIC, S, PubidLiteral);
423
+ // https://www.w3.org/TR/xml11/#NT-NotationDecl
424
+ // `[82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'` [VC: Unique Notation Name]
425
+ var NotationDecl = reg('<!NOTATION', S, Name, S, regg(ExternalID, '|', PublicID), S_OPT, '>');
426
+
427
+ // https://www.w3.org/TR/xml11/#NT-Eq
428
+ // `[25] Eq ::= S? '=' S?`
429
+ var Eq = reg(S_OPT, '=', S_OPT);
430
+ // https://www.w3.org/TR/xml/#NT-VersionNum
431
+ // `[26] VersionNum ::= '1.' [0-9]+`
432
+ // https://www.w3.org/TR/xml11/#NT-VersionNum
433
+ // `[26] VersionNum ::= '1.1'`
434
+ var VersionNum = /1[.]\d+/;
435
+ // https://www.w3.org/TR/xml11/#NT-VersionInfo
436
+ // `[24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')`
437
+ var VersionInfo = reg(S, 'version', Eq, regg("'", VersionNum, "'", '|', '"', VersionNum, '"'));
438
+ // https://www.w3.org/TR/xml11/#NT-EncName
439
+ // `[81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*`
440
+ var EncName = /[A-Za-z][-A-Za-z0-9._]*/;
441
+ // https://www.w3.org/TR/xml11/#NT-EncDecl
442
+ // `[80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )`
443
+ var EncodingDecl = regg(S, 'encoding', Eq, regg('"', EncName, '"', '|', "'", EncName, "'"));
444
+ // https://www.w3.org/TR/xml11/#NT-SDDecl
445
+ // `[32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))`
446
+ var SDDecl = regg(S, 'standalone', Eq, regg("'", regg('yes', '|', 'no'), "'", '|', '"', regg('yes', '|', 'no'), '"'));
447
+ // https://www.w3.org/TR/xml11/#NT-XMLDecl
448
+ // [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
449
+ var XMLDecl = reg(/^<\?xml/, VersionInfo, EncodingDecl, '?', SDDecl, '?', S_OPT, /\?>/);
450
+
451
+ /*
452
+ https://www.w3.org/TR/xml/#NT-markupdecl
453
+ https://www.w3.org/TR/xml11/#NT-markupdecl
454
+ `[29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment`
455
+ var markupdecl = regg(elementdecl, '|', AttlistDecl, '|', EntityDecl, '|', NotationDecl, '|', PI_unsafe, '|', Comment);
456
+ */
457
+ /*
458
+ https://www.w3.org/TR/xml-names/#NT-doctypedecl
459
+ `[28a] DeclSep ::= PEReference | S`
460
+ https://www.w3.org/TR/xml11/#NT-intSubset
461
+ ```
462
+ [28b] intSubset ::= (markupdecl | DeclSep)*
463
+ === (markupdecl | PEReference | S)*
464
+ ```
465
+ [WFC: PE Between Declarations]
466
+ var intSubset = reg(regg(markupdecl, '|', PEReference, '|', S), '*');
467
+ */
468
+ var DOCTYPE_DECL_START = '<!DOCTYPE';
469
+ /*
470
+ https://www.w3.org/TR/xml11/#NT-doctypedecl
471
+ `[28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'`
472
+ https://www.w3.org/TR/xml-names/#NT-doctypedecl
473
+ `[16] doctypedecl ::= '<!DOCTYPE' S QName (S ExternalID)? S? ('[' (markupdecl | PEReference | S)* ']' S?)? '>'`
474
+ var doctypedecl = reg('<!DOCTYPE', S, Name, regg(S, ExternalID), '?', S_OPT, regg(/\[/, intSubset, /]/, S_OPT), '?', '>');
475
+ */
476
+
477
+ var CDATA_START = '<![CDATA[';
478
+ var CDATA_END = ']]>';
479
+ var CDStart = /<!\[CDATA\[/;
480
+ var CDEnd = /\]\]>/;
481
+ var CData = reg(Char, '*?', CDEnd);
482
+ /*
483
+ https://www.w3.org/TR/xml/#dt-cdsection
484
+ `[18] CDSect ::= CDStart CData CDEnd`
485
+ `[19] CDStart ::= '<![CDATA['`
486
+ `[20] CData ::= (Char* - (Char* ']]>' Char*))`
487
+ `[21] CDEnd ::= ']]>'`
488
+ */
489
+ var CDSect = reg(CDStart, CData);
490
+
491
+ // unit tested
492
+ exports.chars = chars;
493
+ exports.chars_without = chars_without;
494
+ exports.detectUnicodeSupport = detectUnicodeSupport;
495
+ exports.reg = reg;
496
+ exports.regg = regg;
497
+ exports.AttlistDecl = AttlistDecl;
498
+ exports.CDATA_START = CDATA_START;
499
+ exports.CDATA_END = CDATA_END;
500
+ exports.CDSect = CDSect;
501
+ exports.Char = Char;
502
+ exports.Comment = Comment;
503
+ exports.COMMENT_START = COMMENT_START;
504
+ exports.COMMENT_END = COMMENT_END;
505
+ exports.DOCTYPE_DECL_START = DOCTYPE_DECL_START;
506
+ exports.elementdecl = elementdecl;
507
+ exports.EntityDecl = EntityDecl;
508
+ exports.EntityValue = EntityValue;
509
+ exports.ExternalID = ExternalID;
510
+ exports.ExternalID_match = ExternalID_match;
511
+ exports.Name = Name;
512
+ exports.NotationDecl = NotationDecl;
513
+ exports.Reference = Reference;
514
+ exports.PEReference = PEReference;
515
+ exports.PI = PI;
516
+ exports.PUBLIC = PUBLIC;
517
+ exports.PubidLiteral = PubidLiteral;
518
+ exports.QName = QName;
519
+ exports.QName_exact = QName_exact;
520
+ exports.QName_group = QName_group;
521
+ exports.S = S;
522
+ exports.SChar_s = SChar_s;
523
+ exports.S_OPT = S_OPT;
524
+ exports.SYSTEM = SYSTEM;
525
+ exports.SystemLiteral = SystemLiteral;
526
+ exports.UNICODE_REPLACEMENT_CHARACTER = UNICODE_REPLACEMENT_CHARACTER;
527
+ exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
528
+ exports.XMLDecl = XMLDecl;
package/lib/index.js CHANGED
@@ -1,6 +1,20 @@
1
- 'use strict'
1
+ 'use strict';
2
2
 
3
- var dom = require('./dom')
4
- exports.DOMImplementation = dom.DOMImplementation
5
- exports.XMLSerializer = dom.XMLSerializer
6
- exports.DOMParser = require('./dom-parser').DOMParser
3
+ var conventions = require('./conventions');
4
+ exports.assign = conventions.assign;
5
+ exports.hasDefaultHTMLNamespace = conventions.hasDefaultHTMLNamespace;
6
+ exports.isHTMLMimeType = conventions.isHTMLMimeType;
7
+ exports.isValidMimeType = conventions.isValidMimeType;
8
+ exports.MIME_TYPE = conventions.MIME_TYPE;
9
+ exports.NAMESPACE = conventions.NAMESPACE;
10
+ exports.ParseError = conventions.ParseError;
11
+
12
+ var dom = require('./dom');
13
+ exports.DOMException = dom.DOMException;
14
+ exports.DOMImplementation = dom.DOMImplementation;
15
+ exports.XMLSerializer = dom.XMLSerializer;
16
+
17
+ var domParser = require('./dom-parser');
18
+ exports.DOMParser = domParser.DOMParser;
19
+ exports.onErrorStopParsing = domParser.onErrorStopParsing;
20
+ exports.onWarningStopParsing = domParser.onWarningStopParsing;