@xmldom/xmldom 0.9.0-beta.8 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +114 -1
- package/index.d.ts +369 -21
- package/lib/.eslintrc.yml +1 -1
- package/lib/conventions.js +153 -121
- package/lib/dom-parser.js +202 -132
- package/lib/dom.js +1085 -414
- package/lib/entities.js +14 -9
- package/lib/errors.js +206 -0
- package/lib/grammar.js +528 -0
- package/lib/index.js +33 -1
- package/lib/sax.js +395 -173
- package/package.json +73 -71
- package/readme.md +41 -44
package/lib/sax.js
CHANGED
|
@@ -1,25 +1,17 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var conventions = require('./conventions');
|
|
4
|
-
var
|
|
4
|
+
var g = require('./grammar');
|
|
5
|
+
var errors = require('./errors');
|
|
6
|
+
|
|
5
7
|
var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
|
|
8
|
+
var isHTMLMimeType = conventions.isHTMLMimeType;
|
|
9
|
+
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
|
|
10
|
+
var hasOwn = conventions.hasOwn;
|
|
6
11
|
var NAMESPACE = conventions.NAMESPACE;
|
|
7
|
-
var
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
// https://www.w3.org/TR/REC-xml/#NT-Name
|
|
11
|
-
// https://www.w3.org/TR/xml-names/#ns-qualnames
|
|
12
|
-
// roughly matches /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
|
|
13
|
-
// which means we currently do not allow : as the first character in a tag name
|
|
14
|
-
var tagNamePattern = new RegExp(
|
|
15
|
-
'^' +
|
|
16
|
-
conventions.QNAME_START_CHAR.source +
|
|
17
|
-
conventions.NAME_CHAR.source +
|
|
18
|
-
'*(?:' +
|
|
19
|
-
conventions.NAME_START_CHAR.source +
|
|
20
|
-
conventions.NAME_CHAR.source +
|
|
21
|
-
'*)?$'
|
|
22
|
-
);
|
|
12
|
+
var ParseError = errors.ParseError;
|
|
13
|
+
var DOMException = errors.DOMException;
|
|
14
|
+
|
|
23
15
|
//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
|
|
24
16
|
|
|
25
17
|
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
|
@@ -33,34 +25,33 @@ var S_ATTR_END = 5; //attr value end and no space(quot end)
|
|
|
33
25
|
var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
|
|
34
26
|
var S_TAG_CLOSE = 7; //closed el<el />
|
|
35
27
|
|
|
36
|
-
/**
|
|
37
|
-
* Creates an error that will not be caught by XMLReader aka the SAX parser.
|
|
38
|
-
*
|
|
39
|
-
* @param {string} message
|
|
40
|
-
* @param {any?} locator Optional, can provide details about the location in the source
|
|
41
|
-
* @constructor
|
|
42
|
-
*/
|
|
43
|
-
function ParseError(message, locator) {
|
|
44
|
-
this.message = message;
|
|
45
|
-
this.locator = locator;
|
|
46
|
-
if (Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
|
|
47
|
-
}
|
|
48
|
-
ParseError.prototype = new Error();
|
|
49
|
-
ParseError.prototype.name = ParseError.name;
|
|
50
|
-
|
|
51
28
|
function XMLReader() {}
|
|
52
29
|
|
|
53
30
|
XMLReader.prototype = {
|
|
54
31
|
parse: function (source, defaultNSMap, entityMap) {
|
|
55
32
|
var domBuilder = this.domBuilder;
|
|
56
33
|
domBuilder.startDocument();
|
|
57
|
-
_copy(defaultNSMap, (defaultNSMap =
|
|
34
|
+
_copy(defaultNSMap, (defaultNSMap = Object.create(null)));
|
|
58
35
|
parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
|
|
59
36
|
domBuilder.endDocument();
|
|
60
37
|
},
|
|
61
38
|
};
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Detecting everything that might be a reference,
|
|
42
|
+
* including those without ending `;`, since those are allowed in HTML.
|
|
43
|
+
* The entityReplacer takes care of verifying and transforming each occurrence,
|
|
44
|
+
* and reports to the errorHandler on those that are not OK,
|
|
45
|
+
* depending on the context.
|
|
46
|
+
*/
|
|
47
|
+
var ENTITY_REG = /&#?\w+;?/g;
|
|
48
|
+
|
|
62
49
|
function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
63
|
-
var isHTML =
|
|
50
|
+
var isHTML = isHTMLMimeType(domBuilder.mimeType);
|
|
51
|
+
if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
|
|
52
|
+
return errorHandler.fatalError('Unicode replacement character detected, source encoding issues?');
|
|
53
|
+
}
|
|
54
|
+
|
|
64
55
|
function fixedFromCharCode(code) {
|
|
65
56
|
// String.fromCharCode does not support
|
|
66
57
|
// > 2 bytes unicode chars directly
|
|
@@ -74,9 +65,20 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
|
74
65
|
return String.fromCharCode(code);
|
|
75
66
|
}
|
|
76
67
|
}
|
|
68
|
+
|
|
77
69
|
function entityReplacer(a) {
|
|
78
|
-
var
|
|
79
|
-
if (
|
|
70
|
+
var complete = a[a.length - 1] === ';' ? a : a + ';';
|
|
71
|
+
if (!isHTML && complete !== a) {
|
|
72
|
+
errorHandler.error('EntityRef: expecting ;');
|
|
73
|
+
return a;
|
|
74
|
+
}
|
|
75
|
+
var match = g.Reference.exec(complete);
|
|
76
|
+
if (!match || match[0].length !== complete.length) {
|
|
77
|
+
errorHandler.error('entity not matching Reference production: ' + a);
|
|
78
|
+
return a;
|
|
79
|
+
}
|
|
80
|
+
var k = complete.slice(1, -1);
|
|
81
|
+
if (hasOwn(entityMap, k)) {
|
|
80
82
|
return entityMap[k];
|
|
81
83
|
} else if (k.charAt(0) === '#') {
|
|
82
84
|
return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
|
|
@@ -85,15 +87,17 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
|
85
87
|
return a;
|
|
86
88
|
}
|
|
87
89
|
}
|
|
90
|
+
|
|
88
91
|
function appendText(end) {
|
|
89
92
|
//has some bugs
|
|
90
93
|
if (end > start) {
|
|
91
|
-
var xt = source.substring(start, end).replace(
|
|
94
|
+
var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
|
|
92
95
|
locator && position(start);
|
|
93
96
|
domBuilder.characters(xt, 0, end - start);
|
|
94
97
|
start = end;
|
|
95
98
|
}
|
|
96
99
|
}
|
|
100
|
+
|
|
97
101
|
function position(p, m) {
|
|
98
102
|
while (p >= lineEnd && (m = linePattern.exec(source))) {
|
|
99
103
|
lineStart = m.index;
|
|
@@ -102,74 +106,90 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
|
102
106
|
}
|
|
103
107
|
locator.columnNumber = p - lineStart + 1;
|
|
104
108
|
}
|
|
109
|
+
|
|
105
110
|
var lineStart = 0;
|
|
106
111
|
var lineEnd = 0;
|
|
107
112
|
var linePattern = /.*(?:\r\n?|\n)|.*$/g;
|
|
108
113
|
var locator = domBuilder.locator;
|
|
109
114
|
|
|
110
115
|
var parseStack = [{ currentNSMap: defaultNSMapCopy }];
|
|
111
|
-
var
|
|
116
|
+
var unclosedTags = [];
|
|
112
117
|
var start = 0;
|
|
113
118
|
while (true) {
|
|
114
119
|
try {
|
|
115
120
|
var tagStart = source.indexOf('<', start);
|
|
116
121
|
if (tagStart < 0) {
|
|
117
|
-
if (!
|
|
122
|
+
if (!isHTML && unclosedTags.length > 0) {
|
|
123
|
+
return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
|
|
124
|
+
}
|
|
125
|
+
if (!source.substring(start).match(/^\s*$/)) {
|
|
118
126
|
var doc = domBuilder.doc;
|
|
119
127
|
var text = doc.createTextNode(source.substr(start));
|
|
128
|
+
if (doc.documentElement) {
|
|
129
|
+
return errorHandler.error('Extra content at the end of the document');
|
|
130
|
+
}
|
|
120
131
|
doc.appendChild(text);
|
|
121
132
|
domBuilder.currentElement = text;
|
|
122
133
|
}
|
|
123
134
|
return;
|
|
124
135
|
}
|
|
125
136
|
if (tagStart > start) {
|
|
137
|
+
var fromSource = source.substring(start, tagStart);
|
|
138
|
+
if (!isHTML && unclosedTags.length === 0) {
|
|
139
|
+
fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
|
|
140
|
+
fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
|
|
141
|
+
}
|
|
126
142
|
appendText(tagStart);
|
|
127
143
|
}
|
|
128
144
|
switch (source.charAt(tagStart + 1)) {
|
|
129
145
|
case '/':
|
|
130
|
-
var
|
|
131
|
-
var end = source.indexOf('>', tagStart + 3);
|
|
146
|
+
var end = source.indexOf('>', tagStart + 2);
|
|
132
147
|
var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if (
|
|
138
|
-
errorHandler.
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
148
|
+
if (!tagNameRaw) {
|
|
149
|
+
return errorHandler.fatalError('end tag name missing');
|
|
150
|
+
}
|
|
151
|
+
var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
|
|
152
|
+
if (!tagNameMatch) {
|
|
153
|
+
return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
|
|
154
|
+
}
|
|
155
|
+
if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
|
|
156
|
+
// not enough information to provide a helpful error message,
|
|
157
|
+
// but parsing will throw since there is no root element
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
var currentTagName =
|
|
161
|
+
unclosedTags[unclosedTags.length - 1] ||
|
|
162
|
+
domBuilder.currentElement.tagName ||
|
|
163
|
+
domBuilder.doc.documentElement.tagName ||
|
|
164
|
+
'';
|
|
165
|
+
if (currentTagName !== tagNameMatch[1]) {
|
|
166
|
+
var tagNameLower = tagNameMatch[1].toLowerCase();
|
|
167
|
+
if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
|
|
168
|
+
return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
|
|
169
|
+
}
|
|
142
170
|
}
|
|
171
|
+
var config = parseStack.pop();
|
|
172
|
+
unclosedTags.pop();
|
|
143
173
|
var localNSMap = config.localNSMap;
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
for (var prefix in localNSMap) {
|
|
150
|
-
if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
|
|
151
|
-
domBuilder.endPrefixMapping(prefix);
|
|
152
|
-
}
|
|
174
|
+
domBuilder.endElement(config.uri, config.localName, currentTagName);
|
|
175
|
+
if (localNSMap) {
|
|
176
|
+
for (var prefix in localNSMap) {
|
|
177
|
+
if (hasOwn(localNSMap, prefix)) {
|
|
178
|
+
domBuilder.endPrefixMapping(prefix);
|
|
153
179
|
}
|
|
154
180
|
}
|
|
155
|
-
if (!endMatch) {
|
|
156
|
-
// No known test case
|
|
157
|
-
errorHandler.fatalError('end tag name: ' + tagName + ' is not match the current start tagName:' + config.tagName);
|
|
158
|
-
}
|
|
159
|
-
} else {
|
|
160
|
-
parseStack.push(config);
|
|
161
181
|
}
|
|
162
182
|
|
|
163
183
|
end++;
|
|
164
184
|
break;
|
|
165
|
-
// end
|
|
185
|
+
// end element
|
|
166
186
|
case '?': // <?...?>
|
|
167
187
|
locator && position(tagStart);
|
|
168
|
-
end =
|
|
188
|
+
end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
|
|
169
189
|
break;
|
|
170
190
|
case '!': // <!doctype,<![CDATA,<!--
|
|
171
191
|
locator && position(tagStart);
|
|
172
|
-
end =
|
|
192
|
+
end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
|
|
173
193
|
break;
|
|
174
194
|
default:
|
|
175
195
|
locator && position(tagStart);
|
|
@@ -179,10 +199,11 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
|
179
199
|
var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
|
|
180
200
|
var len = el.length;
|
|
181
201
|
|
|
182
|
-
if (!el.closed
|
|
183
|
-
el.
|
|
184
|
-
|
|
185
|
-
|
|
202
|
+
if (!el.closed) {
|
|
203
|
+
if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
|
|
204
|
+
el.closed = true;
|
|
205
|
+
} else {
|
|
206
|
+
unclosedTags.push(el.tagName);
|
|
186
207
|
}
|
|
187
208
|
}
|
|
188
209
|
if (locator && len) {
|
|
@@ -213,6 +234,8 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
|
213
234
|
} catch (e) {
|
|
214
235
|
if (e instanceof ParseError) {
|
|
215
236
|
throw e;
|
|
237
|
+
} else if (e instanceof DOMException) {
|
|
238
|
+
throw new ParseError(e.name + ': ' + e.message, domBuilder.locator, e);
|
|
216
239
|
}
|
|
217
240
|
errorHandler.error('element parse error: ' + e);
|
|
218
241
|
end = -1;
|
|
@@ -220,11 +243,12 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
|
220
243
|
if (end > start) {
|
|
221
244
|
start = end;
|
|
222
245
|
} else {
|
|
223
|
-
//
|
|
246
|
+
//Possible sax fallback here, risk of positional error
|
|
224
247
|
appendText(Math.max(tagStart, start) + 1);
|
|
225
248
|
}
|
|
226
249
|
}
|
|
227
250
|
}
|
|
251
|
+
|
|
228
252
|
function copyLocator(f, t) {
|
|
229
253
|
t.lineNumber = f.lineNumber;
|
|
230
254
|
t.columnNumber = f.columnNumber;
|
|
@@ -232,8 +256,9 @@ function copyLocator(f, t) {
|
|
|
232
256
|
}
|
|
233
257
|
|
|
234
258
|
/**
|
|
235
|
-
* @
|
|
236
|
-
*
|
|
259
|
+
* @returns
|
|
260
|
+
* end of the elementStartPart(end of elementEndPart for selfClosed el)
|
|
261
|
+
* @see {@link #appendElement}
|
|
237
262
|
*/
|
|
238
263
|
function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
|
|
239
264
|
/**
|
|
@@ -242,8 +267,11 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
|
|
|
242
267
|
* @param {number} startIndex
|
|
243
268
|
*/
|
|
244
269
|
function addAttribute(qname, value, startIndex) {
|
|
245
|
-
if (el.attributeNames
|
|
246
|
-
errorHandler.fatalError('Attribute ' + qname + ' redefined');
|
|
270
|
+
if (hasOwn(el.attributeNames, qname)) {
|
|
271
|
+
return errorHandler.fatalError('Attribute ' + qname + ' redefined');
|
|
272
|
+
}
|
|
273
|
+
if (!isHTML && value.indexOf('<') >= 0) {
|
|
274
|
+
return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
|
|
247
275
|
}
|
|
248
276
|
el.addValue(
|
|
249
277
|
qname,
|
|
@@ -251,10 +279,11 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
|
|
|
251
279
|
// since the xmldom sax parser does not "interpret" DTD the following is not implemented:
|
|
252
280
|
// - recursive replacement of (DTD) entity references
|
|
253
281
|
// - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
|
|
254
|
-
value.replace(/[\t\n\r]/g, ' ').replace(
|
|
282
|
+
value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
|
|
255
283
|
startIndex
|
|
256
284
|
);
|
|
257
285
|
}
|
|
286
|
+
|
|
258
287
|
var attrName;
|
|
259
288
|
var value;
|
|
260
289
|
var p = ++start;
|
|
@@ -362,9 +391,10 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
|
|
|
362
391
|
}
|
|
363
392
|
break;
|
|
364
393
|
case S_EQ:
|
|
365
|
-
|
|
394
|
+
if (!isHTML) {
|
|
395
|
+
return errorHandler.fatalError('AttValue: \' or " expected');
|
|
396
|
+
}
|
|
366
397
|
}
|
|
367
|
-
// console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
|
|
368
398
|
return p;
|
|
369
399
|
/*xml space '\x20' | #x9 | #xD | #xA; */
|
|
370
400
|
case '\u0080':
|
|
@@ -426,17 +456,17 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
|
|
|
426
456
|
}
|
|
427
457
|
}
|
|
428
458
|
} //end outer switch
|
|
429
|
-
//console.log('p++',p)
|
|
430
459
|
p++;
|
|
431
460
|
}
|
|
432
461
|
}
|
|
462
|
+
|
|
433
463
|
/**
|
|
434
|
-
* @
|
|
464
|
+
* @returns
|
|
465
|
+
* `true` if a new namespace has been defined.
|
|
435
466
|
*/
|
|
436
467
|
function appendElement(el, domBuilder, currentNSMap) {
|
|
437
468
|
var tagName = el.tagName;
|
|
438
469
|
var localNSMap = null;
|
|
439
|
-
//var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
|
|
440
470
|
var i = el.length;
|
|
441
471
|
while (i--) {
|
|
442
472
|
var a = el[i];
|
|
@@ -458,10 +488,8 @@ function appendElement(el, domBuilder, currentNSMap) {
|
|
|
458
488
|
if (nsPrefix !== false) {
|
|
459
489
|
//hack!!
|
|
460
490
|
if (localNSMap == null) {
|
|
461
|
-
localNSMap =
|
|
462
|
-
|
|
463
|
-
_copy(currentNSMap, (currentNSMap = {}));
|
|
464
|
-
//console.log(currentNSMap,1)
|
|
491
|
+
localNSMap = Object.create(null);
|
|
492
|
+
_copy(currentNSMap, (currentNSMap = Object.create(null)));
|
|
465
493
|
}
|
|
466
494
|
currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
|
|
467
495
|
a.uri = NAMESPACE.XMLNS;
|
|
@@ -498,7 +526,7 @@ function appendElement(el, domBuilder, currentNSMap) {
|
|
|
498
526
|
domBuilder.endElement(ns, localName, tagName);
|
|
499
527
|
if (localNSMap) {
|
|
500
528
|
for (prefix in localNSMap) {
|
|
501
|
-
if (
|
|
529
|
+
if (hasOwn(localNSMap, prefix)) {
|
|
502
530
|
domBuilder.endPrefixMapping(prefix);
|
|
503
531
|
}
|
|
504
532
|
}
|
|
@@ -510,6 +538,7 @@ function appendElement(el, domBuilder, currentNSMap) {
|
|
|
510
538
|
return true;
|
|
511
539
|
}
|
|
512
540
|
}
|
|
541
|
+
|
|
513
542
|
function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
|
|
514
543
|
// https://html.spec.whatwg.org/#raw-text-elements
|
|
515
544
|
// https://html.spec.whatwg.org/#escapable-raw-text-elements
|
|
@@ -521,117 +550,321 @@ function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, do
|
|
|
521
550
|
var text = source.substring(elStartEnd + 1, elEndStart);
|
|
522
551
|
|
|
523
552
|
if (isEscapableRaw) {
|
|
524
|
-
text = text.replace(
|
|
553
|
+
text = text.replace(ENTITY_REG, entityReplacer);
|
|
525
554
|
}
|
|
526
555
|
domBuilder.characters(text, 0, text.length);
|
|
527
556
|
return elEndStart;
|
|
528
557
|
}
|
|
529
558
|
return elStartEnd + 1;
|
|
530
559
|
}
|
|
531
|
-
function fixSelfClosed(source, elStartEnd, tagName, closeMap) {
|
|
532
|
-
//if(tagName in closeMap){
|
|
533
|
-
var pos = closeMap[tagName];
|
|
534
|
-
if (pos == null) {
|
|
535
|
-
//console.log(tagName)
|
|
536
|
-
pos = source.lastIndexOf('</' + tagName + '>');
|
|
537
|
-
if (pos < elStartEnd) {
|
|
538
|
-
//忘记闭合
|
|
539
|
-
pos = source.lastIndexOf('</' + tagName);
|
|
540
|
-
}
|
|
541
|
-
closeMap[tagName] = pos;
|
|
542
|
-
}
|
|
543
|
-
return pos < elStartEnd;
|
|
544
|
-
//}
|
|
545
|
-
}
|
|
546
560
|
|
|
547
561
|
function _copy(source, target) {
|
|
548
562
|
for (var n in source) {
|
|
549
|
-
if (
|
|
563
|
+
if (hasOwn(source, n)) {
|
|
550
564
|
target[n] = source[n];
|
|
551
565
|
}
|
|
552
566
|
}
|
|
553
567
|
}
|
|
554
568
|
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
+
/**
|
|
570
|
+
* @typedef ParseUtils
|
|
571
|
+
* @property {function(relativeIndex: number?): string | undefined} char
|
|
572
|
+
* Provides look ahead access to a single character relative to the current index.
|
|
573
|
+
* @property {function(): number} getIndex
|
|
574
|
+
* Provides read-only access to the current index.
|
|
575
|
+
* @property {function(reg: RegExp): string | null} getMatch
|
|
576
|
+
* Applies the provided regular expression enforcing that it starts at the current index and
|
|
577
|
+
* returns the complete matching string,
|
|
578
|
+
* and moves the current index by the length of the matching string.
|
|
579
|
+
* @property {function(): string} getSource
|
|
580
|
+
* Provides read-only access to the complete source.
|
|
581
|
+
* @property {function(places: number?): void} skip
|
|
582
|
+
* Moves the current index forward by `places` positions (defaults to 1).
|
|
583
|
+
* @property {function(): number} skipBlanks
|
|
584
|
+
* Moves the current index by the amount of white space that directly follows the current index
|
|
585
|
+
* and returns the amount of whitespace chars skipped (0..n),
|
|
586
|
+
* or -1 if the end of the source was reached.
|
|
587
|
+
* @property {function(): string} substringFromIndex
|
|
588
|
+
* creates a substring from the current index to the end of `source`
|
|
589
|
+
* @property {function(compareWith: string): boolean} substringStartsWith
|
|
590
|
+
* Checks if source contains `compareWith`,
|
|
591
|
+
* starting from the current index.
|
|
592
|
+
* @see {@link parseUtils}
|
|
593
|
+
*/
|
|
594
|
+
|
|
595
|
+
/**
|
|
596
|
+
* A temporary scope for parsing and look ahead operations in `source`,
|
|
597
|
+
* starting from index `start`.
|
|
598
|
+
*
|
|
599
|
+
* Some operations move the current index by a number of positions,
|
|
600
|
+
* after which `getIndex` returns the new index.
|
|
601
|
+
*
|
|
602
|
+
* @param {string} source
|
|
603
|
+
* @param {number} start
|
|
604
|
+
* @returns {ParseUtils}
|
|
605
|
+
*/
|
|
606
|
+
function parseUtils(source, start) {
|
|
607
|
+
var index = start;
|
|
608
|
+
|
|
609
|
+
function char(n) {
|
|
610
|
+
n = n || 0;
|
|
611
|
+
return source.charAt(index + n);
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
function skip(n) {
|
|
615
|
+
n = n || 1;
|
|
616
|
+
index += n;
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
function skipBlanks() {
|
|
620
|
+
var blanks = 0;
|
|
621
|
+
while (index < source.length) {
|
|
622
|
+
var c = char();
|
|
623
|
+
if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
|
|
624
|
+
return blanks;
|
|
625
|
+
}
|
|
626
|
+
blanks++;
|
|
627
|
+
skip();
|
|
628
|
+
}
|
|
629
|
+
return -1;
|
|
630
|
+
}
|
|
631
|
+
function substringFromIndex() {
|
|
632
|
+
return source.substring(index);
|
|
633
|
+
}
|
|
634
|
+
function substringStartsWith(text) {
|
|
635
|
+
return source.substring(index, index + text.length) === text;
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
function getMatch(args) {
|
|
639
|
+
var expr = g.reg('^', args);
|
|
640
|
+
var match = expr.exec(substringFromIndex());
|
|
641
|
+
if (match) {
|
|
642
|
+
skip(match[0].length);
|
|
643
|
+
return match[0];
|
|
644
|
+
}
|
|
645
|
+
return null;
|
|
646
|
+
}
|
|
647
|
+
return {
|
|
648
|
+
char: char,
|
|
649
|
+
getIndex: function () {
|
|
650
|
+
return index;
|
|
651
|
+
},
|
|
652
|
+
getMatch: getMatch,
|
|
653
|
+
getSource: function () {
|
|
654
|
+
return source;
|
|
655
|
+
},
|
|
656
|
+
skip: skip,
|
|
657
|
+
skipBlanks: skipBlanks,
|
|
658
|
+
substringFromIndex: substringFromIndex,
|
|
659
|
+
substringStartsWith: substringStartsWith,
|
|
660
|
+
};
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
/**
|
|
664
|
+
* @param {ParseUtils} p
|
|
665
|
+
* @param {DOMHandler} errorHandler
|
|
666
|
+
* @returns {string}
|
|
667
|
+
*/
|
|
668
|
+
function parseDoctypeInternalSubset(p, errorHandler) {
|
|
669
|
+
/**
|
|
670
|
+
* @param {ParseUtils} p
|
|
671
|
+
* @param {DOMHandler} errorHandler
|
|
672
|
+
* @returns {string}
|
|
673
|
+
*/
|
|
674
|
+
function parsePI(p, errorHandler) {
|
|
675
|
+
var match = g.PI.exec(p.substringFromIndex());
|
|
676
|
+
if (!match) {
|
|
677
|
+
return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
|
|
678
|
+
}
|
|
679
|
+
if (match[1].toLowerCase() === 'xml') {
|
|
680
|
+
return errorHandler.fatalError(
|
|
681
|
+
'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
|
|
682
|
+
);
|
|
683
|
+
}
|
|
684
|
+
p.skip(match[0].length);
|
|
685
|
+
return match[0];
|
|
686
|
+
}
|
|
687
|
+
// Parse internal subset
|
|
688
|
+
var source = p.getSource();
|
|
689
|
+
if (p.char() === '[') {
|
|
690
|
+
p.skip(1);
|
|
691
|
+
var intSubsetStart = p.getIndex();
|
|
692
|
+
while (p.getIndex() < source.length) {
|
|
693
|
+
p.skipBlanks();
|
|
694
|
+
if (p.char() === ']') {
|
|
695
|
+
var internalSubset = source.substring(intSubsetStart, p.getIndex());
|
|
696
|
+
p.skip(1);
|
|
697
|
+
return internalSubset;
|
|
698
|
+
}
|
|
699
|
+
var current = null;
|
|
700
|
+
// Only in external subset
|
|
701
|
+
// if (char() === '<' && char(1) === '!' && char(2) === '[') {
|
|
702
|
+
// parseConditionalSections(p, errorHandler);
|
|
703
|
+
// } else
|
|
704
|
+
if (p.char() === '<' && p.char(1) === '!') {
|
|
705
|
+
switch (p.char(2)) {
|
|
706
|
+
case 'E': // ELEMENT | ENTITY
|
|
707
|
+
if (p.char(3) === 'L') {
|
|
708
|
+
current = p.getMatch(g.elementdecl);
|
|
709
|
+
} else if (p.char(3) === 'N') {
|
|
710
|
+
current = p.getMatch(g.EntityDecl);
|
|
711
|
+
}
|
|
712
|
+
break;
|
|
713
|
+
case 'A': // ATTRIBUTE
|
|
714
|
+
current = p.getMatch(g.AttlistDecl);
|
|
715
|
+
break;
|
|
716
|
+
case 'N': // NOTATION
|
|
717
|
+
current = p.getMatch(g.NotationDecl);
|
|
718
|
+
break;
|
|
719
|
+
case '-': // COMMENT
|
|
720
|
+
current = p.getMatch(g.Comment);
|
|
721
|
+
break;
|
|
569
722
|
}
|
|
723
|
+
} else if (p.char() === '<' && p.char(1) === '?') {
|
|
724
|
+
current = parsePI(p, errorHandler);
|
|
725
|
+
} else if (p.char() === '%') {
|
|
726
|
+
current = p.getMatch(g.PEReference);
|
|
570
727
|
} else {
|
|
571
|
-
|
|
572
|
-
return -1;
|
|
728
|
+
return errorHandler.fatalError('Error detected in Markup declaration');
|
|
573
729
|
}
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
730
|
+
if (!current) {
|
|
731
|
+
return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
|
|
732
|
+
}
|
|
733
|
+
}
|
|
734
|
+
return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
|
|
738
|
+
/**
|
|
739
|
+
* Called when the parser encounters an element starting with '<!'.
|
|
740
|
+
*
|
|
741
|
+
* @param {string} source
|
|
742
|
+
* The xml.
|
|
743
|
+
* @param {number} start
|
|
744
|
+
* the start index of the '<!'
|
|
745
|
+
* @param {DOMHandler} domBuilder
|
|
746
|
+
* @param {DOMHandler} errorHandler
|
|
747
|
+
* @param {boolean} isHTML
|
|
748
|
+
* @returns {number | never}
|
|
749
|
+
* The end index of the element.
|
|
750
|
+
* @throws {ParseError}
|
|
751
|
+
* In case the element is not well-formed.
|
|
752
|
+
*/
|
|
753
|
+
function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
|
|
754
|
+
var p = parseUtils(source, start);
|
|
755
|
+
|
|
756
|
+
switch (p.char(2)) {
|
|
757
|
+
case '-':
|
|
758
|
+
// should be a comment
|
|
759
|
+
var comment = p.getMatch(g.Comment);
|
|
760
|
+
if (comment) {
|
|
761
|
+
domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
|
|
762
|
+
return p.getIndex();
|
|
763
|
+
} else {
|
|
764
|
+
return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
|
|
765
|
+
}
|
|
766
|
+
case '[':
|
|
767
|
+
// should be CDATA
|
|
768
|
+
var cdata = p.getMatch(g.CDSect);
|
|
769
|
+
if (cdata) {
|
|
770
|
+
if (!isHTML && !domBuilder.currentElement) {
|
|
771
|
+
return errorHandler.fatalError('CDATA outside of element');
|
|
772
|
+
}
|
|
577
773
|
domBuilder.startCDATA();
|
|
578
|
-
domBuilder.characters(
|
|
774
|
+
domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
|
|
579
775
|
domBuilder.endCDATA();
|
|
580
|
-
return
|
|
776
|
+
return p.getIndex();
|
|
777
|
+
} else {
|
|
778
|
+
return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
|
|
581
779
|
}
|
|
582
|
-
|
|
583
|
-
//
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
780
|
+
case 'D': {
|
|
781
|
+
// should be DOCTYPE
|
|
782
|
+
if (domBuilder.doc && domBuilder.doc.documentElement) {
|
|
783
|
+
return errorHandler.fatalError('Doctype not allowed inside or after documentElement at position ' + p.getIndex());
|
|
784
|
+
}
|
|
785
|
+
if (!p.substringStartsWith(g.DOCTYPE_DECL_START)) {
|
|
786
|
+
return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
|
|
787
|
+
}
|
|
788
|
+
p.skip(g.DOCTYPE_DECL_START.length);
|
|
789
|
+
if (p.skipBlanks() < 1) {
|
|
790
|
+
return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
|
|
791
|
+
}
|
|
792
|
+
|
|
793
|
+
var doctype = {
|
|
794
|
+
name: undefined,
|
|
795
|
+
publicId: undefined,
|
|
796
|
+
systemId: undefined,
|
|
797
|
+
internalSubset: undefined,
|
|
798
|
+
};
|
|
799
|
+
// Parse the DOCTYPE name
|
|
800
|
+
doctype.name = p.getMatch(g.Name);
|
|
801
|
+
if (!doctype.name)
|
|
802
|
+
return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
|
|
803
|
+
p.skipBlanks();
|
|
804
|
+
|
|
805
|
+
// Check for ExternalID
|
|
806
|
+
if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
|
|
807
|
+
var match = g.ExternalID_match.exec(p.substringFromIndex());
|
|
808
|
+
if (!match) {
|
|
809
|
+
return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
|
|
810
|
+
}
|
|
811
|
+
if (match.groups.SystemLiteralOnly !== undefined) {
|
|
812
|
+
doctype.systemId = match.groups.SystemLiteralOnly;
|
|
813
|
+
} else {
|
|
814
|
+
doctype.systemId = match.groups.SystemLiteral;
|
|
815
|
+
doctype.publicId = match.groups.PubidLiteral;
|
|
597
816
|
}
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
domBuilder.endDTD();
|
|
817
|
+
p.skip(match[0].length);
|
|
818
|
+
}
|
|
601
819
|
|
|
602
|
-
|
|
820
|
+
p.skipBlanks();
|
|
821
|
+
doctype.internalSubset = parseDoctypeInternalSubset(p, errorHandler);
|
|
822
|
+
p.skipBlanks();
|
|
823
|
+
if (p.char() !== '>') {
|
|
824
|
+
return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
|
|
603
825
|
}
|
|
826
|
+
p.skip(1);
|
|
827
|
+
domBuilder.startDTD(doctype.name, doctype.publicId, doctype.systemId, doctype.internalSubset);
|
|
828
|
+
domBuilder.endDTD();
|
|
829
|
+
return p.getIndex();
|
|
830
|
+
}
|
|
831
|
+
default:
|
|
832
|
+
return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
|
|
604
833
|
}
|
|
605
|
-
return -1;
|
|
606
834
|
}
|
|
607
835
|
|
|
608
|
-
function
|
|
609
|
-
var
|
|
610
|
-
if (
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
836
|
+
function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
|
|
837
|
+
var match = source.substring(start).match(g.PI);
|
|
838
|
+
if (!match) {
|
|
839
|
+
return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
|
|
840
|
+
}
|
|
841
|
+
if (match[1].toLowerCase() === 'xml') {
|
|
842
|
+
if (start > 0) {
|
|
843
|
+
return errorHandler.fatalError(
|
|
844
|
+
'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
|
|
845
|
+
);
|
|
846
|
+
}
|
|
847
|
+
if (!g.XMLDecl.test(source.substring(start))) {
|
|
848
|
+
return errorHandler.fatalError('xml declaration is not well-formed');
|
|
618
849
|
}
|
|
619
850
|
}
|
|
620
|
-
|
|
851
|
+
domBuilder.processingInstruction(match[1], match[2]);
|
|
852
|
+
return start + match[0].length;
|
|
621
853
|
}
|
|
622
854
|
|
|
623
855
|
function ElementAttributes() {
|
|
624
|
-
this.attributeNames =
|
|
856
|
+
this.attributeNames = Object.create(null);
|
|
625
857
|
}
|
|
858
|
+
|
|
626
859
|
ElementAttributes.prototype = {
|
|
627
860
|
setTagName: function (tagName) {
|
|
628
|
-
if (!
|
|
861
|
+
if (!g.QName_exact.test(tagName)) {
|
|
629
862
|
throw new Error('invalid tagName:' + tagName);
|
|
630
863
|
}
|
|
631
864
|
this.tagName = tagName;
|
|
632
865
|
},
|
|
633
866
|
addValue: function (qName, value, offset) {
|
|
634
|
-
if (!
|
|
867
|
+
if (!g.QName_exact.test(qName)) {
|
|
635
868
|
throw new Error('invalid attribute:' + qName);
|
|
636
869
|
}
|
|
637
870
|
this.attributeNames[qName] = this.length;
|
|
@@ -665,17 +898,6 @@ ElementAttributes.prototype = {
|
|
|
665
898
|
// getType:function(i){},
|
|
666
899
|
};
|
|
667
900
|
|
|
668
|
-
function split(source, start) {
|
|
669
|
-
var match;
|
|
670
|
-
var buf = [];
|
|
671
|
-
var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
|
|
672
|
-
reg.lastIndex = start;
|
|
673
|
-
reg.exec(source); //skip <
|
|
674
|
-
while ((match = reg.exec(source))) {
|
|
675
|
-
buf.push(match);
|
|
676
|
-
if (match[1]) return buf;
|
|
677
|
-
}
|
|
678
|
-
}
|
|
679
|
-
|
|
680
901
|
exports.XMLReader = XMLReader;
|
|
681
|
-
exports.
|
|
902
|
+
exports.parseUtils = parseUtils;
|
|
903
|
+
exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;
|