@xmldom/xmldom 0.9.0-beta.1 → 0.9.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +302 -8
- package/SECURITY.md +8 -8
- package/index.d.ts +369 -21
- package/lib/.eslintrc.yml +1 -0
- package/lib/conventions.js +192 -112
- package/lib/dom-parser.js +301 -232
- package/lib/dom.js +1465 -871
- package/lib/entities.js +2150 -254
- package/lib/grammar.js +528 -0
- package/lib/index.js +19 -5
- package/lib/sax.js +717 -479
- package/package.json +71 -67
- package/readme.md +31 -42
package/lib/sax.js
CHANGED
|
@@ -1,246 +1,260 @@
|
|
|
1
|
-
'use strict'
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var conventions = require('./conventions');
|
|
4
|
+
var g = require('./grammar');
|
|
2
5
|
|
|
3
|
-
var conventions = require("./conventions");
|
|
4
|
-
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
|
|
5
6
|
var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
|
|
7
|
+
var isHTMLMimeType = conventions.isHTMLMimeType;
|
|
8
|
+
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
|
|
6
9
|
var NAMESPACE = conventions.NAMESPACE;
|
|
7
|
-
var
|
|
8
|
-
|
|
9
|
-
//[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
|
|
10
|
-
//[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
|
11
|
-
//[5] Name ::= NameStartChar (NameChar)*
|
|
12
|
-
var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
|
|
13
|
-
var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
|
|
14
|
-
var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
|
|
15
|
-
//var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
|
|
10
|
+
var ParseError = conventions.ParseError;
|
|
11
|
+
|
|
16
12
|
//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
|
|
17
13
|
|
|
18
14
|
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
|
19
15
|
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
|
|
20
|
-
var S_TAG = 0
|
|
21
|
-
var S_ATTR = 1
|
|
22
|
-
var S_ATTR_SPACE=2
|
|
23
|
-
var S_EQ = 3
|
|
24
|
-
var S_ATTR_NOQUOT_VALUE = 4
|
|
25
|
-
var S_ATTR_END = 5
|
|
26
|
-
var S_TAG_SPACE = 6
|
|
27
|
-
var S_TAG_CLOSE = 7
|
|
16
|
+
var S_TAG = 0; //tag name offerring
|
|
17
|
+
var S_ATTR = 1; //attr name offerring
|
|
18
|
+
var S_ATTR_SPACE = 2; //attr name end and space offer
|
|
19
|
+
var S_EQ = 3; //=space?
|
|
20
|
+
var S_ATTR_NOQUOT_VALUE = 4; //attr value(no quot value only)
|
|
21
|
+
var S_ATTR_END = 5; //attr value end and no space(quot end)
|
|
22
|
+
var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
|
|
23
|
+
var S_TAG_CLOSE = 7; //closed el<el />
|
|
28
24
|
|
|
29
|
-
|
|
30
|
-
* Creates an error that will not be caught by XMLReader aka the SAX parser.
|
|
31
|
-
*
|
|
32
|
-
* @param {string} message
|
|
33
|
-
* @param {any?} locator Optional, can provide details about the location in the source
|
|
34
|
-
* @constructor
|
|
35
|
-
*/
|
|
36
|
-
function ParseError(message, locator) {
|
|
37
|
-
this.message = message
|
|
38
|
-
this.locator = locator
|
|
39
|
-
if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
|
|
40
|
-
}
|
|
41
|
-
ParseError.prototype = new Error();
|
|
42
|
-
ParseError.prototype.name = ParseError.name
|
|
43
|
-
|
|
44
|
-
function XMLReader(){
|
|
45
|
-
|
|
46
|
-
}
|
|
25
|
+
function XMLReader() {}
|
|
47
26
|
|
|
48
27
|
XMLReader.prototype = {
|
|
49
|
-
parse:function(source,defaultNSMap,entityMap){
|
|
28
|
+
parse: function (source, defaultNSMap, entityMap) {
|
|
50
29
|
var domBuilder = this.domBuilder;
|
|
51
30
|
domBuilder.startDocument();
|
|
52
|
-
_copy(defaultNSMap
|
|
53
|
-
parse(source,defaultNSMap,entityMap,
|
|
54
|
-
domBuilder,this.errorHandler);
|
|
31
|
+
_copy(defaultNSMap, (defaultNSMap = {}));
|
|
32
|
+
parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
|
|
55
33
|
domBuilder.endDocument();
|
|
34
|
+
},
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Detecting everything that might be a reference,
|
|
39
|
+
* including those without ending `;`, since those are allowed in HTML.
|
|
40
|
+
* The entityReplacer takes care of verifying and transforming each occurrence,
|
|
41
|
+
* and reports to the errorHandler on those that are not OK,
|
|
42
|
+
* depending on the context.
|
|
43
|
+
*/
|
|
44
|
+
var ENTITY_REG = /&#?\w+;?/g;
|
|
45
|
+
|
|
46
|
+
function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
47
|
+
var isHTML = isHTMLMimeType(domBuilder.mimeType);
|
|
48
|
+
if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
|
|
49
|
+
return errorHandler.fatalError('Unicode replacement character detected, source encoding issues?');
|
|
56
50
|
}
|
|
57
|
-
|
|
58
|
-
function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
|
|
59
|
-
var isHTML = MIME_TYPE.isHTML(domBuilder.mimeType);
|
|
51
|
+
|
|
60
52
|
function fixedFromCharCode(code) {
|
|
61
53
|
// String.prototype.fromCharCode does not supports
|
|
62
54
|
// > 2 bytes unicode chars directly
|
|
63
55
|
if (code > 0xffff) {
|
|
64
56
|
code -= 0x10000;
|
|
65
|
-
var surrogate1 = 0xd800 + (code >> 10)
|
|
66
|
-
|
|
57
|
+
var surrogate1 = 0xd800 + (code >> 10),
|
|
58
|
+
surrogate2 = 0xdc00 + (code & 0x3ff);
|
|
67
59
|
|
|
68
60
|
return String.fromCharCode(surrogate1, surrogate2);
|
|
69
61
|
} else {
|
|
70
62
|
return String.fromCharCode(code);
|
|
71
63
|
}
|
|
72
64
|
}
|
|
73
|
-
|
|
74
|
-
|
|
65
|
+
|
|
66
|
+
function entityReplacer(a) {
|
|
67
|
+
var complete = a[a.length - 1] === ';' ? a : a + ';';
|
|
68
|
+
if (!isHTML && complete !== a) {
|
|
69
|
+
errorHandler.error('EntityRef: expecting ;');
|
|
70
|
+
return a;
|
|
71
|
+
}
|
|
72
|
+
var match = g.Reference.exec(complete);
|
|
73
|
+
if (!match || match[0].length !== complete.length) {
|
|
74
|
+
errorHandler.error('entity not matching Reference production: ' + a);
|
|
75
|
+
return a;
|
|
76
|
+
}
|
|
77
|
+
var k = complete.slice(1, -1);
|
|
75
78
|
if (Object.hasOwnProperty.call(entityMap, k)) {
|
|
76
79
|
return entityMap[k];
|
|
77
|
-
}else if(k.charAt(0) === '#'){
|
|
78
|
-
return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
|
|
79
|
-
}else{
|
|
80
|
-
errorHandler.error('entity not found:'+a);
|
|
80
|
+
} else if (k.charAt(0) === '#') {
|
|
81
|
+
return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
|
|
82
|
+
} else {
|
|
83
|
+
errorHandler.error('entity not found:' + a);
|
|
81
84
|
return a;
|
|
82
85
|
}
|
|
83
86
|
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
87
|
+
|
|
88
|
+
function appendText(end) {
|
|
89
|
+
//has some bugs
|
|
90
|
+
if (end > start) {
|
|
91
|
+
var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
|
|
92
|
+
locator && position(start);
|
|
93
|
+
domBuilder.characters(xt, 0, end - start);
|
|
94
|
+
start = end;
|
|
90
95
|
}
|
|
91
96
|
}
|
|
92
|
-
|
|
93
|
-
|
|
97
|
+
|
|
98
|
+
function position(p, m) {
|
|
99
|
+
while (p >= lineEnd && (m = linePattern.exec(source))) {
|
|
94
100
|
lineStart = m.index;
|
|
95
101
|
lineEnd = lineStart + m[0].length;
|
|
96
102
|
locator.lineNumber++;
|
|
97
|
-
//console.log('line++:',locator,startPos,endPos)
|
|
98
103
|
}
|
|
99
|
-
locator.columnNumber = p-lineStart+1;
|
|
104
|
+
locator.columnNumber = p - lineStart + 1;
|
|
100
105
|
}
|
|
106
|
+
|
|
101
107
|
var lineStart = 0;
|
|
102
108
|
var lineEnd = 0;
|
|
103
|
-
var linePattern = /.*(?:\r\n?|\n)|.*$/g
|
|
109
|
+
var linePattern = /.*(?:\r\n?|\n)|.*$/g;
|
|
104
110
|
var locator = domBuilder.locator;
|
|
105
111
|
|
|
106
|
-
var parseStack = [{currentNSMap:defaultNSMapCopy}]
|
|
107
|
-
var
|
|
112
|
+
var parseStack = [{ currentNSMap: defaultNSMapCopy }];
|
|
113
|
+
var unclosedTags = [];
|
|
108
114
|
var start = 0;
|
|
109
|
-
while(true){
|
|
110
|
-
try{
|
|
111
|
-
var tagStart = source.indexOf('<',start);
|
|
112
|
-
if(tagStart<0){
|
|
113
|
-
if(!
|
|
115
|
+
while (true) {
|
|
116
|
+
try {
|
|
117
|
+
var tagStart = source.indexOf('<', start);
|
|
118
|
+
if (tagStart < 0) {
|
|
119
|
+
if (!isHTML && unclosedTags.length > 0) {
|
|
120
|
+
return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
|
|
121
|
+
}
|
|
122
|
+
if (!source.substring(start).match(/^\s*$/)) {
|
|
114
123
|
var doc = domBuilder.doc;
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
124
|
+
var text = doc.createTextNode(source.substr(start));
|
|
125
|
+
if (doc.documentElement) {
|
|
126
|
+
return errorHandler.error('Extra content at the end of the document');
|
|
127
|
+
}
|
|
128
|
+
doc.appendChild(text);
|
|
129
|
+
domBuilder.currentElement = text;
|
|
118
130
|
}
|
|
119
131
|
return;
|
|
120
132
|
}
|
|
121
|
-
if(tagStart>start){
|
|
133
|
+
if (tagStart > start) {
|
|
134
|
+
var fromSource = source.substring(start, tagStart);
|
|
135
|
+
if (!isHTML && unclosedTags.length === 0) {
|
|
136
|
+
fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
|
|
137
|
+
fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
|
|
138
|
+
}
|
|
122
139
|
appendText(tagStart);
|
|
123
140
|
}
|
|
124
|
-
switch(source.charAt(tagStart+1)){
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
|
|
132
|
-
errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
|
|
133
|
-
end = tagStart+1+tagName.length;
|
|
134
|
-
}else if(tagName.match(/\s</)){
|
|
135
|
-
tagName = tagName.replace(/[\s<].*/,'');
|
|
136
|
-
errorHandler.error("end tag name: "+tagName+' maybe not complete');
|
|
137
|
-
end = tagStart+1+tagName.length;
|
|
138
|
-
}
|
|
139
|
-
var localNSMap = config.localNSMap;
|
|
140
|
-
var endMatch = config.tagName == tagName;
|
|
141
|
-
var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
|
|
142
|
-
if(endIgnoreCaseMach){
|
|
143
|
-
domBuilder.endElement(config.uri,config.localName,tagName);
|
|
144
|
-
if(localNSMap){
|
|
145
|
-
for(var prefix in localNSMap){
|
|
146
|
-
domBuilder.endPrefixMapping(prefix) ;
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
if(!endMatch){
|
|
150
|
-
errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
|
|
141
|
+
switch (source.charAt(tagStart + 1)) {
|
|
142
|
+
case '/':
|
|
143
|
+
var end = source.indexOf('>', tagStart + 2);
|
|
144
|
+
var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
|
|
145
|
+
if (!tagNameRaw) {
|
|
146
|
+
return errorHandler.fatalError('end tag name missing');
|
|
151
147
|
}
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
end++;
|
|
157
|
-
break;
|
|
158
|
-
// end elment
|
|
159
|
-
case '?':// <?...?>
|
|
160
|
-
locator&&position(tagStart);
|
|
161
|
-
end = parseInstruction(source,tagStart,domBuilder);
|
|
162
|
-
break;
|
|
163
|
-
case '!':// <!doctype,<![CDATA,<!--
|
|
164
|
-
locator&&position(tagStart);
|
|
165
|
-
end = parseDCC(source,tagStart,domBuilder,errorHandler);
|
|
166
|
-
break;
|
|
167
|
-
default:
|
|
168
|
-
locator&&position(tagStart);
|
|
169
|
-
var el = new ElementAttributes();
|
|
170
|
-
var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
|
|
171
|
-
//elStartEnd
|
|
172
|
-
var end = parseElementStartPart(
|
|
173
|
-
source,
|
|
174
|
-
tagStart,
|
|
175
|
-
el,
|
|
176
|
-
currentNSMap,
|
|
177
|
-
entityReplacer,
|
|
178
|
-
errorHandler,
|
|
179
|
-
isHTML
|
|
180
|
-
)
|
|
181
|
-
var len = el.length;
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
|
|
185
|
-
el.closed = true;
|
|
186
|
-
if(!isHTML){
|
|
187
|
-
errorHandler.warning('unclosed xml attribute');
|
|
148
|
+
var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
|
|
149
|
+
if (!tagNameMatch) {
|
|
150
|
+
return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
|
|
188
151
|
}
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
for(var i = 0;i<len;i++){
|
|
194
|
-
var a = el[i];
|
|
195
|
-
position(a.offset);
|
|
196
|
-
a.locator = copyLocator(locator,{});
|
|
152
|
+
if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
|
|
153
|
+
// not enough information to provide a helpful error message,
|
|
154
|
+
// but parsing will throw since there is no root element
|
|
155
|
+
return;
|
|
197
156
|
}
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
157
|
+
var currentTagName =
|
|
158
|
+
unclosedTags[unclosedTags.length - 1] ||
|
|
159
|
+
domBuilder.currentElement.tagName ||
|
|
160
|
+
domBuilder.doc.documentElement.tagName ||
|
|
161
|
+
'';
|
|
162
|
+
if (currentTagName !== tagNameMatch[1]) {
|
|
163
|
+
var tagNameLower = tagNameMatch[1].toLowerCase();
|
|
164
|
+
if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
|
|
165
|
+
return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
|
|
166
|
+
}
|
|
201
167
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
168
|
+
var config = parseStack.pop();
|
|
169
|
+
unclosedTags.pop();
|
|
170
|
+
var localNSMap = config.localNSMap;
|
|
171
|
+
domBuilder.endElement(config.uri, config.localName, currentTagName);
|
|
172
|
+
if (localNSMap) {
|
|
173
|
+
for (var prefix in localNSMap) {
|
|
174
|
+
if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
|
|
175
|
+
domBuilder.endPrefixMapping(prefix);
|
|
176
|
+
}
|
|
177
|
+
}
|
|
206
178
|
}
|
|
207
|
-
}
|
|
208
179
|
|
|
209
|
-
if (isHTML && !el.closed) {
|
|
210
|
-
end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
|
|
211
|
-
} else {
|
|
212
180
|
end++;
|
|
213
|
-
|
|
181
|
+
break;
|
|
182
|
+
// end element
|
|
183
|
+
case '?': // <?...?>
|
|
184
|
+
locator && position(tagStart);
|
|
185
|
+
end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
|
|
186
|
+
break;
|
|
187
|
+
case '!': // <!doctype,<![CDATA,<!--
|
|
188
|
+
locator && position(tagStart);
|
|
189
|
+
end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
|
|
190
|
+
break;
|
|
191
|
+
default:
|
|
192
|
+
locator && position(tagStart);
|
|
193
|
+
var el = new ElementAttributes();
|
|
194
|
+
var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
|
|
195
|
+
//elStartEnd
|
|
196
|
+
var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
|
|
197
|
+
var len = el.length;
|
|
198
|
+
|
|
199
|
+
if (!el.closed) {
|
|
200
|
+
if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
|
|
201
|
+
el.closed = true;
|
|
202
|
+
} else {
|
|
203
|
+
unclosedTags.push(el.tagName);
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
if (locator && len) {
|
|
207
|
+
var locator2 = copyLocator(locator, {});
|
|
208
|
+
//try{//attribute position fixed
|
|
209
|
+
for (var i = 0; i < len; i++) {
|
|
210
|
+
var a = el[i];
|
|
211
|
+
position(a.offset);
|
|
212
|
+
a.locator = copyLocator(locator, {});
|
|
213
|
+
}
|
|
214
|
+
domBuilder.locator = locator2;
|
|
215
|
+
if (appendElement(el, domBuilder, currentNSMap)) {
|
|
216
|
+
parseStack.push(el);
|
|
217
|
+
}
|
|
218
|
+
domBuilder.locator = locator;
|
|
219
|
+
} else {
|
|
220
|
+
if (appendElement(el, domBuilder, currentNSMap)) {
|
|
221
|
+
parseStack.push(el);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
if (isHTML && !el.closed) {
|
|
226
|
+
end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
|
|
227
|
+
} else {
|
|
228
|
+
end++;
|
|
229
|
+
}
|
|
214
230
|
}
|
|
215
|
-
}catch(e){
|
|
231
|
+
} catch (e) {
|
|
216
232
|
if (e instanceof ParseError) {
|
|
217
233
|
throw e;
|
|
218
234
|
}
|
|
219
|
-
errorHandler.error('element parse error: '+e)
|
|
235
|
+
errorHandler.error('element parse error: ' + e);
|
|
220
236
|
end = -1;
|
|
221
237
|
}
|
|
222
|
-
if(end>start){
|
|
238
|
+
if (end > start) {
|
|
223
239
|
start = end;
|
|
224
|
-
}else{
|
|
225
|
-
//
|
|
226
|
-
appendText(Math.max(tagStart,start)+1);
|
|
240
|
+
} else {
|
|
241
|
+
//Possible sax fallback here, risk of positional error
|
|
242
|
+
appendText(Math.max(tagStart, start) + 1);
|
|
227
243
|
}
|
|
228
244
|
}
|
|
229
245
|
}
|
|
230
|
-
|
|
246
|
+
|
|
247
|
+
function copyLocator(f, t) {
|
|
231
248
|
t.lineNumber = f.lineNumber;
|
|
232
249
|
t.columnNumber = f.columnNumber;
|
|
233
250
|
return t;
|
|
234
251
|
}
|
|
235
252
|
|
|
236
253
|
/**
|
|
237
|
-
* @
|
|
238
|
-
* @
|
|
254
|
+
* @returns end of the elementStartPart(end of elementEndPart for selfClosed el)
|
|
255
|
+
* @see {@link #appendElement}
|
|
239
256
|
*/
|
|
240
|
-
function parseElementStartPart(
|
|
241
|
-
source,start,el,currentNSMap,entityReplacer,errorHandler, isHTML
|
|
242
|
-
){
|
|
243
|
-
|
|
257
|
+
function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
|
|
244
258
|
/**
|
|
245
259
|
* @param {string} qname
|
|
246
260
|
* @param {string} value
|
|
@@ -248,7 +262,10 @@ function parseElementStartPart(
|
|
|
248
262
|
*/
|
|
249
263
|
function addAttribute(qname, value, startIndex) {
|
|
250
264
|
if (el.attributeNames.hasOwnProperty(qname)) {
|
|
251
|
-
errorHandler.fatalError('Attribute ' + qname + ' redefined')
|
|
265
|
+
return errorHandler.fatalError('Attribute ' + qname + ' redefined');
|
|
266
|
+
}
|
|
267
|
+
if (!isHTML && value.indexOf('<') >= 0) {
|
|
268
|
+
return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
|
|
252
269
|
}
|
|
253
270
|
el.addValue(
|
|
254
271
|
qname,
|
|
@@ -256,406 +273,627 @@ function parseElementStartPart(
|
|
|
256
273
|
// since the xmldom sax parser does not "interpret" DTD the following is not implemented:
|
|
257
274
|
// - recursive replacement of (DTD) entity references
|
|
258
275
|
// - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
|
|
259
|
-
value.replace(/[\t\n\r]/g, ' ').replace(
|
|
276
|
+
value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
|
|
260
277
|
startIndex
|
|
261
|
-
)
|
|
278
|
+
);
|
|
262
279
|
}
|
|
280
|
+
|
|
263
281
|
var attrName;
|
|
264
282
|
var value;
|
|
265
283
|
var p = ++start;
|
|
266
|
-
var s = S_TAG
|
|
267
|
-
while(true){
|
|
284
|
+
var s = S_TAG; //status
|
|
285
|
+
while (true) {
|
|
268
286
|
var c = source.charAt(p);
|
|
269
|
-
switch(c){
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
s
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
break;
|
|
281
|
-
case '\'':
|
|
282
|
-
case '"':
|
|
283
|
-
if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
|
|
284
|
-
){//equal
|
|
285
|
-
if(s === S_ATTR){
|
|
286
|
-
errorHandler.warning('attribute value must after "="')
|
|
287
|
-
attrName = source.slice(start,p)
|
|
287
|
+
switch (c) {
|
|
288
|
+
case '=':
|
|
289
|
+
if (s === S_ATTR) {
|
|
290
|
+
//attrName
|
|
291
|
+
attrName = source.slice(start, p);
|
|
292
|
+
s = S_EQ;
|
|
293
|
+
} else if (s === S_ATTR_SPACE) {
|
|
294
|
+
s = S_EQ;
|
|
295
|
+
} else {
|
|
296
|
+
//fatalError: equal must after attrName or space after attrName
|
|
297
|
+
throw new Error('attribute equal must after attrName'); // No known test case
|
|
288
298
|
}
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
299
|
+
break;
|
|
300
|
+
case "'":
|
|
301
|
+
case '"':
|
|
302
|
+
if (
|
|
303
|
+
s === S_EQ ||
|
|
304
|
+
s === S_ATTR //|| s == S_ATTR_SPACE
|
|
305
|
+
) {
|
|
306
|
+
//equal
|
|
307
|
+
if (s === S_ATTR) {
|
|
308
|
+
errorHandler.warning('attribute value must after "="');
|
|
309
|
+
attrName = source.slice(start, p);
|
|
310
|
+
}
|
|
311
|
+
start = p + 1;
|
|
312
|
+
p = source.indexOf(c, start);
|
|
313
|
+
if (p > 0) {
|
|
314
|
+
value = source.slice(start, p);
|
|
315
|
+
addAttribute(attrName, value, start - 1);
|
|
316
|
+
s = S_ATTR_END;
|
|
317
|
+
} else {
|
|
318
|
+
//fatalError: no end quot match
|
|
319
|
+
throw new Error("attribute value no end '" + c + "' match");
|
|
320
|
+
}
|
|
321
|
+
} else if (s == S_ATTR_NOQUOT_VALUE) {
|
|
292
322
|
value = source.slice(start, p);
|
|
293
|
-
addAttribute(attrName, value, start
|
|
323
|
+
addAttribute(attrName, value, start);
|
|
324
|
+
errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
|
|
325
|
+
start = p + 1;
|
|
294
326
|
s = S_ATTR_END;
|
|
295
|
-
}else{
|
|
296
|
-
//fatalError: no
|
|
297
|
-
throw new Error('attribute value
|
|
327
|
+
} else {
|
|
328
|
+
//fatalError: no equal before
|
|
329
|
+
throw new Error('attribute value must after "="'); // No known test case
|
|
298
330
|
}
|
|
299
|
-
}else if(s == S_ATTR_NOQUOT_VALUE){
|
|
300
|
-
value = source.slice(start, p);
|
|
301
|
-
addAttribute(attrName, value, start);
|
|
302
|
-
errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
|
|
303
|
-
start = p+1;
|
|
304
|
-
s = S_ATTR_END
|
|
305
|
-
}else{
|
|
306
|
-
//fatalError: no equal before
|
|
307
|
-
throw new Error('attribute value must after "="'); // No known test case
|
|
308
|
-
}
|
|
309
|
-
break;
|
|
310
|
-
case '/':
|
|
311
|
-
switch(s){
|
|
312
|
-
case S_TAG:
|
|
313
|
-
el.setTagName(source.slice(start,p));
|
|
314
|
-
case S_ATTR_END:
|
|
315
|
-
case S_TAG_SPACE:
|
|
316
|
-
case S_TAG_CLOSE:
|
|
317
|
-
s =S_TAG_CLOSE;
|
|
318
|
-
el.closed = true;
|
|
319
|
-
case S_ATTR_NOQUOT_VALUE:
|
|
320
|
-
case S_ATTR:
|
|
321
|
-
case S_ATTR_SPACE:
|
|
322
331
|
break;
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
break;//normal
|
|
342
|
-
case S_ATTR_NOQUOT_VALUE://Compatible state
|
|
343
|
-
case S_ATTR:
|
|
344
|
-
value = source.slice(start,p);
|
|
345
|
-
if(value.slice(-1) === '/'){
|
|
346
|
-
el.closed = true;
|
|
347
|
-
value = value.slice(0,-1)
|
|
348
|
-
}
|
|
349
|
-
case S_ATTR_SPACE:
|
|
350
|
-
if(s === S_ATTR_SPACE){
|
|
351
|
-
value = attrName;
|
|
352
|
-
}
|
|
353
|
-
if(s == S_ATTR_NOQUOT_VALUE){
|
|
354
|
-
errorHandler.warning('attribute "'+value+'" missed quot(")!');
|
|
355
|
-
addAttribute(attrName, value, start)
|
|
356
|
-
}else{
|
|
357
|
-
if(!isHTML){
|
|
358
|
-
errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
|
|
359
|
-
}
|
|
360
|
-
addAttribute(value, value, start)
|
|
332
|
+
case '/':
|
|
333
|
+
switch (s) {
|
|
334
|
+
case S_TAG:
|
|
335
|
+
el.setTagName(source.slice(start, p));
|
|
336
|
+
case S_ATTR_END:
|
|
337
|
+
case S_TAG_SPACE:
|
|
338
|
+
case S_TAG_CLOSE:
|
|
339
|
+
s = S_TAG_CLOSE;
|
|
340
|
+
el.closed = true;
|
|
341
|
+
case S_ATTR_NOQUOT_VALUE:
|
|
342
|
+
case S_ATTR:
|
|
343
|
+
break;
|
|
344
|
+
case S_ATTR_SPACE:
|
|
345
|
+
el.closed = true;
|
|
346
|
+
break;
|
|
347
|
+
//case S_EQ:
|
|
348
|
+
default:
|
|
349
|
+
throw new Error("attribute invalid close char('/')"); // No known test case
|
|
361
350
|
}
|
|
362
351
|
break;
|
|
363
|
-
case
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
return p;
|
|
368
|
-
/*xml space '\x20' | #x9 | #xD | #xA; */
|
|
369
|
-
case '\u0080':
|
|
370
|
-
c = ' ';
|
|
371
|
-
default:
|
|
372
|
-
if(c<= ' '){//space
|
|
373
|
-
switch(s){
|
|
374
|
-
case S_TAG:
|
|
375
|
-
el.setTagName(source.slice(start,p));//tagName
|
|
376
|
-
s = S_TAG_SPACE;
|
|
377
|
-
break;
|
|
378
|
-
case S_ATTR:
|
|
379
|
-
attrName = source.slice(start,p)
|
|
380
|
-
s = S_ATTR_SPACE;
|
|
381
|
-
break;
|
|
382
|
-
case S_ATTR_NOQUOT_VALUE:
|
|
383
|
-
var value = source.slice(start, p);
|
|
384
|
-
errorHandler.warning('attribute "'+value+'" missed quot(")!!');
|
|
385
|
-
addAttribute(attrName, value, start)
|
|
386
|
-
case S_ATTR_END:
|
|
387
|
-
s = S_TAG_SPACE;
|
|
388
|
-
break;
|
|
389
|
-
//case S_TAG_SPACE:
|
|
390
|
-
//case S_EQ:
|
|
391
|
-
//case S_ATTR_SPACE:
|
|
392
|
-
// void();break;
|
|
393
|
-
//case S_TAG_CLOSE:
|
|
394
|
-
//ignore warning
|
|
352
|
+
case '': //end document
|
|
353
|
+
errorHandler.error('unexpected end of input');
|
|
354
|
+
if (s == S_TAG) {
|
|
355
|
+
el.setTagName(source.slice(start, p));
|
|
395
356
|
}
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
357
|
+
return p;
|
|
358
|
+
case '>':
|
|
359
|
+
switch (s) {
|
|
360
|
+
case S_TAG:
|
|
361
|
+
el.setTagName(source.slice(start, p));
|
|
362
|
+
case S_ATTR_END:
|
|
363
|
+
case S_TAG_SPACE:
|
|
364
|
+
case S_TAG_CLOSE:
|
|
365
|
+
break; //normal
|
|
366
|
+
case S_ATTR_NOQUOT_VALUE: //Compatible state
|
|
367
|
+
case S_ATTR:
|
|
368
|
+
value = source.slice(start, p);
|
|
369
|
+
if (value.slice(-1) === '/') {
|
|
370
|
+
el.closed = true;
|
|
371
|
+
value = value.slice(0, -1);
|
|
372
|
+
}
|
|
373
|
+
case S_ATTR_SPACE:
|
|
374
|
+
if (s === S_ATTR_SPACE) {
|
|
375
|
+
value = attrName;
|
|
376
|
+
}
|
|
377
|
+
if (s == S_ATTR_NOQUOT_VALUE) {
|
|
378
|
+
errorHandler.warning('attribute "' + value + '" missed quot(")!');
|
|
379
|
+
addAttribute(attrName, value, start);
|
|
380
|
+
} else {
|
|
381
|
+
if (!isHTML) {
|
|
382
|
+
errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
|
|
383
|
+
}
|
|
384
|
+
addAttribute(value, value, start);
|
|
385
|
+
}
|
|
386
|
+
break;
|
|
387
|
+
case S_EQ:
|
|
388
|
+
if (!isHTML) {
|
|
389
|
+
return errorHandler.fatalError('AttValue: \' or " expected');
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
return p;
|
|
393
|
+
/*xml space '\x20' | #x9 | #xD | #xA; */
|
|
394
|
+
case '\u0080':
|
|
395
|
+
c = ' ';
|
|
396
|
+
default:
|
|
397
|
+
if (c <= ' ') {
|
|
398
|
+
//space
|
|
399
|
+
switch (s) {
|
|
400
|
+
case S_TAG:
|
|
401
|
+
el.setTagName(source.slice(start, p)); //tagName
|
|
402
|
+
s = S_TAG_SPACE;
|
|
403
|
+
break;
|
|
404
|
+
case S_ATTR:
|
|
405
|
+
attrName = source.slice(start, p);
|
|
406
|
+
s = S_ATTR_SPACE;
|
|
407
|
+
break;
|
|
408
|
+
case S_ATTR_NOQUOT_VALUE:
|
|
409
|
+
var value = source.slice(start, p);
|
|
410
|
+
errorHandler.warning('attribute "' + value + '" missed quot(")!!');
|
|
411
|
+
addAttribute(attrName, value, start);
|
|
412
|
+
case S_ATTR_END:
|
|
413
|
+
s = S_TAG_SPACE;
|
|
414
|
+
break;
|
|
415
|
+
//case S_TAG_SPACE:
|
|
416
|
+
//case S_EQ:
|
|
417
|
+
//case S_ATTR_SPACE:
|
|
418
|
+
// void();break;
|
|
419
|
+
//case S_TAG_CLOSE:
|
|
420
|
+
//ignore warning
|
|
421
|
+
}
|
|
422
|
+
} else {
|
|
423
|
+
//not space
|
|
424
|
+
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
|
425
|
+
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
|
|
426
|
+
switch (s) {
|
|
427
|
+
//case S_TAG:void();break;
|
|
428
|
+
//case S_ATTR:void();break;
|
|
429
|
+
//case S_ATTR_NOQUOT_VALUE:void();break;
|
|
430
|
+
case S_ATTR_SPACE:
|
|
431
|
+
if (!isHTML) {
|
|
432
|
+
errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
|
|
433
|
+
}
|
|
434
|
+
addAttribute(attrName, attrName, start);
|
|
435
|
+
start = p;
|
|
436
|
+
s = S_ATTR;
|
|
437
|
+
break;
|
|
438
|
+
case S_ATTR_END:
|
|
439
|
+
errorHandler.warning('attribute space is required"' + attrName + '"!!');
|
|
440
|
+
case S_TAG_SPACE:
|
|
441
|
+
s = S_ATTR;
|
|
442
|
+
start = p;
|
|
443
|
+
break;
|
|
444
|
+
case S_EQ:
|
|
445
|
+
s = S_ATTR_NOQUOT_VALUE;
|
|
446
|
+
start = p;
|
|
447
|
+
break;
|
|
448
|
+
case S_TAG_CLOSE:
|
|
449
|
+
throw new Error("elements closed character '/' and '>' must be connected to");
|
|
407
450
|
}
|
|
408
|
-
addAttribute(attrName, attrName, start);
|
|
409
|
-
start = p;
|
|
410
|
-
s = S_ATTR;
|
|
411
|
-
break;
|
|
412
|
-
case S_ATTR_END:
|
|
413
|
-
errorHandler.warning('attribute space is required"'+attrName+'"!!')
|
|
414
|
-
case S_TAG_SPACE:
|
|
415
|
-
s = S_ATTR;
|
|
416
|
-
start = p;
|
|
417
|
-
break;
|
|
418
|
-
case S_EQ:
|
|
419
|
-
s = S_ATTR_NOQUOT_VALUE;
|
|
420
|
-
start = p;
|
|
421
|
-
break;
|
|
422
|
-
case S_TAG_CLOSE:
|
|
423
|
-
throw new Error("elements closed character '/' and '>' must be connected to");
|
|
424
451
|
}
|
|
425
|
-
|
|
426
|
-
}//end outer switch
|
|
452
|
+
} //end outer switch
|
|
427
453
|
//console.log('p++',p)
|
|
428
454
|
p++;
|
|
429
455
|
}
|
|
430
456
|
}
|
|
457
|
+
|
|
431
458
|
/**
|
|
432
|
-
* @
|
|
459
|
+
* @returns `true` if a new namespace has been defined.
|
|
433
460
|
*/
|
|
434
|
-
function appendElement(el,domBuilder,currentNSMap){
|
|
461
|
+
function appendElement(el, domBuilder, currentNSMap) {
|
|
435
462
|
var tagName = el.tagName;
|
|
436
463
|
var localNSMap = null;
|
|
437
464
|
//var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
|
|
438
465
|
var i = el.length;
|
|
439
|
-
while(i--){
|
|
466
|
+
while (i--) {
|
|
440
467
|
var a = el[i];
|
|
441
468
|
var qName = a.qName;
|
|
442
469
|
var value = a.value;
|
|
443
470
|
var nsp = qName.indexOf(':');
|
|
444
|
-
if(nsp>0){
|
|
445
|
-
var prefix = a.prefix = qName.slice(0,nsp);
|
|
446
|
-
var localName = qName.slice(nsp+1);
|
|
447
|
-
var nsPrefix = prefix === 'xmlns' && localName
|
|
448
|
-
}else{
|
|
471
|
+
if (nsp > 0) {
|
|
472
|
+
var prefix = (a.prefix = qName.slice(0, nsp));
|
|
473
|
+
var localName = qName.slice(nsp + 1);
|
|
474
|
+
var nsPrefix = prefix === 'xmlns' && localName;
|
|
475
|
+
} else {
|
|
449
476
|
localName = qName;
|
|
450
|
-
prefix = null
|
|
451
|
-
nsPrefix = qName === 'xmlns' && ''
|
|
477
|
+
prefix = null;
|
|
478
|
+
nsPrefix = qName === 'xmlns' && '';
|
|
452
479
|
}
|
|
453
480
|
//can not set prefix,because prefix !== ''
|
|
454
|
-
a.localName = localName
|
|
481
|
+
a.localName = localName;
|
|
455
482
|
//prefix == null for no ns prefix attribute
|
|
456
|
-
if(nsPrefix !== false){
|
|
457
|
-
|
|
458
|
-
|
|
483
|
+
if (nsPrefix !== false) {
|
|
484
|
+
//hack!!
|
|
485
|
+
if (localNSMap == null) {
|
|
486
|
+
localNSMap = {};
|
|
459
487
|
//console.log(currentNSMap,0)
|
|
460
|
-
_copy(currentNSMap,currentNSMap={})
|
|
488
|
+
_copy(currentNSMap, (currentNSMap = {}));
|
|
461
489
|
//console.log(currentNSMap,1)
|
|
462
490
|
}
|
|
463
491
|
currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
|
|
464
|
-
a.uri = NAMESPACE.XMLNS
|
|
465
|
-
domBuilder.startPrefixMapping(nsPrefix, value)
|
|
492
|
+
a.uri = NAMESPACE.XMLNS;
|
|
493
|
+
domBuilder.startPrefixMapping(nsPrefix, value);
|
|
466
494
|
}
|
|
467
495
|
}
|
|
468
496
|
var i = el.length;
|
|
469
|
-
while(i--){
|
|
497
|
+
while (i--) {
|
|
470
498
|
a = el[i];
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
if(prefix === 'xml'){
|
|
499
|
+
if (a.prefix) {
|
|
500
|
+
//no prefix attribute has no namespace
|
|
501
|
+
if (a.prefix === 'xml') {
|
|
474
502
|
a.uri = NAMESPACE.XML;
|
|
475
|
-
}
|
|
476
|
-
|
|
503
|
+
}
|
|
504
|
+
if (a.prefix !== 'xmlns') {
|
|
505
|
+
a.uri = currentNSMap[a.prefix];
|
|
477
506
|
}
|
|
478
507
|
}
|
|
479
508
|
}
|
|
480
509
|
var nsp = tagName.indexOf(':');
|
|
481
|
-
if(nsp>0){
|
|
482
|
-
prefix = el.prefix = tagName.slice(0,nsp);
|
|
483
|
-
localName = el.localName = tagName.slice(nsp+1);
|
|
484
|
-
}else{
|
|
485
|
-
prefix = null
|
|
510
|
+
if (nsp > 0) {
|
|
511
|
+
prefix = el.prefix = tagName.slice(0, nsp);
|
|
512
|
+
localName = el.localName = tagName.slice(nsp + 1);
|
|
513
|
+
} else {
|
|
514
|
+
prefix = null; //important!!
|
|
486
515
|
localName = el.localName = tagName;
|
|
487
516
|
}
|
|
488
517
|
//no prefix element has default namespace
|
|
489
|
-
var ns = el.uri = currentNSMap[prefix || ''];
|
|
490
|
-
domBuilder.startElement(ns,localName,tagName,el);
|
|
518
|
+
var ns = (el.uri = currentNSMap[prefix || '']);
|
|
519
|
+
domBuilder.startElement(ns, localName, tagName, el);
|
|
491
520
|
//endPrefixMapping and startPrefixMapping have not any help for dom builder
|
|
492
521
|
//localNSMap = null
|
|
493
|
-
if(el.closed){
|
|
494
|
-
domBuilder.endElement(ns,localName,tagName);
|
|
495
|
-
if(localNSMap){
|
|
496
|
-
for(prefix in localNSMap){
|
|
497
|
-
|
|
522
|
+
if (el.closed) {
|
|
523
|
+
domBuilder.endElement(ns, localName, tagName);
|
|
524
|
+
if (localNSMap) {
|
|
525
|
+
for (prefix in localNSMap) {
|
|
526
|
+
if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
|
|
527
|
+
domBuilder.endPrefixMapping(prefix);
|
|
528
|
+
}
|
|
498
529
|
}
|
|
499
530
|
}
|
|
500
|
-
}else{
|
|
531
|
+
} else {
|
|
501
532
|
el.currentNSMap = currentNSMap;
|
|
502
533
|
el.localNSMap = localNSMap;
|
|
503
534
|
//parseStack.push(el);
|
|
504
535
|
return true;
|
|
505
536
|
}
|
|
506
537
|
}
|
|
507
|
-
|
|
538
|
+
|
|
539
|
+
function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
|
|
508
540
|
// https://html.spec.whatwg.org/#raw-text-elements
|
|
509
541
|
// https://html.spec.whatwg.org/#escapable-raw-text-elements
|
|
510
542
|
// https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
|
|
511
543
|
// TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
|
|
512
544
|
var isEscapableRaw = isHTMLEscapableRawTextElement(tagName);
|
|
513
|
-
if(isEscapableRaw || isHTMLRawTextElement(tagName)){
|
|
514
|
-
var elEndStart =
|
|
515
|
-
var text = source.substring(elStartEnd+1,elEndStart);
|
|
545
|
+
if (isEscapableRaw || isHTMLRawTextElement(tagName)) {
|
|
546
|
+
var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
|
|
547
|
+
var text = source.substring(elStartEnd + 1, elEndStart);
|
|
516
548
|
|
|
517
|
-
if(isEscapableRaw){
|
|
518
|
-
|
|
549
|
+
if (isEscapableRaw) {
|
|
550
|
+
text = text.replace(ENTITY_REG, entityReplacer);
|
|
519
551
|
}
|
|
520
|
-
|
|
521
|
-
|
|
552
|
+
domBuilder.characters(text, 0, text.length);
|
|
553
|
+
return elEndStart;
|
|
522
554
|
}
|
|
523
|
-
return elStartEnd+1;
|
|
555
|
+
return elStartEnd + 1;
|
|
524
556
|
}
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
var
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
pos = source.lastIndexOf('</'+tagName+'>')
|
|
531
|
-
if(pos<elStartEnd){//忘记闭合
|
|
532
|
-
pos = source.lastIndexOf('</'+tagName)
|
|
557
|
+
|
|
558
|
+
function _copy(source, target) {
|
|
559
|
+
for (var n in source) {
|
|
560
|
+
if (Object.prototype.hasOwnProperty.call(source, n)) {
|
|
561
|
+
target[n] = source[n];
|
|
533
562
|
}
|
|
534
|
-
closeMap[tagName] =pos
|
|
535
563
|
}
|
|
536
|
-
return pos<elStartEnd;
|
|
537
|
-
//}
|
|
538
|
-
}
|
|
539
|
-
function _copy(source,target){
|
|
540
|
-
for(var n in source){target[n] = source[n]}
|
|
541
564
|
}
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
565
|
+
|
|
566
|
+
/**
|
|
567
|
+
* @typedef ParseUtils
|
|
568
|
+
* @property {function(relativeIndex: number?): string | undefined} char
|
|
569
|
+
* Provides look ahead access to a singe character relative to the current index.
|
|
570
|
+
* @property {function(): number} getIndex
|
|
571
|
+
* Provides read-only access to the current index.
|
|
572
|
+
* @property {function(reg: RegExp): string | null} getMatch
|
|
573
|
+
* Applies the provided regular expression enforcing that it starts at the current index and
|
|
574
|
+
* returns the complete matching string,
|
|
575
|
+
* and moves the current index by the length of the matching string.
|
|
576
|
+
* @property {function(): string} getSource
|
|
577
|
+
* Provides read-only access to the complete source.
|
|
578
|
+
* @property {function(places: number?): void} skip
|
|
579
|
+
* moves the current index by places (defaults to 1)
|
|
580
|
+
* @property {function(): number} skipBlanks
|
|
581
|
+
* Moves the current index by the amount of white space that directly follows the current index
|
|
582
|
+
* and returns the amount of whitespace chars skipped (0..n),
|
|
583
|
+
* or -1 if the end of the source was reached.
|
|
584
|
+
* @property {function(): string} substringFromIndex
|
|
585
|
+
* creates a substring from the current index to the end of `source`
|
|
586
|
+
* @property {function(compareWith: string): boolean} substringStartsWith
|
|
587
|
+
* Checks if source contains `compareWith`,
|
|
588
|
+
* starting from the current index.
|
|
589
|
+
* @see {@link parseUtils}
|
|
590
|
+
*/
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* A temporary scope for parsing and look ahead operations in `source`,
|
|
594
|
+
* starting from index `start`.
|
|
595
|
+
*
|
|
596
|
+
* Some operations move the current index by a number of positions,
|
|
597
|
+
* after which `getIndex` returns the new index.
|
|
598
|
+
*
|
|
599
|
+
* @param {string} source
|
|
600
|
+
* @param {number} start
|
|
601
|
+
* @returns {ParseUtils}
|
|
602
|
+
*/
|
|
603
|
+
function parseUtils(source, start) {
|
|
604
|
+
var index = start;
|
|
605
|
+
|
|
606
|
+
function char(n) {
|
|
607
|
+
n = n || 0;
|
|
608
|
+
return source.charAt(index + n);
|
|
609
|
+
}
|
|
610
|
+
|
|
611
|
+
function skip(n) {
|
|
612
|
+
n = n || 1;
|
|
613
|
+
index += n;
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
function skipBlanks() {
|
|
617
|
+
var blanks = 0;
|
|
618
|
+
while (index < source.length) {
|
|
619
|
+
var c = char();
|
|
620
|
+
if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
|
|
621
|
+
return blanks;
|
|
555
622
|
}
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
623
|
+
blanks++;
|
|
624
|
+
skip();
|
|
625
|
+
}
|
|
626
|
+
return -1;
|
|
627
|
+
}
|
|
628
|
+
function substringFromIndex() {
|
|
629
|
+
return source.substring(index);
|
|
630
|
+
}
|
|
631
|
+
function substringStartsWith(text) {
|
|
632
|
+
return source.substring(index, index + text.length) === text;
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
function getMatch(args) {
|
|
636
|
+
var expr = g.reg('^', args);
|
|
637
|
+
var match = expr.exec(substringFromIndex());
|
|
638
|
+
if (match) {
|
|
639
|
+
skip(match[0].length);
|
|
640
|
+
return match[0];
|
|
641
|
+
}
|
|
642
|
+
return null;
|
|
643
|
+
}
|
|
644
|
+
return {
|
|
645
|
+
char: char,
|
|
646
|
+
getIndex: function () {
|
|
647
|
+
return index;
|
|
648
|
+
},
|
|
649
|
+
getMatch: getMatch,
|
|
650
|
+
getSource: function () {
|
|
651
|
+
return source;
|
|
652
|
+
},
|
|
653
|
+
skip: skip,
|
|
654
|
+
skipBlanks: skipBlanks,
|
|
655
|
+
substringFromIndex: substringFromIndex,
|
|
656
|
+
substringStartsWith: substringStartsWith,
|
|
657
|
+
};
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
/**
|
|
661
|
+
* @param {ParseUtils} p
|
|
662
|
+
* @param {DOMHandler} errorHandler
|
|
663
|
+
* @returns {string}
|
|
664
|
+
*/
|
|
665
|
+
function parseDoctypeInternalSubset(p, errorHandler) {
|
|
666
|
+
/**
|
|
667
|
+
* @param {ParseUtils} p
|
|
668
|
+
* @param {DOMHandler} errorHandler
|
|
669
|
+
* @returns {string}
|
|
670
|
+
*/
|
|
671
|
+
function parsePI(p, errorHandler) {
|
|
672
|
+
var match = g.PI.exec(p.substringFromIndex());
|
|
673
|
+
if (!match) {
|
|
674
|
+
return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
|
|
559
675
|
}
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
domBuilder.characters(source,start+9,end-start-9);
|
|
565
|
-
domBuilder.endCDATA()
|
|
566
|
-
return end+3;
|
|
676
|
+
if (match[1].toLowerCase() === 'xml') {
|
|
677
|
+
return errorHandler.fatalError(
|
|
678
|
+
'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
|
|
679
|
+
);
|
|
567
680
|
}
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
681
|
+
p.skip(match[0].length);
|
|
682
|
+
return match[0];
|
|
683
|
+
}
|
|
684
|
+
// Parse internal subset
|
|
685
|
+
var source = p.getSource();
|
|
686
|
+
if (p.char() === '[') {
|
|
687
|
+
p.skip(1);
|
|
688
|
+
var intSubsetStart = p.getIndex();
|
|
689
|
+
p.skipBlanks();
|
|
690
|
+
while (p.getIndex() < source.length) {
|
|
691
|
+
var current = null;
|
|
692
|
+
// Only in external subset
|
|
693
|
+
// if (char() === '<' && char(1) === '!' && char(2) === '[') {
|
|
694
|
+
// parseConditionalSections(p, errorHandler);
|
|
695
|
+
// } else
|
|
696
|
+
if (p.char() === '<' && p.char(1) === '!') {
|
|
697
|
+
switch (p.char(2)) {
|
|
698
|
+
case 'E':
|
|
699
|
+
if (p.char(3) === 'L') {
|
|
700
|
+
current = p.getMatch(g.elementdecl);
|
|
701
|
+
} else if (p.char(3) === 'N') {
|
|
702
|
+
current = p.getMatch(g.EntityDecl);
|
|
703
|
+
}
|
|
704
|
+
break;
|
|
705
|
+
case 'A':
|
|
706
|
+
current = p.getMatch(g.AttlistDecl);
|
|
707
|
+
break;
|
|
708
|
+
case 'N':
|
|
709
|
+
current = p.getMatch(g.NotationDecl);
|
|
710
|
+
break;
|
|
711
|
+
case '-':
|
|
712
|
+
current = p.getMatch(g.Comment);
|
|
713
|
+
break;
|
|
582
714
|
}
|
|
715
|
+
} else if (p.char() === '<' && p.char(1) === '?') {
|
|
716
|
+
current = parsePI(p, errorHandler);
|
|
717
|
+
} else if (p.char() === '%') {
|
|
718
|
+
current = p.getMatch(g.PEReference);
|
|
719
|
+
} else {
|
|
720
|
+
return errorHandler.fatalError('Error detected in Markup declaration');
|
|
583
721
|
}
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
722
|
+
if (!current) {
|
|
723
|
+
return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
|
|
724
|
+
}
|
|
725
|
+
p.skipBlanks();
|
|
726
|
+
if (p.char() === ']') {
|
|
727
|
+
var internalSubset = source.substring(intSubsetStart, p.getIndex());
|
|
728
|
+
p.skip(1);
|
|
729
|
+
return internalSubset;
|
|
730
|
+
}
|
|
731
|
+
p.skipBlanks();
|
|
589
732
|
}
|
|
733
|
+
return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
|
|
590
734
|
}
|
|
591
|
-
return -1;
|
|
592
735
|
}
|
|
593
736
|
|
|
737
|
+
/**
|
|
738
|
+
* Called when the parser encounters an element starting with '<!'.
|
|
739
|
+
*
|
|
740
|
+
* @param {string} source
|
|
741
|
+
* The xml.
|
|
742
|
+
* @param {number} start
|
|
743
|
+
* the start index of the '<!'
|
|
744
|
+
* @param {DOMHandler} domBuilder
|
|
745
|
+
* @param {DOMHandler} errorHandler
|
|
746
|
+
* @param {boolean} isHTML
|
|
747
|
+
* @returns {number | never} The end index of the element.
|
|
748
|
+
* @throws {ParseError}
|
|
749
|
+
* In case the element is not well-formed.
|
|
750
|
+
*/
|
|
751
|
+
function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
|
|
752
|
+
var p = parseUtils(source, start);
|
|
753
|
+
|
|
754
|
+
switch (p.char(2)) {
|
|
755
|
+
case '-':
|
|
756
|
+
// should be a comment
|
|
757
|
+
var comment = p.getMatch(g.Comment);
|
|
758
|
+
if (comment) {
|
|
759
|
+
domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
|
|
760
|
+
return p.getIndex();
|
|
761
|
+
} else {
|
|
762
|
+
return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
|
|
763
|
+
}
|
|
764
|
+
case '[':
|
|
765
|
+
// should be CDATA
|
|
766
|
+
var cdata = p.getMatch(g.CDSect);
|
|
767
|
+
if (cdata) {
|
|
768
|
+
if (!isHTML && !domBuilder.currentElement) {
|
|
769
|
+
return errorHandler.fatalError('CDATA outside of element');
|
|
770
|
+
}
|
|
771
|
+
domBuilder.startCDATA();
|
|
772
|
+
domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
|
|
773
|
+
domBuilder.endCDATA();
|
|
774
|
+
return p.getIndex();
|
|
775
|
+
} else {
|
|
776
|
+
return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
|
|
777
|
+
}
|
|
778
|
+
case 'D': {
|
|
779
|
+
// should be DOCTYPE
|
|
780
|
+
var doctype = {
|
|
781
|
+
name: undefined,
|
|
782
|
+
publicId: undefined,
|
|
783
|
+
systemId: undefined,
|
|
784
|
+
internalSubset: undefined,
|
|
785
|
+
};
|
|
786
|
+
|
|
787
|
+
if (!p.substringStartsWith(g.DOCTYPE_DECL_START)) {
|
|
788
|
+
return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
|
|
789
|
+
}
|
|
790
|
+
p.skip(g.DOCTYPE_DECL_START.length);
|
|
791
|
+
if (p.skipBlanks() < 1) {
|
|
792
|
+
return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
// Parse the DOCTYPE name
|
|
796
|
+
doctype.name = p.getMatch(g.Name);
|
|
797
|
+
if (!doctype.name)
|
|
798
|
+
return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
|
|
799
|
+
p.skipBlanks();
|
|
800
|
+
|
|
801
|
+
// Check for ExternalID
|
|
802
|
+
if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
|
|
803
|
+
var match = g.ExternalID_match.exec(p.substringFromIndex());
|
|
804
|
+
if (!match) {
|
|
805
|
+
return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
|
|
806
|
+
}
|
|
807
|
+
if (match.groups.SystemLiteralOnly !== undefined) {
|
|
808
|
+
doctype.systemId = match.groups.SystemLiteralOnly;
|
|
809
|
+
} else {
|
|
810
|
+
doctype.systemId = match.groups.SystemLiteral;
|
|
811
|
+
doctype.publicId = match.groups.PubidLiteral;
|
|
812
|
+
}
|
|
813
|
+
p.skip(match[0].length);
|
|
814
|
+
}
|
|
594
815
|
|
|
816
|
+
p.skipBlanks();
|
|
817
|
+
doctype.internalSubset = parseDoctypeInternalSubset(p, errorHandler);
|
|
818
|
+
p.skipBlanks();
|
|
819
|
+
if (p.char() !== '>') {
|
|
820
|
+
return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
|
|
821
|
+
}
|
|
822
|
+
p.skip(1);
|
|
823
|
+
domBuilder.startDTD(doctype.name, doctype.publicId, doctype.systemId, doctype.internalSubset);
|
|
824
|
+
domBuilder.endDTD();
|
|
825
|
+
return p.getIndex();
|
|
826
|
+
}
|
|
827
|
+
default:
|
|
828
|
+
return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
|
|
829
|
+
}
|
|
830
|
+
}
|
|
595
831
|
|
|
596
|
-
function
|
|
597
|
-
var
|
|
598
|
-
if(
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
return
|
|
604
|
-
|
|
605
|
-
|
|
832
|
+
function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
|
|
833
|
+
var match = source.substring(start).match(g.PI);
|
|
834
|
+
if (!match) {
|
|
835
|
+
return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
|
|
836
|
+
}
|
|
837
|
+
if (match[1].toLowerCase() === 'xml') {
|
|
838
|
+
if (start > 0) {
|
|
839
|
+
return errorHandler.fatalError(
|
|
840
|
+
'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
|
|
841
|
+
);
|
|
842
|
+
}
|
|
843
|
+
if (!g.XMLDecl.test(source.substring(start))) {
|
|
844
|
+
return errorHandler.fatalError('xml declaration is not well-formed');
|
|
606
845
|
}
|
|
607
846
|
}
|
|
608
|
-
|
|
847
|
+
domBuilder.processingInstruction(match[1], match[2]);
|
|
848
|
+
return start + match[0].length;
|
|
609
849
|
}
|
|
610
850
|
|
|
611
|
-
function ElementAttributes(){
|
|
612
|
-
this.attributeNames = {}
|
|
851
|
+
function ElementAttributes() {
|
|
852
|
+
this.attributeNames = {};
|
|
613
853
|
}
|
|
854
|
+
|
|
614
855
|
ElementAttributes.prototype = {
|
|
615
|
-
setTagName:function(tagName){
|
|
616
|
-
if(!
|
|
617
|
-
throw new Error('invalid tagName:'+tagName)
|
|
856
|
+
setTagName: function (tagName) {
|
|
857
|
+
if (!g.QName_exact.test(tagName)) {
|
|
858
|
+
throw new Error('invalid tagName:' + tagName);
|
|
618
859
|
}
|
|
619
|
-
this.tagName = tagName
|
|
860
|
+
this.tagName = tagName;
|
|
620
861
|
},
|
|
621
|
-
addValue:function(qName, value, offset) {
|
|
622
|
-
if(!
|
|
623
|
-
throw new Error('invalid attribute:'+qName)
|
|
862
|
+
addValue: function (qName, value, offset) {
|
|
863
|
+
if (!g.QName_exact.test(qName)) {
|
|
864
|
+
throw new Error('invalid attribute:' + qName);
|
|
624
865
|
}
|
|
625
866
|
this.attributeNames[qName] = this.length;
|
|
626
|
-
this[this.length++] = {qName:qName,value:value,offset:offset}
|
|
867
|
+
this[this.length++] = { qName: qName, value: value, offset: offset };
|
|
627
868
|
},
|
|
628
|
-
length:0,
|
|
629
|
-
getLocalName:function(i){
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
buf.push(match);
|
|
656
|
-
if(match[1])return buf;
|
|
657
|
-
}
|
|
658
|
-
}
|
|
869
|
+
length: 0,
|
|
870
|
+
getLocalName: function (i) {
|
|
871
|
+
return this[i].localName;
|
|
872
|
+
},
|
|
873
|
+
getLocator: function (i) {
|
|
874
|
+
return this[i].locator;
|
|
875
|
+
},
|
|
876
|
+
getQName: function (i) {
|
|
877
|
+
return this[i].qName;
|
|
878
|
+
},
|
|
879
|
+
getURI: function (i) {
|
|
880
|
+
return this[i].uri;
|
|
881
|
+
},
|
|
882
|
+
getValue: function (i) {
|
|
883
|
+
return this[i].value;
|
|
884
|
+
},
|
|
885
|
+
// ,getIndex:function(uri, localName)){
|
|
886
|
+
// if(localName){
|
|
887
|
+
//
|
|
888
|
+
// }else{
|
|
889
|
+
// var qName = uri
|
|
890
|
+
// }
|
|
891
|
+
// },
|
|
892
|
+
// getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
|
|
893
|
+
// getType:function(uri,localName){}
|
|
894
|
+
// getType:function(i){},
|
|
895
|
+
};
|
|
659
896
|
|
|
660
897
|
exports.XMLReader = XMLReader;
|
|
661
|
-
exports.
|
|
898
|
+
exports.parseUtils = parseUtils;
|
|
899
|
+
exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;
|