@xmldom/xmldom 0.9.0-beta.1 → 0.9.0-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +286 -8
- package/SECURITY.md +8 -8
- package/index.d.ts +369 -21
- package/lib/.eslintrc.yml +1 -0
- package/lib/conventions.js +192 -112
- package/lib/dom-parser.js +301 -232
- package/lib/dom.js +1465 -871
- package/lib/entities.js +2150 -254
- package/lib/grammar.js +516 -0
- package/lib/index.js +19 -5
- package/lib/sax.js +681 -472
- package/package.json +15 -11
- package/readme.md +31 -42
package/lib/sax.js
CHANGED
|
@@ -1,246 +1,224 @@
|
|
|
1
|
-
'use strict'
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var conventions = require('./conventions');
|
|
4
|
+
var g = require('./grammar');
|
|
2
5
|
|
|
3
|
-
var conventions = require("./conventions");
|
|
4
|
-
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
|
|
5
6
|
var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
|
|
7
|
+
var isHTMLMimeType = conventions.isHTMLMimeType;
|
|
8
|
+
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
|
|
6
9
|
var NAMESPACE = conventions.NAMESPACE;
|
|
7
|
-
var
|
|
8
|
-
|
|
9
|
-
//[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
|
|
10
|
-
//[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
|
|
11
|
-
//[5] Name ::= NameStartChar (NameChar)*
|
|
12
|
-
var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
|
|
13
|
-
var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
|
|
14
|
-
var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
|
|
15
|
-
//var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
|
|
10
|
+
var ParseError = conventions.ParseError;
|
|
11
|
+
|
|
16
12
|
//var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
|
|
17
13
|
|
|
18
14
|
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
|
19
15
|
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
|
|
20
|
-
var S_TAG = 0
|
|
21
|
-
var S_ATTR = 1
|
|
22
|
-
var S_ATTR_SPACE=2
|
|
23
|
-
var S_EQ = 3
|
|
24
|
-
var S_ATTR_NOQUOT_VALUE = 4
|
|
25
|
-
var S_ATTR_END = 5
|
|
26
|
-
var S_TAG_SPACE = 6
|
|
27
|
-
var S_TAG_CLOSE = 7
|
|
28
|
-
|
|
29
|
-
/**
|
|
30
|
-
* Creates an error that will not be caught by XMLReader aka the SAX parser.
|
|
31
|
-
*
|
|
32
|
-
* @param {string} message
|
|
33
|
-
* @param {any?} locator Optional, can provide details about the location in the source
|
|
34
|
-
* @constructor
|
|
35
|
-
*/
|
|
36
|
-
function ParseError(message, locator) {
|
|
37
|
-
this.message = message
|
|
38
|
-
this.locator = locator
|
|
39
|
-
if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
|
|
40
|
-
}
|
|
41
|
-
ParseError.prototype = new Error();
|
|
42
|
-
ParseError.prototype.name = ParseError.name
|
|
43
|
-
|
|
44
|
-
function XMLReader(){
|
|
16
|
+
var S_TAG = 0; //tag name offerring
|
|
17
|
+
var S_ATTR = 1; //attr name offerring
|
|
18
|
+
var S_ATTR_SPACE = 2; //attr name end and space offer
|
|
19
|
+
var S_EQ = 3; //=space?
|
|
20
|
+
var S_ATTR_NOQUOT_VALUE = 4; //attr value(no quot value only)
|
|
21
|
+
var S_ATTR_END = 5; //attr value end and no space(quot end)
|
|
22
|
+
var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
|
|
23
|
+
var S_TAG_CLOSE = 7; //closed el<el />
|
|
45
24
|
|
|
46
|
-
}
|
|
25
|
+
function XMLReader() {}
|
|
47
26
|
|
|
48
27
|
XMLReader.prototype = {
|
|
49
|
-
parse:function(source,defaultNSMap,entityMap){
|
|
28
|
+
parse: function (source, defaultNSMap, entityMap) {
|
|
50
29
|
var domBuilder = this.domBuilder;
|
|
51
30
|
domBuilder.startDocument();
|
|
52
|
-
_copy(defaultNSMap
|
|
53
|
-
parse(source,defaultNSMap,entityMap,
|
|
54
|
-
domBuilder,this.errorHandler);
|
|
31
|
+
_copy(defaultNSMap, (defaultNSMap = {}));
|
|
32
|
+
parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
|
|
55
33
|
domBuilder.endDocument();
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
|
|
34
|
+
},
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
|
|
38
|
+
var isHTML = isHTMLMimeType(domBuilder.mimeType);
|
|
39
|
+
|
|
60
40
|
function fixedFromCharCode(code) {
|
|
61
41
|
// String.prototype.fromCharCode does not supports
|
|
62
42
|
// > 2 bytes unicode chars directly
|
|
63
43
|
if (code > 0xffff) {
|
|
64
44
|
code -= 0x10000;
|
|
65
|
-
var surrogate1 = 0xd800 + (code >> 10)
|
|
66
|
-
|
|
45
|
+
var surrogate1 = 0xd800 + (code >> 10),
|
|
46
|
+
surrogate2 = 0xdc00 + (code & 0x3ff);
|
|
67
47
|
|
|
68
48
|
return String.fromCharCode(surrogate1, surrogate2);
|
|
69
49
|
} else {
|
|
70
50
|
return String.fromCharCode(code);
|
|
71
51
|
}
|
|
72
52
|
}
|
|
73
|
-
|
|
74
|
-
|
|
53
|
+
|
|
54
|
+
function entityReplacer(a) {
|
|
55
|
+
var k = a.slice(1, -1);
|
|
75
56
|
if (Object.hasOwnProperty.call(entityMap, k)) {
|
|
76
57
|
return entityMap[k];
|
|
77
|
-
}else if(k.charAt(0) === '#'){
|
|
78
|
-
return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
|
|
79
|
-
}else{
|
|
80
|
-
errorHandler.error('entity not found:'+a);
|
|
58
|
+
} else if (k.charAt(0) === '#') {
|
|
59
|
+
return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
|
|
60
|
+
} else {
|
|
61
|
+
errorHandler.error('entity not found:' + a);
|
|
81
62
|
return a;
|
|
82
63
|
}
|
|
83
64
|
}
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
65
|
+
|
|
66
|
+
function appendText(end) {
|
|
67
|
+
//has some bugs
|
|
68
|
+
if (end > start) {
|
|
69
|
+
var xt = source.substring(start, end).replace(/&#?\w+;/g, entityReplacer);
|
|
70
|
+
locator && position(start);
|
|
71
|
+
domBuilder.characters(xt, 0, end - start);
|
|
72
|
+
start = end;
|
|
90
73
|
}
|
|
91
74
|
}
|
|
92
|
-
|
|
93
|
-
|
|
75
|
+
|
|
76
|
+
function position(p, m) {
|
|
77
|
+
while (p >= lineEnd && (m = linePattern.exec(source))) {
|
|
94
78
|
lineStart = m.index;
|
|
95
79
|
lineEnd = lineStart + m[0].length;
|
|
96
80
|
locator.lineNumber++;
|
|
97
|
-
//console.log('line++:',locator,startPos,endPos)
|
|
98
81
|
}
|
|
99
|
-
locator.columnNumber = p-lineStart+1;
|
|
82
|
+
locator.columnNumber = p - lineStart + 1;
|
|
100
83
|
}
|
|
84
|
+
|
|
101
85
|
var lineStart = 0;
|
|
102
86
|
var lineEnd = 0;
|
|
103
|
-
var linePattern = /.*(?:\r\n?|\n)|.*$/g
|
|
87
|
+
var linePattern = /.*(?:\r\n?|\n)|.*$/g;
|
|
104
88
|
var locator = domBuilder.locator;
|
|
105
89
|
|
|
106
|
-
var parseStack = [{currentNSMap:defaultNSMapCopy}]
|
|
90
|
+
var parseStack = [{ currentNSMap: defaultNSMapCopy }];
|
|
107
91
|
var closeMap = {};
|
|
108
92
|
var start = 0;
|
|
109
|
-
while(true){
|
|
110
|
-
try{
|
|
111
|
-
var tagStart = source.indexOf('<',start);
|
|
112
|
-
if(tagStart<0){
|
|
113
|
-
if(!source.substr(start).match(/^\s*$/)){
|
|
93
|
+
while (true) {
|
|
94
|
+
try {
|
|
95
|
+
var tagStart = source.indexOf('<', start);
|
|
96
|
+
if (tagStart < 0) {
|
|
97
|
+
if (!source.substr(start).match(/^\s*$/)) {
|
|
114
98
|
var doc = domBuilder.doc;
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
99
|
+
var text = doc.createTextNode(source.substr(start));
|
|
100
|
+
doc.appendChild(text);
|
|
101
|
+
domBuilder.currentElement = text;
|
|
118
102
|
}
|
|
119
103
|
return;
|
|
120
104
|
}
|
|
121
|
-
if(tagStart>start){
|
|
105
|
+
if (tagStart > start) {
|
|
122
106
|
appendText(tagStart);
|
|
123
107
|
}
|
|
124
|
-
switch(source.charAt(tagStart+1)){
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
}
|
|
139
|
-
var localNSMap = config.localNSMap;
|
|
140
|
-
var endMatch = config.tagName == tagName;
|
|
141
|
-
var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
|
|
142
|
-
if(endIgnoreCaseMach){
|
|
143
|
-
domBuilder.endElement(config.uri,config.localName,tagName);
|
|
144
|
-
if(localNSMap){
|
|
145
|
-
for(var prefix in localNSMap){
|
|
146
|
-
domBuilder.endPrefixMapping(prefix) ;
|
|
147
|
-
}
|
|
108
|
+
switch (source.charAt(tagStart + 1)) {
|
|
109
|
+
case '/':
|
|
110
|
+
var config = parseStack.pop();
|
|
111
|
+
var end = source.indexOf('>', tagStart + 3);
|
|
112
|
+
var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
|
|
113
|
+
var tagNameMatch = g.QName_group.exec(tagNameRaw);
|
|
114
|
+
// for the root level the config does not contain the tagName
|
|
115
|
+
var tagName =
|
|
116
|
+
tagNameMatch && tagNameMatch[1] ? tagNameMatch[1] : config.tagName || domBuilder.doc.documentElement.tagName;
|
|
117
|
+
if (end < 0) {
|
|
118
|
+
errorHandler.error('end tag name: ' + tagName + ' is not complete');
|
|
119
|
+
end = tagStart + 1 + tagName.length;
|
|
120
|
+
} else if (tagNameRaw.match(/</) && !isHTML) {
|
|
121
|
+
errorHandler.error('end tag name: ' + tagName + ' maybe not complete');
|
|
148
122
|
}
|
|
149
|
-
|
|
150
|
-
|
|
123
|
+
var localNSMap = config.localNSMap;
|
|
124
|
+
var endMatch = config.tagName == tagName;
|
|
125
|
+
var endIgnoreCaseMach = endMatch || (config.tagName && config.tagName.toLowerCase() == tagName.toLowerCase());
|
|
126
|
+
if (endIgnoreCaseMach) {
|
|
127
|
+
domBuilder.endElement(config.uri, config.localName, tagName);
|
|
128
|
+
if (localNSMap) {
|
|
129
|
+
for (var prefix in localNSMap) {
|
|
130
|
+
if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
|
|
131
|
+
domBuilder.endPrefixMapping(prefix);
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
if (!endMatch) {
|
|
136
|
+
// No known test case
|
|
137
|
+
return errorHandler.fatalError(
|
|
138
|
+
'end tag name: ' + tagName + ' is not match the current start tagName:' + config.tagName
|
|
139
|
+
);
|
|
140
|
+
}
|
|
141
|
+
} else {
|
|
142
|
+
parseStack.push(config);
|
|
151
143
|
}
|
|
152
|
-
}else{
|
|
153
|
-
parseStack.push(config)
|
|
154
|
-
}
|
|
155
144
|
|
|
156
|
-
|
|
157
|
-
|
|
145
|
+
end++;
|
|
146
|
+
break;
|
|
158
147
|
// end elment
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
el,
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
)
|
|
181
|
-
var len = el.length;
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
|
|
185
|
-
el.closed = true;
|
|
186
|
-
if(!isHTML){
|
|
187
|
-
errorHandler.warning('unclosed xml attribute');
|
|
188
|
-
}
|
|
189
|
-
}
|
|
190
|
-
if(locator && len){
|
|
191
|
-
var locator2 = copyLocator(locator,{});
|
|
192
|
-
//try{//attribute position fixed
|
|
193
|
-
for(var i = 0;i<len;i++){
|
|
194
|
-
var a = el[i];
|
|
195
|
-
position(a.offset);
|
|
196
|
-
a.locator = copyLocator(locator,{});
|
|
197
|
-
}
|
|
198
|
-
domBuilder.locator = locator2
|
|
199
|
-
if(appendElement(el,domBuilder,currentNSMap)){
|
|
200
|
-
parseStack.push(el)
|
|
148
|
+
case '?': // <?...?>
|
|
149
|
+
locator && position(tagStart);
|
|
150
|
+
end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
|
|
151
|
+
break;
|
|
152
|
+
case '!': // <!doctype,<![CDATA,<!--
|
|
153
|
+
locator && position(tagStart);
|
|
154
|
+
end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler);
|
|
155
|
+
break;
|
|
156
|
+
default:
|
|
157
|
+
locator && position(tagStart);
|
|
158
|
+
var el = new ElementAttributes();
|
|
159
|
+
var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
|
|
160
|
+
//elStartEnd
|
|
161
|
+
var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
|
|
162
|
+
var len = el.length;
|
|
163
|
+
|
|
164
|
+
if (!el.closed && fixSelfClosed(source, end, el.tagName, closeMap)) {
|
|
165
|
+
el.closed = true;
|
|
166
|
+
if (!isHTML) {
|
|
167
|
+
errorHandler.warning('unclosed xml attribute');
|
|
168
|
+
}
|
|
201
169
|
}
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
170
|
+
if (locator && len) {
|
|
171
|
+
var locator2 = copyLocator(locator, {});
|
|
172
|
+
//try{//attribute position fixed
|
|
173
|
+
for (var i = 0; i < len; i++) {
|
|
174
|
+
var a = el[i];
|
|
175
|
+
position(a.offset);
|
|
176
|
+
a.locator = copyLocator(locator, {});
|
|
177
|
+
}
|
|
178
|
+
domBuilder.locator = locator2;
|
|
179
|
+
if (appendElement(el, domBuilder, currentNSMap)) {
|
|
180
|
+
parseStack.push(el);
|
|
181
|
+
}
|
|
182
|
+
domBuilder.locator = locator;
|
|
183
|
+
} else {
|
|
184
|
+
if (appendElement(el, domBuilder, currentNSMap)) {
|
|
185
|
+
parseStack.push(el);
|
|
186
|
+
}
|
|
206
187
|
}
|
|
207
|
-
}
|
|
208
188
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
189
|
+
if (isHTML && !el.closed) {
|
|
190
|
+
end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
|
|
191
|
+
} else {
|
|
192
|
+
end++;
|
|
193
|
+
}
|
|
214
194
|
}
|
|
215
|
-
}catch(e){
|
|
195
|
+
} catch (e) {
|
|
216
196
|
if (e instanceof ParseError) {
|
|
217
197
|
throw e;
|
|
218
198
|
}
|
|
219
|
-
errorHandler.error('element parse error: '+e)
|
|
199
|
+
errorHandler.error('element parse error: ' + e);
|
|
220
200
|
end = -1;
|
|
221
201
|
}
|
|
222
|
-
if(end>start){
|
|
202
|
+
if (end > start) {
|
|
223
203
|
start = end;
|
|
224
|
-
}else{
|
|
204
|
+
} else {
|
|
225
205
|
//TODO: 这里有可能sax回退,有位置错误风险
|
|
226
|
-
appendText(Math.max(tagStart,start)+1);
|
|
206
|
+
appendText(Math.max(tagStart, start) + 1);
|
|
227
207
|
}
|
|
228
208
|
}
|
|
229
209
|
}
|
|
230
|
-
|
|
210
|
+
|
|
211
|
+
function copyLocator(f, t) {
|
|
231
212
|
t.lineNumber = f.lineNumber;
|
|
232
213
|
t.columnNumber = f.columnNumber;
|
|
233
214
|
return t;
|
|
234
215
|
}
|
|
235
216
|
|
|
236
217
|
/**
|
|
237
|
-
* @
|
|
238
|
-
* @
|
|
218
|
+
* @returns end of the elementStartPart(end of elementEndPart for selfClosed el)
|
|
219
|
+
* @see {@link #appendElement}
|
|
239
220
|
*/
|
|
240
|
-
function parseElementStartPart(
|
|
241
|
-
source,start,el,currentNSMap,entityReplacer,errorHandler, isHTML
|
|
242
|
-
){
|
|
243
|
-
|
|
221
|
+
function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
|
|
244
222
|
/**
|
|
245
223
|
* @param {string} qname
|
|
246
224
|
* @param {string} value
|
|
@@ -248,7 +226,7 @@ function parseElementStartPart(
|
|
|
248
226
|
*/
|
|
249
227
|
function addAttribute(qname, value, startIndex) {
|
|
250
228
|
if (el.attributeNames.hasOwnProperty(qname)) {
|
|
251
|
-
errorHandler.fatalError('Attribute ' + qname + ' redefined')
|
|
229
|
+
return errorHandler.fatalError('Attribute ' + qname + ' redefined');
|
|
252
230
|
}
|
|
253
231
|
el.addValue(
|
|
254
232
|
qname,
|
|
@@ -258,404 +236,635 @@ function parseElementStartPart(
|
|
|
258
236
|
// - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
|
|
259
237
|
value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
|
|
260
238
|
startIndex
|
|
261
|
-
)
|
|
239
|
+
);
|
|
262
240
|
}
|
|
241
|
+
|
|
263
242
|
var attrName;
|
|
264
243
|
var value;
|
|
265
244
|
var p = ++start;
|
|
266
|
-
var s = S_TAG
|
|
267
|
-
while(true){
|
|
245
|
+
var s = S_TAG; //status
|
|
246
|
+
while (true) {
|
|
268
247
|
var c = source.charAt(p);
|
|
269
|
-
switch(c){
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
s
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
break;
|
|
281
|
-
case '\'':
|
|
282
|
-
case '"':
|
|
283
|
-
if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
|
|
284
|
-
){//equal
|
|
285
|
-
if(s === S_ATTR){
|
|
286
|
-
errorHandler.warning('attribute value must after "="')
|
|
287
|
-
attrName = source.slice(start,p)
|
|
248
|
+
switch (c) {
|
|
249
|
+
case '=':
|
|
250
|
+
if (s === S_ATTR) {
|
|
251
|
+
//attrName
|
|
252
|
+
attrName = source.slice(start, p);
|
|
253
|
+
s = S_EQ;
|
|
254
|
+
} else if (s === S_ATTR_SPACE) {
|
|
255
|
+
s = S_EQ;
|
|
256
|
+
} else {
|
|
257
|
+
//fatalError: equal must after attrName or space after attrName
|
|
258
|
+
throw new Error('attribute equal must after attrName'); // No known test case
|
|
288
259
|
}
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
260
|
+
break;
|
|
261
|
+
case "'":
|
|
262
|
+
case '"':
|
|
263
|
+
if (
|
|
264
|
+
s === S_EQ ||
|
|
265
|
+
s === S_ATTR //|| s == S_ATTR_SPACE
|
|
266
|
+
) {
|
|
267
|
+
//equal
|
|
268
|
+
if (s === S_ATTR) {
|
|
269
|
+
errorHandler.warning('attribute value must after "="');
|
|
270
|
+
attrName = source.slice(start, p);
|
|
271
|
+
}
|
|
272
|
+
start = p + 1;
|
|
273
|
+
p = source.indexOf(c, start);
|
|
274
|
+
if (p > 0) {
|
|
275
|
+
value = source.slice(start, p);
|
|
276
|
+
addAttribute(attrName, value, start - 1);
|
|
277
|
+
s = S_ATTR_END;
|
|
278
|
+
} else {
|
|
279
|
+
//fatalError: no end quot match
|
|
280
|
+
throw new Error("attribute value no end '" + c + "' match");
|
|
281
|
+
}
|
|
282
|
+
} else if (s == S_ATTR_NOQUOT_VALUE) {
|
|
292
283
|
value = source.slice(start, p);
|
|
293
|
-
addAttribute(attrName, value, start
|
|
284
|
+
addAttribute(attrName, value, start);
|
|
285
|
+
errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
|
|
286
|
+
start = p + 1;
|
|
294
287
|
s = S_ATTR_END;
|
|
295
|
-
}else{
|
|
296
|
-
//fatalError: no
|
|
297
|
-
throw new Error('attribute value
|
|
288
|
+
} else {
|
|
289
|
+
//fatalError: no equal before
|
|
290
|
+
throw new Error('attribute value must after "="'); // No known test case
|
|
298
291
|
}
|
|
299
|
-
}else if(s == S_ATTR_NOQUOT_VALUE){
|
|
300
|
-
value = source.slice(start, p);
|
|
301
|
-
addAttribute(attrName, value, start);
|
|
302
|
-
errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
|
|
303
|
-
start = p+1;
|
|
304
|
-
s = S_ATTR_END
|
|
305
|
-
}else{
|
|
306
|
-
//fatalError: no equal before
|
|
307
|
-
throw new Error('attribute value must after "="'); // No known test case
|
|
308
|
-
}
|
|
309
|
-
break;
|
|
310
|
-
case '/':
|
|
311
|
-
switch(s){
|
|
312
|
-
case S_TAG:
|
|
313
|
-
el.setTagName(source.slice(start,p));
|
|
314
|
-
case S_ATTR_END:
|
|
315
|
-
case S_TAG_SPACE:
|
|
316
|
-
case S_TAG_CLOSE:
|
|
317
|
-
s =S_TAG_CLOSE;
|
|
318
|
-
el.closed = true;
|
|
319
|
-
case S_ATTR_NOQUOT_VALUE:
|
|
320
|
-
case S_ATTR:
|
|
321
|
-
case S_ATTR_SPACE:
|
|
322
292
|
break;
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
break;//normal
|
|
342
|
-
case S_ATTR_NOQUOT_VALUE://Compatible state
|
|
343
|
-
case S_ATTR:
|
|
344
|
-
value = source.slice(start,p);
|
|
345
|
-
if(value.slice(-1) === '/'){
|
|
346
|
-
el.closed = true;
|
|
347
|
-
value = value.slice(0,-1)
|
|
348
|
-
}
|
|
349
|
-
case S_ATTR_SPACE:
|
|
350
|
-
if(s === S_ATTR_SPACE){
|
|
351
|
-
value = attrName;
|
|
352
|
-
}
|
|
353
|
-
if(s == S_ATTR_NOQUOT_VALUE){
|
|
354
|
-
errorHandler.warning('attribute "'+value+'" missed quot(")!');
|
|
355
|
-
addAttribute(attrName, value, start)
|
|
356
|
-
}else{
|
|
357
|
-
if(!isHTML){
|
|
358
|
-
errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
|
|
359
|
-
}
|
|
360
|
-
addAttribute(value, value, start)
|
|
293
|
+
case '/':
|
|
294
|
+
switch (s) {
|
|
295
|
+
case S_TAG:
|
|
296
|
+
el.setTagName(source.slice(start, p));
|
|
297
|
+
case S_ATTR_END:
|
|
298
|
+
case S_TAG_SPACE:
|
|
299
|
+
case S_TAG_CLOSE:
|
|
300
|
+
s = S_TAG_CLOSE;
|
|
301
|
+
el.closed = true;
|
|
302
|
+
case S_ATTR_NOQUOT_VALUE:
|
|
303
|
+
case S_ATTR:
|
|
304
|
+
break;
|
|
305
|
+
case S_ATTR_SPACE:
|
|
306
|
+
el.closed = true;
|
|
307
|
+
break;
|
|
308
|
+
//case S_EQ:
|
|
309
|
+
default:
|
|
310
|
+
throw new Error("attribute invalid close char('/')"); // No known test case
|
|
361
311
|
}
|
|
362
312
|
break;
|
|
363
|
-
case
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
return p;
|
|
368
|
-
/*xml space '\x20' | #x9 | #xD | #xA; */
|
|
369
|
-
case '\u0080':
|
|
370
|
-
c = ' ';
|
|
371
|
-
default:
|
|
372
|
-
if(c<= ' '){//space
|
|
373
|
-
switch(s){
|
|
374
|
-
case S_TAG:
|
|
375
|
-
el.setTagName(source.slice(start,p));//tagName
|
|
376
|
-
s = S_TAG_SPACE;
|
|
377
|
-
break;
|
|
378
|
-
case S_ATTR:
|
|
379
|
-
attrName = source.slice(start,p)
|
|
380
|
-
s = S_ATTR_SPACE;
|
|
381
|
-
break;
|
|
382
|
-
case S_ATTR_NOQUOT_VALUE:
|
|
383
|
-
var value = source.slice(start, p);
|
|
384
|
-
errorHandler.warning('attribute "'+value+'" missed quot(")!!');
|
|
385
|
-
addAttribute(attrName, value, start)
|
|
386
|
-
case S_ATTR_END:
|
|
387
|
-
s = S_TAG_SPACE;
|
|
388
|
-
break;
|
|
389
|
-
//case S_TAG_SPACE:
|
|
390
|
-
//case S_EQ:
|
|
391
|
-
//case S_ATTR_SPACE:
|
|
392
|
-
// void();break;
|
|
393
|
-
//case S_TAG_CLOSE:
|
|
394
|
-
//ignore warning
|
|
313
|
+
case '': //end document
|
|
314
|
+
errorHandler.error('unexpected end of input');
|
|
315
|
+
if (s == S_TAG) {
|
|
316
|
+
el.setTagName(source.slice(start, p));
|
|
395
317
|
}
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
318
|
+
return p;
|
|
319
|
+
case '>':
|
|
320
|
+
switch (s) {
|
|
321
|
+
case S_TAG:
|
|
322
|
+
el.setTagName(source.slice(start, p));
|
|
323
|
+
case S_ATTR_END:
|
|
324
|
+
case S_TAG_SPACE:
|
|
325
|
+
case S_TAG_CLOSE:
|
|
326
|
+
break; //normal
|
|
327
|
+
case S_ATTR_NOQUOT_VALUE: //Compatible state
|
|
328
|
+
case S_ATTR:
|
|
329
|
+
value = source.slice(start, p);
|
|
330
|
+
if (value.slice(-1) === '/') {
|
|
331
|
+
el.closed = true;
|
|
332
|
+
value = value.slice(0, -1);
|
|
333
|
+
}
|
|
334
|
+
case S_ATTR_SPACE:
|
|
335
|
+
if (s === S_ATTR_SPACE) {
|
|
336
|
+
value = attrName;
|
|
337
|
+
}
|
|
338
|
+
if (s == S_ATTR_NOQUOT_VALUE) {
|
|
339
|
+
errorHandler.warning('attribute "' + value + '" missed quot(")!');
|
|
340
|
+
addAttribute(attrName, value, start);
|
|
341
|
+
} else {
|
|
342
|
+
if (!isHTML) {
|
|
343
|
+
errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
|
|
344
|
+
}
|
|
345
|
+
addAttribute(value, value, start);
|
|
346
|
+
}
|
|
347
|
+
break;
|
|
348
|
+
case S_EQ:
|
|
349
|
+
throw new Error('attribute value missed!!');
|
|
350
|
+
}
|
|
351
|
+
return p;
|
|
352
|
+
/*xml space '\x20' | #x9 | #xD | #xA; */
|
|
353
|
+
case '\u0080':
|
|
354
|
+
c = ' ';
|
|
355
|
+
default:
|
|
356
|
+
if (c <= ' ') {
|
|
357
|
+
//space
|
|
358
|
+
switch (s) {
|
|
359
|
+
case S_TAG:
|
|
360
|
+
el.setTagName(source.slice(start, p)); //tagName
|
|
361
|
+
s = S_TAG_SPACE;
|
|
362
|
+
break;
|
|
363
|
+
case S_ATTR:
|
|
364
|
+
attrName = source.slice(start, p);
|
|
365
|
+
s = S_ATTR_SPACE;
|
|
366
|
+
break;
|
|
367
|
+
case S_ATTR_NOQUOT_VALUE:
|
|
368
|
+
var value = source.slice(start, p);
|
|
369
|
+
errorHandler.warning('attribute "' + value + '" missed quot(")!!');
|
|
370
|
+
addAttribute(attrName, value, start);
|
|
371
|
+
case S_ATTR_END:
|
|
372
|
+
s = S_TAG_SPACE;
|
|
373
|
+
break;
|
|
374
|
+
//case S_TAG_SPACE:
|
|
375
|
+
//case S_EQ:
|
|
376
|
+
//case S_ATTR_SPACE:
|
|
377
|
+
// void();break;
|
|
378
|
+
//case S_TAG_CLOSE:
|
|
379
|
+
//ignore warning
|
|
380
|
+
}
|
|
381
|
+
} else {
|
|
382
|
+
//not space
|
|
383
|
+
//S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
|
|
384
|
+
//S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
|
|
385
|
+
switch (s) {
|
|
386
|
+
//case S_TAG:void();break;
|
|
387
|
+
//case S_ATTR:void();break;
|
|
388
|
+
//case S_ATTR_NOQUOT_VALUE:void();break;
|
|
389
|
+
case S_ATTR_SPACE:
|
|
390
|
+
if (!isHTML) {
|
|
391
|
+
errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
|
|
392
|
+
}
|
|
393
|
+
addAttribute(attrName, attrName, start);
|
|
394
|
+
start = p;
|
|
395
|
+
s = S_ATTR;
|
|
396
|
+
break;
|
|
397
|
+
case S_ATTR_END:
|
|
398
|
+
errorHandler.warning('attribute space is required"' + attrName + '"!!');
|
|
399
|
+
case S_TAG_SPACE:
|
|
400
|
+
s = S_ATTR;
|
|
401
|
+
start = p;
|
|
402
|
+
break;
|
|
403
|
+
case S_EQ:
|
|
404
|
+
s = S_ATTR_NOQUOT_VALUE;
|
|
405
|
+
start = p;
|
|
406
|
+
break;
|
|
407
|
+
case S_TAG_CLOSE:
|
|
408
|
+
throw new Error("elements closed character '/' and '>' must be connected to");
|
|
407
409
|
}
|
|
408
|
-
addAttribute(attrName, attrName, start);
|
|
409
|
-
start = p;
|
|
410
|
-
s = S_ATTR;
|
|
411
|
-
break;
|
|
412
|
-
case S_ATTR_END:
|
|
413
|
-
errorHandler.warning('attribute space is required"'+attrName+'"!!')
|
|
414
|
-
case S_TAG_SPACE:
|
|
415
|
-
s = S_ATTR;
|
|
416
|
-
start = p;
|
|
417
|
-
break;
|
|
418
|
-
case S_EQ:
|
|
419
|
-
s = S_ATTR_NOQUOT_VALUE;
|
|
420
|
-
start = p;
|
|
421
|
-
break;
|
|
422
|
-
case S_TAG_CLOSE:
|
|
423
|
-
throw new Error("elements closed character '/' and '>' must be connected to");
|
|
424
410
|
}
|
|
425
|
-
|
|
426
|
-
}//end outer switch
|
|
411
|
+
} //end outer switch
|
|
427
412
|
//console.log('p++',p)
|
|
428
413
|
p++;
|
|
429
414
|
}
|
|
430
415
|
}
|
|
416
|
+
|
|
431
417
|
/**
|
|
432
|
-
* @
|
|
418
|
+
* @returns `true` if a new namespace has been defined.
|
|
433
419
|
*/
|
|
434
|
-
function appendElement(el,domBuilder,currentNSMap){
|
|
420
|
+
function appendElement(el, domBuilder, currentNSMap) {
|
|
435
421
|
var tagName = el.tagName;
|
|
436
422
|
var localNSMap = null;
|
|
437
423
|
//var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
|
|
438
424
|
var i = el.length;
|
|
439
|
-
while(i--){
|
|
425
|
+
while (i--) {
|
|
440
426
|
var a = el[i];
|
|
441
427
|
var qName = a.qName;
|
|
442
428
|
var value = a.value;
|
|
443
429
|
var nsp = qName.indexOf(':');
|
|
444
|
-
if(nsp>0){
|
|
445
|
-
var prefix = a.prefix = qName.slice(0,nsp);
|
|
446
|
-
var localName = qName.slice(nsp+1);
|
|
447
|
-
var nsPrefix = prefix === 'xmlns' && localName
|
|
448
|
-
}else{
|
|
430
|
+
if (nsp > 0) {
|
|
431
|
+
var prefix = (a.prefix = qName.slice(0, nsp));
|
|
432
|
+
var localName = qName.slice(nsp + 1);
|
|
433
|
+
var nsPrefix = prefix === 'xmlns' && localName;
|
|
434
|
+
} else {
|
|
449
435
|
localName = qName;
|
|
450
|
-
prefix = null
|
|
451
|
-
nsPrefix = qName === 'xmlns' && ''
|
|
436
|
+
prefix = null;
|
|
437
|
+
nsPrefix = qName === 'xmlns' && '';
|
|
452
438
|
}
|
|
453
439
|
//can not set prefix,because prefix !== ''
|
|
454
|
-
a.localName = localName
|
|
440
|
+
a.localName = localName;
|
|
455
441
|
//prefix == null for no ns prefix attribute
|
|
456
|
-
if(nsPrefix !== false){
|
|
457
|
-
|
|
458
|
-
|
|
442
|
+
if (nsPrefix !== false) {
|
|
443
|
+
//hack!!
|
|
444
|
+
if (localNSMap == null) {
|
|
445
|
+
localNSMap = {};
|
|
459
446
|
//console.log(currentNSMap,0)
|
|
460
|
-
_copy(currentNSMap,currentNSMap={})
|
|
447
|
+
_copy(currentNSMap, (currentNSMap = {}));
|
|
461
448
|
//console.log(currentNSMap,1)
|
|
462
449
|
}
|
|
463
450
|
currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
|
|
464
|
-
a.uri = NAMESPACE.XMLNS
|
|
465
|
-
domBuilder.startPrefixMapping(nsPrefix, value)
|
|
451
|
+
a.uri = NAMESPACE.XMLNS;
|
|
452
|
+
domBuilder.startPrefixMapping(nsPrefix, value);
|
|
466
453
|
}
|
|
467
454
|
}
|
|
468
455
|
var i = el.length;
|
|
469
|
-
while(i--){
|
|
456
|
+
while (i--) {
|
|
470
457
|
a = el[i];
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
if(prefix === 'xml'){
|
|
458
|
+
if (a.prefix) {
|
|
459
|
+
//no prefix attribute has no namespace
|
|
460
|
+
if (a.prefix === 'xml') {
|
|
474
461
|
a.uri = NAMESPACE.XML;
|
|
475
|
-
}
|
|
476
|
-
|
|
462
|
+
}
|
|
463
|
+
if (a.prefix !== 'xmlns') {
|
|
464
|
+
a.uri = currentNSMap[a.prefix];
|
|
477
465
|
}
|
|
478
466
|
}
|
|
479
467
|
}
|
|
480
468
|
var nsp = tagName.indexOf(':');
|
|
481
|
-
if(nsp>0){
|
|
482
|
-
prefix = el.prefix = tagName.slice(0,nsp);
|
|
483
|
-
localName = el.localName = tagName.slice(nsp+1);
|
|
484
|
-
}else{
|
|
485
|
-
prefix = null
|
|
469
|
+
if (nsp > 0) {
|
|
470
|
+
prefix = el.prefix = tagName.slice(0, nsp);
|
|
471
|
+
localName = el.localName = tagName.slice(nsp + 1);
|
|
472
|
+
} else {
|
|
473
|
+
prefix = null; //important!!
|
|
486
474
|
localName = el.localName = tagName;
|
|
487
475
|
}
|
|
488
476
|
//no prefix element has default namespace
|
|
489
|
-
var ns = el.uri = currentNSMap[prefix || ''];
|
|
490
|
-
domBuilder.startElement(ns,localName,tagName,el);
|
|
477
|
+
var ns = (el.uri = currentNSMap[prefix || '']);
|
|
478
|
+
domBuilder.startElement(ns, localName, tagName, el);
|
|
491
479
|
//endPrefixMapping and startPrefixMapping have not any help for dom builder
|
|
492
480
|
//localNSMap = null
|
|
493
|
-
if(el.closed){
|
|
494
|
-
domBuilder.endElement(ns,localName,tagName);
|
|
495
|
-
if(localNSMap){
|
|
496
|
-
for(prefix in localNSMap){
|
|
497
|
-
|
|
481
|
+
if (el.closed) {
|
|
482
|
+
domBuilder.endElement(ns, localName, tagName);
|
|
483
|
+
if (localNSMap) {
|
|
484
|
+
for (prefix in localNSMap) {
|
|
485
|
+
if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
|
|
486
|
+
domBuilder.endPrefixMapping(prefix);
|
|
487
|
+
}
|
|
498
488
|
}
|
|
499
489
|
}
|
|
500
|
-
}else{
|
|
490
|
+
} else {
|
|
501
491
|
el.currentNSMap = currentNSMap;
|
|
502
492
|
el.localNSMap = localNSMap;
|
|
503
493
|
//parseStack.push(el);
|
|
504
494
|
return true;
|
|
505
495
|
}
|
|
506
496
|
}
|
|
507
|
-
|
|
497
|
+
|
|
498
|
+
function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
|
|
508
499
|
// https://html.spec.whatwg.org/#raw-text-elements
|
|
509
500
|
// https://html.spec.whatwg.org/#escapable-raw-text-elements
|
|
510
501
|
// https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
|
|
511
502
|
// TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
|
|
512
503
|
var isEscapableRaw = isHTMLEscapableRawTextElement(tagName);
|
|
513
|
-
if(isEscapableRaw || isHTMLRawTextElement(tagName)){
|
|
514
|
-
var elEndStart =
|
|
515
|
-
var text = source.substring(elStartEnd+1,elEndStart);
|
|
504
|
+
if (isEscapableRaw || isHTMLRawTextElement(tagName)) {
|
|
505
|
+
var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
|
|
506
|
+
var text = source.substring(elStartEnd + 1, elEndStart);
|
|
516
507
|
|
|
517
|
-
if(isEscapableRaw){
|
|
518
|
-
|
|
508
|
+
if (isEscapableRaw) {
|
|
509
|
+
text = text.replace(/&#?\w+;/g, entityReplacer);
|
|
519
510
|
}
|
|
520
|
-
|
|
521
|
-
|
|
511
|
+
domBuilder.characters(text, 0, text.length);
|
|
512
|
+
return elEndStart;
|
|
522
513
|
}
|
|
523
|
-
return elStartEnd+1;
|
|
514
|
+
return elStartEnd + 1;
|
|
524
515
|
}
|
|
525
|
-
|
|
516
|
+
|
|
517
|
+
/**
 * Reports whether no closing tag for `tagName` exists behind position `elStartEnd`.
 *
 * The position of the last closing tag is looked up once per tag name and cached in
 * `closeMap`, so repeated calls for the same tag name do not scan `source` again.
 *
 * @param {string} source
 * The complete markup.
 * @param {number} elStartEnd
 * The index the last closing tag is compared against.
 * @param {string} tagName
 * The tag name to search a closing tag for.
 * @param {Object<string, number>} closeMap
 * Cache from tag name to the index of its last closing tag (-1 if there is none).
 * It is updated by this function.
 * @returns {boolean}
 * `true` if the last `</tagName>` (or `</tagName` without the final `>`) is located
 * before `elStartEnd`, or does not exist at all.
 */
function fixSelfClosed(source, elStartEnd, tagName, closeMap) {
	// Only trust own entries of the cache: a plain object inherits members such as
	// `constructor` or `toString`, which must not be mistaken for cached positions.
	var pos = Object.prototype.hasOwnProperty.call(closeMap, tagName) ? closeMap[tagName] : null;
	if (pos == null) {
		pos = source.lastIndexOf('</' + tagName + '>');
		if (pos < elStartEnd) {
			// the author may have forgotten to terminate the closing tag with `>`
			pos = source.lastIndexOf('</' + tagName);
		}
		closeMap[tagName] = pos;
	}
	return pos < elStartEnd;
}
|
|
539
|
-
|
|
540
|
-
|
|
532
|
+
|
|
533
|
+
/**
 * Shallow copy helper: assigns every own enumerable property of `source` to `target`.
 * Inherited properties are ignored and existing properties of `target` with the same
 * name are overwritten.
 *
 * @param {Object | null | undefined} source
 * The object to read from; `null` and `undefined` are treated as "nothing to copy".
 * @param {Object} target
 * The object that receives the properties.
 */
function _copy(source, target) {
	if (source === null || source === undefined) {
		return;
	}
	Object.keys(source).forEach(function (key) {
		target[key] = source[key];
	});
}
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
540
|
+
|
|
541
|
+
/**
|
|
542
|
+
* @typedef ParseUtils
|
|
543
|
+
* @property {function(relativeIndex: number?): string} char
|
|
544
|
+
* Provides look ahead access to a single character relative to the current index (an empty string if that position is outside of the source).
|
|
545
|
+
* @property {function(): number} getIndex
|
|
546
|
+
* Provides read-only access to the current index.
|
|
547
|
+
* @property {function(reg: RegExp): string | null} getMatch
|
|
548
|
+
* Applies the provided regular expression enforcing that it starts at the current index and
|
|
549
|
+
* returns the complete matching string,
|
|
550
|
+
* and moves the current index by the length of the matching string.
|
|
551
|
+
* @property {function(): string} getSource
|
|
552
|
+
* Provides read-only access to the complete source.
|
|
553
|
+
* @property {function(places: number?): void} skip
|
|
554
|
+
* moves the current index by places (defaults to 1)
|
|
555
|
+
* @property {function(): number} skipBlanks
|
|
556
|
+
* Moves the current index by the amount of white space that directly follows the current index
|
|
557
|
+
* and returns the amount of whitespace chars skipped (0..n),
|
|
558
|
+
* or -1 if the end of the source was reached.
|
|
559
|
+
* @property {function(): string} substringFromIndex
|
|
560
|
+
* creates a substring from the current index to the end of `source`
|
|
561
|
+
* @property {function(compareWith: string): boolean} substringStartsWith
|
|
562
|
+
* Checks if the source continues with `compareWith`,
|
|
563
|
+
* starting from the current index.
|
|
564
|
+
* @see {@link parseUtils}
|
|
565
|
+
*/
|
|
566
|
+
|
|
567
|
+
/**
 * A temporary scope for parsing and look ahead operations in `source`,
 * starting from index `start`.
 *
 * The returned object keeps its own cursor ("current index"). Some operations
 * (`skip`, `skipBlanks`, a successful `getMatch`) move that cursor,
 * after which `getIndex` returns the new index. `source` itself is never modified.
 *
 * @param {string} source
 * The text to operate on.
 * @param {number} start
 * The initial value of the current index.
 * @returns {ParseUtils}
 */
function parseUtils(source, start) {
	var index = start;

	// Look ahead: the character `n` positions after the current index (default 0),
	// or '' if that position is outside of `source`.
	function char(n) {
		n = n || 0;
		return source.charAt(index + n);
	}

	// Moves the current index by `n` places. Only a missing argument defaults to 1,
	// so that an explicit `skip(0)` (e.g. for a zero-length match) does not move the index.
	function skip(n) {
		var places = n == null ? 1 : n;
		index += places;
	}

	// Skips the white space (space, \n, \t, \r) directly following the current index.
	// Returns the number of skipped characters, or -1 when the end of `source` was reached
	// before any other character was found.
	function skipBlanks() {
		var blanks = 0;
		while (index < source.length) {
			var c = char();
			if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
				return blanks;
			}
			blanks++;
			skip();
		}
		return -1;
	}

	// Everything from the current index to the end of `source`.
	function substringFromIndex() {
		return source.substring(index);
	}

	// Whether `text` is located exactly at the current index.
	function substringStartsWith(text) {
		return source.substring(index, index + text.length) === text;
	}

	// Builds an expression from `args` via `g.reg`, prefixed with `^`, and applies it to the
	// remaining source. On success the current index is moved behind the match and the
	// complete matching string is returned, otherwise `null`.
	function getMatch(args) {
		var expr = g.reg('^', args);
		var match = expr.exec(substringFromIndex());
		if (match) {
			skip(match[0].length);
			return match[0];
		}
		return null;
	}

	return {
		char: char,
		getIndex: function () {
			return index;
		},
		getMatch: getMatch,
		getSource: function () {
			return source;
		},
		skip: skip,
		skipBlanks: skipBlanks,
		substringFromIndex: substringFromIndex,
		substringStartsWith: substringStartsWith,
	};
}
|
|
634
|
+
|
|
635
|
+
/**
 * Parses the optional internal subset of a doctype declaration.
 *
 * If the character at the current index of `p` is `[`, everything up to the matching `]`
 * is consumed: a sequence of element, entity, attribute list and notation declarations,
 * comments, processing instructions and parameter entity references, separated by optional
 * white space. The sequence may be empty (`[]`).
 * If the current character is not `[`, nothing is consumed.
 *
 * @param {ParseUtils} p
 * The parsing scope; its current index is moved behind the closing `]`.
 * @param {DOMHandler} errorHandler
 * `fatalError` is called (and its result returned) as soon as the subset is not well-formed.
 * @returns {string | undefined}
 * The text between `[` and `]` (including surrounding white space),
 * or `undefined` if there is no internal subset.
 */
function parseDoctypeInternalSubset(p, errorHandler) {
	/**
	 * Consumes a processing instruction at the current index of `p`.
	 * An xml declaration (target `xml` in any casing) is rejected here.
	 * NOTE(review): relies on `g.PI` only matching at the start of the string it is given.
	 *
	 * @param {ParseUtils} p
	 * @param {DOMHandler} errorHandler
	 * @returns {string}
	 * The complete text of the processing instruction.
	 */
	function parsePI(p, errorHandler) {
		var match = g.PI.exec(p.substringFromIndex());
		if (!match) {
			return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
		}
		if (match[1].toLowerCase() === 'xml') {
			return errorHandler.fatalError(
				'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
			);
		}
		p.skip(match[0].length);
		return match[0];
	}

	// Parse internal subset
	var source = p.getSource();
	if (p.char() === '[') {
		p.skip(1);
		var intSubsetStart = p.getIndex();
		p.skipBlanks();
		while (p.getIndex() < source.length) {
			// Checking for the end first (instead of only after a declaration)
			// is what makes an empty or white space only subset valid.
			if (p.char() === ']') {
				var internalSubset = source.substring(intSubsetStart, p.getIndex());
				p.skip(1);
				return internalSubset;
			}
			var current = null;
			// Conditional sections (`<![`) are only allowed in the external subset,
			// so they are intentionally not handled here.
			if (p.char() === '<' && p.char(1) === '!') {
				switch (p.char(2)) {
					case 'E':
						if (p.char(3) === 'L') {
							current = p.getMatch(g.elementdecl);
						} else if (p.char(3) === 'N') {
							current = p.getMatch(g.EntityDecl);
						}
						break;
					case 'A':
						current = p.getMatch(g.AttlistDecl);
						break;
					case 'N':
						current = p.getMatch(g.NotationDecl);
						break;
					case '-':
						current = p.getMatch(g.Comment);
						break;
				}
			} else if (p.char() === '<' && p.char(1) === '?') {
				current = parsePI(p, errorHandler);
			} else if (p.char() === '%') {
				current = p.getMatch(g.PEReference);
			} else {
				return errorHandler.fatalError('Error detected in Markup declaration');
			}
			if (!current) {
				return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
			}
			p.skipBlanks();
		}
		return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
	}
}
|
|
593
711
|
|
|
712
|
+
/**
 * Called when the parser encounters markup starting with '<!'.
 * The character right after `<!` decides what is expected:
 * `-` a comment, `[` a CDATA section, `D` a doctype declaration.
 * The parsed construct is reported to `domBuilder`.
 *
 * @param {string} source
 * The xml.
 * @param {number} start
 * The start index of the '<!'.
 * @param {DOMHandler} domBuilder
 * Receives `comment`, `startCDATA`/`characters`/`endCDATA` or `startDTD`/`endDTD`.
 * @param {DOMHandler} errorHandler
 * `fatalError` is called (and its result returned) when the markup is not well-formed.
 * @returns {number | never}
 * The index directly behind the parsed construct.
 * @throws {ParseError}
 * In case the markup is not well-formed (presumably thrown by `errorHandler.fatalError`).
 */
function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler) {
	var p = parseUtils(source, start);
	var discriminator = p.char(2);

	if (discriminator === '-') {
		// should be a comment
		var comment = p.getMatch(g.Comment);
		if (!comment) {
			return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
		}
		// only the part between the delimiters is the comment's data
		domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
		return p.getIndex();
	}

	if (discriminator === '[') {
		// should be CDATA
		var cdata = p.getMatch(g.CDSect);
		if (!cdata) {
			return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
		}
		domBuilder.startCDATA();
		domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
		domBuilder.endCDATA();
		return p.getIndex();
	}

	if (discriminator !== 'D') {
		return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
	}

	// should be DOCTYPE
	if (!p.substringStartsWith(g.DOCTYPE_DECL_START)) {
		return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
	}
	p.skip(g.DOCTYPE_DECL_START.length);
	if (p.skipBlanks() < 1) {
		return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
	}

	// Parse the DOCTYPE name
	var doctypeName = p.getMatch(g.Name);
	if (!doctypeName) {
		return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
	}
	p.skipBlanks();

	// Check for ExternalID; both ids stay undefined when there is none
	var publicId;
	var systemId;
	if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
		var externalId = g.ExternalID_match.exec(p.substringFromIndex());
		if (!externalId) {
			return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
		}
		var literals = externalId.groups;
		if (literals.SystemLiteralOnly !== undefined) {
			systemId = literals.SystemLiteralOnly;
		} else {
			systemId = literals.SystemLiteral;
			publicId = literals.PubidLiteral;
		}
		p.skip(externalId[0].length);
	}

	p.skipBlanks();
	var internalSubset = parseDoctypeInternalSubset(p, errorHandler);
	p.skipBlanks();
	if (p.char() !== '>') {
		return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
	}
	p.skip(1);
	domBuilder.startDTD(doctypeName, publicId, systemId, internalSubset);
	domBuilder.endDTD();
	return p.getIndex();
}
|
|
595
802
|
|
|
596
|
-
function
|
|
597
|
-
var
|
|
598
|
-
if(
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
return
|
|
604
|
-
|
|
605
|
-
|
|
803
|
+
/**
 * Parses the processing instruction that starts at index `start` and reports it to
 * `domBuilder.processingInstruction`, passing capture groups 1 and 2 of `g.PI`
 * (presumably target and data; confirm against the grammar).
 *
 * A processing instruction whose first capture group is `xml` (in any casing) is treated
 * as xml declaration: it is only accepted at index 0 and has to match `g.XMLDecl`.
 *
 * @param {string} source
 * The xml.
 * @param {number} start
 * The start index of the processing instruction.
 * @param {DOMHandler} domBuilder
 * @param {DOMHandler} errorHandler
 * `fatalError` is called (and its result returned) when the instruction is not well-formed.
 * @returns {number | never}
 * The index directly behind the processing instruction.
 */
function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
	var remainder = source.substring(start);
	var match = remainder.match(g.PI);
	if (!match) {
		return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
	}

	var isXmlDeclaration = match[1].toLowerCase() === 'xml';
	if (isXmlDeclaration && start > 0) {
		return errorHandler.fatalError(
			'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
		);
	}
	if (isXmlDeclaration && !g.XMLDecl.test(remainder)) {
		return errorHandler.fatalError('xml declaration is not well-formed');
	}

	domBuilder.processingInstruction(match[1], match[2]);
	return start + match[0].length;
}
|
|
610
821
|
|
|
611
|
-
function ElementAttributes(){
|
|
612
|
-
this.attributeNames = {}
|
|
822
|
+
/**
 * Array-like collection of the attributes of the element that is currently parsed.
 * Attribute entries are stored under the indexes `0..length-1`;
 * `attributeNames` maps each added qualified name to the index of its entry.
 *
 * @constructor
 */
function ElementAttributes() {
	this.attributeNames = {};
}

ElementAttributes.prototype = {
	/**
	 * Stores the tag name of the element.
	 *
	 * @param {string} tagName
	 * @throws {Error} If `tagName` does not pass `g.QName_exact`.
	 */
	setTagName: function (tagName) {
		var isValid = g.QName_exact.test(tagName);
		if (!isValid) {
			throw new Error('invalid tagName:' + tagName);
		}
		this.tagName = tagName;
	},
	/**
	 * Appends an attribute entry and registers its name in `attributeNames`.
	 *
	 * @param {string} qName
	 * @param {string} value
	 * @param {number} offset
	 * @throws {Error} If `qName` does not pass `g.QName_exact`.
	 */
	addValue: function (qName, value, offset) {
		var isValid = g.QName_exact.test(qName);
		if (!isValid) {
			throw new Error('invalid attribute:' + qName);
		}
		var position = this.length;
		this.attributeNames[qName] = position;
		this[position] = { qName: qName, value: value, offset: offset };
		this.length = position + 1;
	},
	// shared initial value; the first `addValue` creates an own `length` on the instance
	length: 0,
	// The accessors below read a single field of the entry at index `i`.
	// `localName`, `locator` and `uri` are not set by `addValue`,
	// so they are only available once other code has added them to the entry.
	getLocalName: function (i) {
		var entry = this[i];
		return entry.localName;
	},
	getLocator: function (i) {
		var entry = this[i];
		return entry.locator;
	},
	getQName: function (i) {
		var entry = this[i];
		return entry.qName;
	},
	getURI: function (i) {
		var entry = this[i];
		return entry.uri;
	},
	getValue: function (i) {
		var entry = this[i];
		return entry.value;
	},
};
|
|
659
867
|
|
|
660
868
|
exports.XMLReader = XMLReader;
|
|
661
|
-
exports.
|
|
869
|
+
exports.parseUtils = parseUtils;
|
|
870
|
+
exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;
|