@xmldom/xmldom 0.9.0-beta.1 → 0.9.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/sax.js CHANGED
@@ -1,246 +1,260 @@
1
- 'use strict'
1
+ 'use strict';
2
+
3
+ var conventions = require('./conventions');
4
+ var g = require('./grammar');
2
5
 
3
- var conventions = require("./conventions");
4
- var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
5
6
  var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
7
+ var isHTMLMimeType = conventions.isHTMLMimeType;
8
+ var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
6
9
  var NAMESPACE = conventions.NAMESPACE;
7
- var MIME_TYPE = conventions.MIME_TYPE;
8
-
9
- //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
10
- //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
11
- //[5] Name ::= NameStartChar (NameChar)*
12
- var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
13
- var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
14
- var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
15
- //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
10
+ var ParseError = conventions.ParseError;
11
+
16
12
  //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
17
13
 
18
14
  //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
19
15
  //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
20
- var S_TAG = 0;//tag name offerring
21
- var S_ATTR = 1;//attr name offerring
22
- var S_ATTR_SPACE=2;//attr name end and space offer
23
- var S_EQ = 3;//=space?
24
- var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only)
25
- var S_ATTR_END = 5;//attr value end and no space(quot end)
26
- var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer)
27
- var S_TAG_CLOSE = 7;//closed el<el />
16
+ var S_TAG = 0; //tag name offerring
17
+ var S_ATTR = 1; //attr name offerring
18
+ var S_ATTR_SPACE = 2; //attr name end and space offer
19
+ var S_EQ = 3; //=space?
20
+ var S_ATTR_NOQUOT_VALUE = 4; //attr value(no quot value only)
21
+ var S_ATTR_END = 5; //attr value end and no space(quot end)
22
+ var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
23
+ var S_TAG_CLOSE = 7; //closed el<el />
28
24
 
29
- /**
30
- * Creates an error that will not be caught by XMLReader aka the SAX parser.
31
- *
32
- * @param {string} message
33
- * @param {any?} locator Optional, can provide details about the location in the source
34
- * @constructor
35
- */
36
- function ParseError(message, locator) {
37
- this.message = message
38
- this.locator = locator
39
- if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
40
- }
41
- ParseError.prototype = new Error();
42
- ParseError.prototype.name = ParseError.name
43
-
44
- function XMLReader(){
45
-
46
- }
25
+ function XMLReader() {}
47
26
 
48
27
  XMLReader.prototype = {
49
- parse:function(source,defaultNSMap,entityMap){
28
+ parse: function (source, defaultNSMap, entityMap) {
50
29
  var domBuilder = this.domBuilder;
51
30
  domBuilder.startDocument();
52
- _copy(defaultNSMap ,defaultNSMap = {})
53
- parse(source,defaultNSMap,entityMap,
54
- domBuilder,this.errorHandler);
31
+ _copy(defaultNSMap, (defaultNSMap = {}));
32
+ parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
55
33
  domBuilder.endDocument();
34
+ },
35
+ };
36
+
37
+ /**
38
+ * Detecting everything that might be a reference,
39
+ * including those without ending `;`, since those are allowed in HTML.
40
+ * The entityReplacer takes care of verifying and transforming each occurrence,
41
+ * and reports to the errorHandler on those that are not OK,
42
+ * depending on the context.
43
+ */
44
+ var ENTITY_REG = /&#?\w+;?/g;
45
+
46
+ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
47
+ var isHTML = isHTMLMimeType(domBuilder.mimeType);
48
+ if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
49
+ return errorHandler.fatalError('Unicode replacement character detected, source encoding issues?');
56
50
  }
57
- }
58
- function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
59
- var isHTML = MIME_TYPE.isHTML(domBuilder.mimeType);
51
+
60
52
  function fixedFromCharCode(code) {
61
53
  // String.prototype.fromCharCode does not supports
62
54
  // > 2 bytes unicode chars directly
63
55
  if (code > 0xffff) {
64
56
  code -= 0x10000;
65
- var surrogate1 = 0xd800 + (code >> 10)
66
- , surrogate2 = 0xdc00 + (code & 0x3ff);
57
+ var surrogate1 = 0xd800 + (code >> 10),
58
+ surrogate2 = 0xdc00 + (code & 0x3ff);
67
59
 
68
60
  return String.fromCharCode(surrogate1, surrogate2);
69
61
  } else {
70
62
  return String.fromCharCode(code);
71
63
  }
72
64
  }
73
- function entityReplacer(a){
74
- var k = a.slice(1,-1);
65
+
66
+ function entityReplacer(a) {
67
+ var complete = a[a.length - 1] === ';' ? a : a + ';';
68
+ if (!isHTML && complete !== a) {
69
+ errorHandler.error('EntityRef: expecting ;');
70
+ return a;
71
+ }
72
+ var match = g.Reference.exec(complete);
73
+ if (!match || match[0].length !== complete.length) {
74
+ errorHandler.error('entity not matching Reference production: ' + a);
75
+ return a;
76
+ }
77
+ var k = complete.slice(1, -1);
75
78
  if (Object.hasOwnProperty.call(entityMap, k)) {
76
79
  return entityMap[k];
77
- }else if(k.charAt(0) === '#'){
78
- return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
79
- }else{
80
- errorHandler.error('entity not found:'+a);
80
+ } else if (k.charAt(0) === '#') {
81
+ return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
82
+ } else {
83
+ errorHandler.error('entity not found:' + a);
81
84
  return a;
82
85
  }
83
86
  }
84
- function appendText(end){//has some bugs
85
- if(end>start){
86
- var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
87
- locator&&position(start);
88
- domBuilder.characters(xt,0,end-start);
89
- start = end
87
+
88
+ function appendText(end) {
89
+ //has some bugs
90
+ if (end > start) {
91
+ var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
92
+ locator && position(start);
93
+ domBuilder.characters(xt, 0, end - start);
94
+ start = end;
90
95
  }
91
96
  }
92
- function position(p,m){
93
- while(p>=lineEnd && (m = linePattern.exec(source))){
97
+
98
+ function position(p, m) {
99
+ while (p >= lineEnd && (m = linePattern.exec(source))) {
94
100
  lineStart = m.index;
95
101
  lineEnd = lineStart + m[0].length;
96
102
  locator.lineNumber++;
97
- //console.log('line++:',locator,startPos,endPos)
98
103
  }
99
- locator.columnNumber = p-lineStart+1;
104
+ locator.columnNumber = p - lineStart + 1;
100
105
  }
106
+
101
107
  var lineStart = 0;
102
108
  var lineEnd = 0;
103
- var linePattern = /.*(?:\r\n?|\n)|.*$/g
109
+ var linePattern = /.*(?:\r\n?|\n)|.*$/g;
104
110
  var locator = domBuilder.locator;
105
111
 
106
- var parseStack = [{currentNSMap:defaultNSMapCopy}]
107
- var closeMap = {};
112
+ var parseStack = [{ currentNSMap: defaultNSMapCopy }];
113
+ var unclosedTags = [];
108
114
  var start = 0;
109
- while(true){
110
- try{
111
- var tagStart = source.indexOf('<',start);
112
- if(tagStart<0){
113
- if(!source.substr(start).match(/^\s*$/)){
115
+ while (true) {
116
+ try {
117
+ var tagStart = source.indexOf('<', start);
118
+ if (tagStart < 0) {
119
+ if (!isHTML && unclosedTags.length > 0) {
120
+ return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
121
+ }
122
+ if (!source.substring(start).match(/^\s*$/)) {
114
123
  var doc = domBuilder.doc;
115
- var text = doc.createTextNode(source.substr(start));
116
- doc.appendChild(text);
117
- domBuilder.currentElement = text;
124
+ var text = doc.createTextNode(source.substr(start));
125
+ if (doc.documentElement) {
126
+ return errorHandler.error('Extra content at the end of the document');
127
+ }
128
+ doc.appendChild(text);
129
+ domBuilder.currentElement = text;
118
130
  }
119
131
  return;
120
132
  }
121
- if(tagStart>start){
133
+ if (tagStart > start) {
134
+ var fromSource = source.substring(start, tagStart);
135
+ if (!isHTML && unclosedTags.length === 0) {
136
+ fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
137
+ fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
138
+ }
122
139
  appendText(tagStart);
123
140
  }
124
- switch(source.charAt(tagStart+1)){
125
- case '/':
126
- var end = source.indexOf('>',tagStart+3);
127
- var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, '');
128
- var config = parseStack.pop();
129
- if(end<0){
130
-
131
- tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
132
- errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
133
- end = tagStart+1+tagName.length;
134
- }else if(tagName.match(/\s</)){
135
- tagName = tagName.replace(/[\s<].*/,'');
136
- errorHandler.error("end tag name: "+tagName+' maybe not complete');
137
- end = tagStart+1+tagName.length;
138
- }
139
- var localNSMap = config.localNSMap;
140
- var endMatch = config.tagName == tagName;
141
- var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
142
- if(endIgnoreCaseMach){
143
- domBuilder.endElement(config.uri,config.localName,tagName);
144
- if(localNSMap){
145
- for(var prefix in localNSMap){
146
- domBuilder.endPrefixMapping(prefix) ;
147
- }
148
- }
149
- if(!endMatch){
150
- errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
141
+ switch (source.charAt(tagStart + 1)) {
142
+ case '/':
143
+ var end = source.indexOf('>', tagStart + 2);
144
+ var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
145
+ if (!tagNameRaw) {
146
+ return errorHandler.fatalError('end tag name missing');
151
147
  }
152
- }else{
153
- parseStack.push(config)
154
- }
155
-
156
- end++;
157
- break;
158
- // end elment
159
- case '?':// <?...?>
160
- locator&&position(tagStart);
161
- end = parseInstruction(source,tagStart,domBuilder);
162
- break;
163
- case '!':// <!doctype,<![CDATA,<!--
164
- locator&&position(tagStart);
165
- end = parseDCC(source,tagStart,domBuilder,errorHandler);
166
- break;
167
- default:
168
- locator&&position(tagStart);
169
- var el = new ElementAttributes();
170
- var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
171
- //elStartEnd
172
- var end = parseElementStartPart(
173
- source,
174
- tagStart,
175
- el,
176
- currentNSMap,
177
- entityReplacer,
178
- errorHandler,
179
- isHTML
180
- )
181
- var len = el.length;
182
-
183
-
184
- if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
185
- el.closed = true;
186
- if(!isHTML){
187
- errorHandler.warning('unclosed xml attribute');
148
+ var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
149
+ if (!tagNameMatch) {
150
+ return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
188
151
  }
189
- }
190
- if(locator && len){
191
- var locator2 = copyLocator(locator,{});
192
- //try{//attribute position fixed
193
- for(var i = 0;i<len;i++){
194
- var a = el[i];
195
- position(a.offset);
196
- a.locator = copyLocator(locator,{});
152
+ if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
153
+ // not enough information to provide a helpful error message,
154
+ // but parsing will throw since there is no root element
155
+ return;
197
156
  }
198
- domBuilder.locator = locator2
199
- if(appendElement(el,domBuilder,currentNSMap)){
200
- parseStack.push(el)
157
+ var currentTagName =
158
+ unclosedTags[unclosedTags.length - 1] ||
159
+ domBuilder.currentElement.tagName ||
160
+ domBuilder.doc.documentElement.tagName ||
161
+ '';
162
+ if (currentTagName !== tagNameMatch[1]) {
163
+ var tagNameLower = tagNameMatch[1].toLowerCase();
164
+ if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
165
+ return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
166
+ }
201
167
  }
202
- domBuilder.locator = locator;
203
- }else{
204
- if(appendElement(el,domBuilder,currentNSMap)){
205
- parseStack.push(el)
168
+ var config = parseStack.pop();
169
+ unclosedTags.pop();
170
+ var localNSMap = config.localNSMap;
171
+ domBuilder.endElement(config.uri, config.localName, currentTagName);
172
+ if (localNSMap) {
173
+ for (var prefix in localNSMap) {
174
+ if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
175
+ domBuilder.endPrefixMapping(prefix);
176
+ }
177
+ }
206
178
  }
207
- }
208
179
 
209
- if (isHTML && !el.closed) {
210
- end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
211
- } else {
212
180
  end++;
213
- }
181
+ break;
182
+ // end element
183
+ case '?': // <?...?>
184
+ locator && position(tagStart);
185
+ end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
186
+ break;
187
+ case '!': // <!doctype,<![CDATA,<!--
188
+ locator && position(tagStart);
189
+ end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
190
+ break;
191
+ default:
192
+ locator && position(tagStart);
193
+ var el = new ElementAttributes();
194
+ var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
195
+ //elStartEnd
196
+ var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
197
+ var len = el.length;
198
+
199
+ if (!el.closed) {
200
+ if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
201
+ el.closed = true;
202
+ } else {
203
+ unclosedTags.push(el.tagName);
204
+ }
205
+ }
206
+ if (locator && len) {
207
+ var locator2 = copyLocator(locator, {});
208
+ //try{//attribute position fixed
209
+ for (var i = 0; i < len; i++) {
210
+ var a = el[i];
211
+ position(a.offset);
212
+ a.locator = copyLocator(locator, {});
213
+ }
214
+ domBuilder.locator = locator2;
215
+ if (appendElement(el, domBuilder, currentNSMap)) {
216
+ parseStack.push(el);
217
+ }
218
+ domBuilder.locator = locator;
219
+ } else {
220
+ if (appendElement(el, domBuilder, currentNSMap)) {
221
+ parseStack.push(el);
222
+ }
223
+ }
224
+
225
+ if (isHTML && !el.closed) {
226
+ end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
227
+ } else {
228
+ end++;
229
+ }
214
230
  }
215
- }catch(e){
231
+ } catch (e) {
216
232
  if (e instanceof ParseError) {
217
233
  throw e;
218
234
  }
219
- errorHandler.error('element parse error: '+e)
235
+ errorHandler.error('element parse error: ' + e);
220
236
  end = -1;
221
237
  }
222
- if(end>start){
238
+ if (end > start) {
223
239
  start = end;
224
- }else{
225
- //TODO: 这里有可能sax回退,有位置错误风险
226
- appendText(Math.max(tagStart,start)+1);
240
+ } else {
241
+ //Possible sax fallback here, risk of positional error
242
+ appendText(Math.max(tagStart, start) + 1);
227
243
  }
228
244
  }
229
245
  }
230
- function copyLocator(f,t){
246
+
247
+ function copyLocator(f, t) {
231
248
  t.lineNumber = f.lineNumber;
232
249
  t.columnNumber = f.columnNumber;
233
250
  return t;
234
251
  }
235
252
 
236
253
  /**
237
- * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack);
238
- * @return end of the elementStartPart(end of elementEndPart for selfClosed el)
254
+ * @returns end of the elementStartPart(end of elementEndPart for selfClosed el)
255
+ * @see {@link #appendElement}
239
256
  */
240
- function parseElementStartPart(
241
- source,start,el,currentNSMap,entityReplacer,errorHandler, isHTML
242
- ){
243
-
257
+ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
244
258
  /**
245
259
  * @param {string} qname
246
260
  * @param {string} value
@@ -248,7 +262,10 @@ function parseElementStartPart(
248
262
  */
249
263
  function addAttribute(qname, value, startIndex) {
250
264
  if (el.attributeNames.hasOwnProperty(qname)) {
251
- errorHandler.fatalError('Attribute ' + qname + ' redefined')
265
+ return errorHandler.fatalError('Attribute ' + qname + ' redefined');
266
+ }
267
+ if (!isHTML && value.indexOf('<') >= 0) {
268
+ return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
252
269
  }
253
270
  el.addValue(
254
271
  qname,
@@ -256,406 +273,627 @@ function parseElementStartPart(
256
273
  // since the xmldom sax parser does not "interpret" DTD the following is not implemented:
257
274
  // - recursive replacement of (DTD) entity references
258
275
  // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
259
- value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
276
+ value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
260
277
  startIndex
261
- )
278
+ );
262
279
  }
280
+
263
281
  var attrName;
264
282
  var value;
265
283
  var p = ++start;
266
- var s = S_TAG;//status
267
- while(true){
284
+ var s = S_TAG; //status
285
+ while (true) {
268
286
  var c = source.charAt(p);
269
- switch(c){
270
- case '=':
271
- if(s === S_ATTR){//attrName
272
- attrName = source.slice(start,p);
273
- s = S_EQ;
274
- }else if(s === S_ATTR_SPACE){
275
- s = S_EQ;
276
- }else{
277
- //fatalError: equal must after attrName or space after attrName
278
- throw new Error('attribute equal must after attrName'); // No known test case
279
- }
280
- break;
281
- case '\'':
282
- case '"':
283
- if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
284
- ){//equal
285
- if(s === S_ATTR){
286
- errorHandler.warning('attribute value must after "="')
287
- attrName = source.slice(start,p)
287
+ switch (c) {
288
+ case '=':
289
+ if (s === S_ATTR) {
290
+ //attrName
291
+ attrName = source.slice(start, p);
292
+ s = S_EQ;
293
+ } else if (s === S_ATTR_SPACE) {
294
+ s = S_EQ;
295
+ } else {
296
+ //fatalError: equal must after attrName or space after attrName
297
+ throw new Error('attribute equal must after attrName'); // No known test case
288
298
  }
289
- start = p+1;
290
- p = source.indexOf(c,start)
291
- if(p>0){
299
+ break;
300
+ case "'":
301
+ case '"':
302
+ if (
303
+ s === S_EQ ||
304
+ s === S_ATTR //|| s == S_ATTR_SPACE
305
+ ) {
306
+ //equal
307
+ if (s === S_ATTR) {
308
+ errorHandler.warning('attribute value must after "="');
309
+ attrName = source.slice(start, p);
310
+ }
311
+ start = p + 1;
312
+ p = source.indexOf(c, start);
313
+ if (p > 0) {
314
+ value = source.slice(start, p);
315
+ addAttribute(attrName, value, start - 1);
316
+ s = S_ATTR_END;
317
+ } else {
318
+ //fatalError: no end quot match
319
+ throw new Error("attribute value no end '" + c + "' match");
320
+ }
321
+ } else if (s == S_ATTR_NOQUOT_VALUE) {
292
322
  value = source.slice(start, p);
293
- addAttribute(attrName, value, start-1);
323
+ addAttribute(attrName, value, start);
324
+ errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
325
+ start = p + 1;
294
326
  s = S_ATTR_END;
295
- }else{
296
- //fatalError: no end quot match
297
- throw new Error('attribute value no end \''+c+'\' match');
327
+ } else {
328
+ //fatalError: no equal before
329
+ throw new Error('attribute value must after "="'); // No known test case
298
330
  }
299
- }else if(s == S_ATTR_NOQUOT_VALUE){
300
- value = source.slice(start, p);
301
- addAttribute(attrName, value, start);
302
- errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
303
- start = p+1;
304
- s = S_ATTR_END
305
- }else{
306
- //fatalError: no equal before
307
- throw new Error('attribute value must after "="'); // No known test case
308
- }
309
- break;
310
- case '/':
311
- switch(s){
312
- case S_TAG:
313
- el.setTagName(source.slice(start,p));
314
- case S_ATTR_END:
315
- case S_TAG_SPACE:
316
- case S_TAG_CLOSE:
317
- s =S_TAG_CLOSE;
318
- el.closed = true;
319
- case S_ATTR_NOQUOT_VALUE:
320
- case S_ATTR:
321
- case S_ATTR_SPACE:
322
331
  break;
323
- //case S_EQ:
324
- default:
325
- throw new Error("attribute invalid close char('/')") // No known test case
326
- }
327
- break;
328
- case ''://end document
329
- errorHandler.error('unexpected end of input');
330
- if(s == S_TAG){
331
- el.setTagName(source.slice(start,p));
332
- }
333
- return p;
334
- case '>':
335
- switch(s){
336
- case S_TAG:
337
- el.setTagName(source.slice(start,p));
338
- case S_ATTR_END:
339
- case S_TAG_SPACE:
340
- case S_TAG_CLOSE:
341
- break;//normal
342
- case S_ATTR_NOQUOT_VALUE://Compatible state
343
- case S_ATTR:
344
- value = source.slice(start,p);
345
- if(value.slice(-1) === '/'){
346
- el.closed = true;
347
- value = value.slice(0,-1)
348
- }
349
- case S_ATTR_SPACE:
350
- if(s === S_ATTR_SPACE){
351
- value = attrName;
352
- }
353
- if(s == S_ATTR_NOQUOT_VALUE){
354
- errorHandler.warning('attribute "'+value+'" missed quot(")!');
355
- addAttribute(attrName, value, start)
356
- }else{
357
- if(!isHTML){
358
- errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
359
- }
360
- addAttribute(value, value, start)
332
+ case '/':
333
+ switch (s) {
334
+ case S_TAG:
335
+ el.setTagName(source.slice(start, p));
336
+ case S_ATTR_END:
337
+ case S_TAG_SPACE:
338
+ case S_TAG_CLOSE:
339
+ s = S_TAG_CLOSE;
340
+ el.closed = true;
341
+ case S_ATTR_NOQUOT_VALUE:
342
+ case S_ATTR:
343
+ break;
344
+ case S_ATTR_SPACE:
345
+ el.closed = true;
346
+ break;
347
+ //case S_EQ:
348
+ default:
349
+ throw new Error("attribute invalid close char('/')"); // No known test case
361
350
  }
362
351
  break;
363
- case S_EQ:
364
- throw new Error('attribute value missed!!');
365
- }
366
- // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
367
- return p;
368
- /*xml space '\x20' | #x9 | #xD | #xA; */
369
- case '\u0080':
370
- c = ' ';
371
- default:
372
- if(c<= ' '){//space
373
- switch(s){
374
- case S_TAG:
375
- el.setTagName(source.slice(start,p));//tagName
376
- s = S_TAG_SPACE;
377
- break;
378
- case S_ATTR:
379
- attrName = source.slice(start,p)
380
- s = S_ATTR_SPACE;
381
- break;
382
- case S_ATTR_NOQUOT_VALUE:
383
- var value = source.slice(start, p);
384
- errorHandler.warning('attribute "'+value+'" missed quot(")!!');
385
- addAttribute(attrName, value, start)
386
- case S_ATTR_END:
387
- s = S_TAG_SPACE;
388
- break;
389
- //case S_TAG_SPACE:
390
- //case S_EQ:
391
- //case S_ATTR_SPACE:
392
- // void();break;
393
- //case S_TAG_CLOSE:
394
- //ignore warning
352
+ case '': //end document
353
+ errorHandler.error('unexpected end of input');
354
+ if (s == S_TAG) {
355
+ el.setTagName(source.slice(start, p));
395
356
  }
396
- }else{//not space
397
- //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
398
- //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
399
- switch(s){
400
- //case S_TAG:void();break;
401
- //case S_ATTR:void();break;
402
- //case S_ATTR_NOQUOT_VALUE:void();break;
403
- case S_ATTR_SPACE:
404
- var tagName = el.tagName;
405
- if (!isHTML) {
406
- errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
357
+ return p;
358
+ case '>':
359
+ switch (s) {
360
+ case S_TAG:
361
+ el.setTagName(source.slice(start, p));
362
+ case S_ATTR_END:
363
+ case S_TAG_SPACE:
364
+ case S_TAG_CLOSE:
365
+ break; //normal
366
+ case S_ATTR_NOQUOT_VALUE: //Compatible state
367
+ case S_ATTR:
368
+ value = source.slice(start, p);
369
+ if (value.slice(-1) === '/') {
370
+ el.closed = true;
371
+ value = value.slice(0, -1);
372
+ }
373
+ case S_ATTR_SPACE:
374
+ if (s === S_ATTR_SPACE) {
375
+ value = attrName;
376
+ }
377
+ if (s == S_ATTR_NOQUOT_VALUE) {
378
+ errorHandler.warning('attribute "' + value + '" missed quot(")!');
379
+ addAttribute(attrName, value, start);
380
+ } else {
381
+ if (!isHTML) {
382
+ errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
383
+ }
384
+ addAttribute(value, value, start);
385
+ }
386
+ break;
387
+ case S_EQ:
388
+ if (!isHTML) {
389
+ return errorHandler.fatalError('AttValue: \' or " expected');
390
+ }
391
+ }
392
+ return p;
393
+ /*xml space '\x20' | #x9 | #xD | #xA; */
394
+ case '\u0080':
395
+ c = ' ';
396
+ default:
397
+ if (c <= ' ') {
398
+ //space
399
+ switch (s) {
400
+ case S_TAG:
401
+ el.setTagName(source.slice(start, p)); //tagName
402
+ s = S_TAG_SPACE;
403
+ break;
404
+ case S_ATTR:
405
+ attrName = source.slice(start, p);
406
+ s = S_ATTR_SPACE;
407
+ break;
408
+ case S_ATTR_NOQUOT_VALUE:
409
+ var value = source.slice(start, p);
410
+ errorHandler.warning('attribute "' + value + '" missed quot(")!!');
411
+ addAttribute(attrName, value, start);
412
+ case S_ATTR_END:
413
+ s = S_TAG_SPACE;
414
+ break;
415
+ //case S_TAG_SPACE:
416
+ //case S_EQ:
417
+ //case S_ATTR_SPACE:
418
+ // void();break;
419
+ //case S_TAG_CLOSE:
420
+ //ignore warning
421
+ }
422
+ } else {
423
+ //not space
424
+ //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
425
+ //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
426
+ switch (s) {
427
+ //case S_TAG:void();break;
428
+ //case S_ATTR:void();break;
429
+ //case S_ATTR_NOQUOT_VALUE:void();break;
430
+ case S_ATTR_SPACE:
431
+ if (!isHTML) {
432
+ errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
433
+ }
434
+ addAttribute(attrName, attrName, start);
435
+ start = p;
436
+ s = S_ATTR;
437
+ break;
438
+ case S_ATTR_END:
439
+ errorHandler.warning('attribute space is required"' + attrName + '"!!');
440
+ case S_TAG_SPACE:
441
+ s = S_ATTR;
442
+ start = p;
443
+ break;
444
+ case S_EQ:
445
+ s = S_ATTR_NOQUOT_VALUE;
446
+ start = p;
447
+ break;
448
+ case S_TAG_CLOSE:
449
+ throw new Error("elements closed character '/' and '>' must be connected to");
407
450
  }
408
- addAttribute(attrName, attrName, start);
409
- start = p;
410
- s = S_ATTR;
411
- break;
412
- case S_ATTR_END:
413
- errorHandler.warning('attribute space is required"'+attrName+'"!!')
414
- case S_TAG_SPACE:
415
- s = S_ATTR;
416
- start = p;
417
- break;
418
- case S_EQ:
419
- s = S_ATTR_NOQUOT_VALUE;
420
- start = p;
421
- break;
422
- case S_TAG_CLOSE:
423
- throw new Error("elements closed character '/' and '>' must be connected to");
424
451
  }
425
- }
426
- }//end outer switch
452
+ } //end outer switch
427
453
  //console.log('p++',p)
428
454
  p++;
429
455
  }
430
456
  }
457
+
431
458
  /**
432
- * @return true if has new namespace define
459
+ * @returns `true` if a new namespace has been defined.
433
460
  */
434
- function appendElement(el,domBuilder,currentNSMap){
461
+ function appendElement(el, domBuilder, currentNSMap) {
435
462
  var tagName = el.tagName;
436
463
  var localNSMap = null;
437
464
  //var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
438
465
  var i = el.length;
439
- while(i--){
466
+ while (i--) {
440
467
  var a = el[i];
441
468
  var qName = a.qName;
442
469
  var value = a.value;
443
470
  var nsp = qName.indexOf(':');
444
- if(nsp>0){
445
- var prefix = a.prefix = qName.slice(0,nsp);
446
- var localName = qName.slice(nsp+1);
447
- var nsPrefix = prefix === 'xmlns' && localName
448
- }else{
471
+ if (nsp > 0) {
472
+ var prefix = (a.prefix = qName.slice(0, nsp));
473
+ var localName = qName.slice(nsp + 1);
474
+ var nsPrefix = prefix === 'xmlns' && localName;
475
+ } else {
449
476
  localName = qName;
450
- prefix = null
451
- nsPrefix = qName === 'xmlns' && ''
477
+ prefix = null;
478
+ nsPrefix = qName === 'xmlns' && '';
452
479
  }
453
480
  //can not set prefix,because prefix !== ''
454
- a.localName = localName ;
481
+ a.localName = localName;
455
482
  //prefix == null for no ns prefix attribute
456
- if(nsPrefix !== false){//hack!!
457
- if(localNSMap == null){
458
- localNSMap = {}
483
+ if (nsPrefix !== false) {
484
+ //hack!!
485
+ if (localNSMap == null) {
486
+ localNSMap = {};
459
487
  //console.log(currentNSMap,0)
460
- _copy(currentNSMap,currentNSMap={})
488
+ _copy(currentNSMap, (currentNSMap = {}));
461
489
  //console.log(currentNSMap,1)
462
490
  }
463
491
  currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
464
- a.uri = NAMESPACE.XMLNS
465
- domBuilder.startPrefixMapping(nsPrefix, value)
492
+ a.uri = NAMESPACE.XMLNS;
493
+ domBuilder.startPrefixMapping(nsPrefix, value);
466
494
  }
467
495
  }
468
496
  var i = el.length;
469
- while(i--){
497
+ while (i--) {
470
498
  a = el[i];
471
- var prefix = a.prefix;
472
- if(prefix){//no prefix attribute has no namespace
473
- if(prefix === 'xml'){
499
+ if (a.prefix) {
500
+ //no prefix attribute has no namespace
501
+ if (a.prefix === 'xml') {
474
502
  a.uri = NAMESPACE.XML;
475
- }if(prefix !== 'xmlns'){
476
- a.uri = currentNSMap[prefix || '']
503
+ }
504
+ if (a.prefix !== 'xmlns') {
505
+ a.uri = currentNSMap[a.prefix];
477
506
  }
478
507
  }
479
508
  }
480
509
  var nsp = tagName.indexOf(':');
481
- if(nsp>0){
482
- prefix = el.prefix = tagName.slice(0,nsp);
483
- localName = el.localName = tagName.slice(nsp+1);
484
- }else{
485
- prefix = null;//important!!
510
+ if (nsp > 0) {
511
+ prefix = el.prefix = tagName.slice(0, nsp);
512
+ localName = el.localName = tagName.slice(nsp + 1);
513
+ } else {
514
+ prefix = null; //important!!
486
515
  localName = el.localName = tagName;
487
516
  }
488
517
  //no prefix element has default namespace
489
- var ns = el.uri = currentNSMap[prefix || ''];
490
- domBuilder.startElement(ns,localName,tagName,el);
518
+ var ns = (el.uri = currentNSMap[prefix || '']);
519
+ domBuilder.startElement(ns, localName, tagName, el);
491
520
  //endPrefixMapping and startPrefixMapping have not any help for dom builder
492
521
  //localNSMap = null
493
- if(el.closed){
494
- domBuilder.endElement(ns,localName,tagName);
495
- if(localNSMap){
496
- for(prefix in localNSMap){
497
- domBuilder.endPrefixMapping(prefix)
522
+ if (el.closed) {
523
+ domBuilder.endElement(ns, localName, tagName);
524
+ if (localNSMap) {
525
+ for (prefix in localNSMap) {
526
+ if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
527
+ domBuilder.endPrefixMapping(prefix);
528
+ }
498
529
  }
499
530
  }
500
- }else{
531
+ } else {
501
532
  el.currentNSMap = currentNSMap;
502
533
  el.localNSMap = localNSMap;
503
534
  //parseStack.push(el);
504
535
  return true;
505
536
  }
506
537
  }
507
- function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
538
+
539
+ function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
508
540
  // https://html.spec.whatwg.org/#raw-text-elements
509
541
  // https://html.spec.whatwg.org/#escapable-raw-text-elements
510
542
  // https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
511
543
  // TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
512
544
  var isEscapableRaw = isHTMLEscapableRawTextElement(tagName);
513
- if(isEscapableRaw || isHTMLRawTextElement(tagName)){
514
- var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
515
- var text = source.substring(elStartEnd+1,elEndStart);
545
+ if (isEscapableRaw || isHTMLRawTextElement(tagName)) {
546
+ var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
547
+ var text = source.substring(elStartEnd + 1, elEndStart);
516
548
 
517
- if(isEscapableRaw){
518
- text = text.replace(/&#?\w+;/g,entityReplacer);
549
+ if (isEscapableRaw) {
550
+ text = text.replace(ENTITY_REG, entityReplacer);
519
551
  }
520
- domBuilder.characters(text,0,text.length);
521
- return elEndStart;
552
+ domBuilder.characters(text, 0, text.length);
553
+ return elEndStart;
522
554
  }
523
- return elStartEnd+1;
555
+ return elStartEnd + 1;
524
556
  }
525
- function fixSelfClosed(source,elStartEnd,tagName,closeMap){
526
- //if(tagName in closeMap){
527
- var pos = closeMap[tagName];
528
- if(pos == null){
529
- //console.log(tagName)
530
- pos = source.lastIndexOf('</'+tagName+'>')
531
- if(pos<elStartEnd){//忘记闭合
532
- pos = source.lastIndexOf('</'+tagName)
557
+
558
+ function _copy(source, target) {
559
+ for (var n in source) {
560
+ if (Object.prototype.hasOwnProperty.call(source, n)) {
561
+ target[n] = source[n];
533
562
  }
534
- closeMap[tagName] =pos
535
563
  }
536
- return pos<elStartEnd;
537
- //}
538
- }
539
- function _copy(source,target){
540
- for(var n in source){target[n] = source[n]}
541
564
  }
542
- function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
543
- var next= source.charAt(start+2)
544
- switch(next){
545
- case '-':
546
- if(source.charAt(start + 3) === '-'){
547
- var end = source.indexOf('-->',start+4);
548
- //append comment source.substring(4,end)//<!--
549
- if(end>start){
550
- domBuilder.comment(source,start+4,end-start-4);
551
- return end+3;
552
- }else{
553
- errorHandler.error("Unclosed comment");
554
- return -1;
565
+
566
+ /**
567
+ * @typedef ParseUtils
568
+ * @property {function(relativeIndex: number?): string | undefined} char
569
+ * Provides look ahead access to a singe character relative to the current index.
570
+ * @property {function(): number} getIndex
571
+ * Provides read-only access to the current index.
572
+ * @property {function(reg: RegExp): string | null} getMatch
573
+ * Applies the provided regular expression enforcing that it starts at the current index and
574
+ * returns the complete matching string,
575
+ * and moves the current index by the length of the matching string.
576
+ * @property {function(): string} getSource
577
+ * Provides read-only access to the complete source.
578
+ * @property {function(places: number?): void} skip
579
+ * moves the current index by places (defaults to 1)
580
+ * @property {function(): number} skipBlanks
581
+ * Moves the current index by the amount of white space that directly follows the current index
582
+ * and returns the amount of whitespace chars skipped (0..n),
583
+ * or -1 if the end of the source was reached.
584
+ * @property {function(): string} substringFromIndex
585
+ * creates a substring from the current index to the end of `source`
586
+ * @property {function(compareWith: string): boolean} substringStartsWith
587
+ * Checks if source contains `compareWith`,
588
+ * starting from the current index.
589
+ * @see {@link parseUtils}
590
+ */
591
+
592
+ /**
593
+ * A temporary scope for parsing and look ahead operations in `source`,
594
+ * starting from index `start`.
595
+ *
596
+ * Some operations move the current index by a number of positions,
597
+ * after which `getIndex` returns the new index.
598
+ *
599
+ * @param {string} source
600
+ * @param {number} start
601
+ * @returns {ParseUtils}
602
+ */
603
+ function parseUtils(source, start) {
604
+ var index = start;
605
+
606
+ function char(n) {
607
+ n = n || 0;
608
+ return source.charAt(index + n);
609
+ }
610
+
611
+ function skip(n) {
612
+ n = n || 1;
613
+ index += n;
614
+ }
615
+
616
+ function skipBlanks() {
617
+ var blanks = 0;
618
+ while (index < source.length) {
619
+ var c = char();
620
+ if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
621
+ return blanks;
555
622
  }
556
- }else{
557
- //error
558
- return -1;
623
+ blanks++;
624
+ skip();
625
+ }
626
+ return -1;
627
+ }
628
+ function substringFromIndex() {
629
+ return source.substring(index);
630
+ }
631
+ function substringStartsWith(text) {
632
+ return source.substring(index, index + text.length) === text;
633
+ }
634
+
635
+ function getMatch(args) {
636
+ var expr = g.reg('^', args);
637
+ var match = expr.exec(substringFromIndex());
638
+ if (match) {
639
+ skip(match[0].length);
640
+ return match[0];
641
+ }
642
+ return null;
643
+ }
644
+ return {
645
+ char: char,
646
+ getIndex: function () {
647
+ return index;
648
+ },
649
+ getMatch: getMatch,
650
+ getSource: function () {
651
+ return source;
652
+ },
653
+ skip: skip,
654
+ skipBlanks: skipBlanks,
655
+ substringFromIndex: substringFromIndex,
656
+ substringStartsWith: substringStartsWith,
657
+ };
658
+ }
659
+
660
+ /**
661
+ * @param {ParseUtils} p
662
+ * @param {DOMHandler} errorHandler
663
+ * @returns {string}
664
+ */
665
+ function parseDoctypeInternalSubset(p, errorHandler) {
666
+ /**
667
+ * @param {ParseUtils} p
668
+ * @param {DOMHandler} errorHandler
669
+ * @returns {string}
670
+ */
671
+ function parsePI(p, errorHandler) {
672
+ var match = g.PI.exec(p.substringFromIndex());
673
+ if (!match) {
674
+ return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
559
675
  }
560
- default:
561
- if(source.substr(start+3,6) == 'CDATA['){
562
- var end = source.indexOf(']]>',start+9);
563
- domBuilder.startCDATA();
564
- domBuilder.characters(source,start+9,end-start-9);
565
- domBuilder.endCDATA()
566
- return end+3;
676
+ if (match[1].toLowerCase() === 'xml') {
677
+ return errorHandler.fatalError(
678
+ 'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
679
+ );
567
680
  }
568
- //<!DOCTYPE
569
- //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
570
- var matchs = split(source,start);
571
- var len = matchs.length;
572
- if(len>1 && /!doctype/i.test(matchs[0][0])){
573
- var name = matchs[1][0];
574
- var pubid = false;
575
- var sysid = false;
576
- if(len>3){
577
- if(/^public$/i.test(matchs[2][0])){
578
- pubid = matchs[3][0];
579
- sysid = len>4 && matchs[4][0];
580
- }else if(/^system$/i.test(matchs[2][0])){
581
- sysid = matchs[3][0];
681
+ p.skip(match[0].length);
682
+ return match[0];
683
+ }
684
+ // Parse internal subset
685
+ var source = p.getSource();
686
+ if (p.char() === '[') {
687
+ p.skip(1);
688
+ var intSubsetStart = p.getIndex();
689
+ p.skipBlanks();
690
+ while (p.getIndex() < source.length) {
691
+ var current = null;
692
+ // Only in external subset
693
+ // if (char() === '<' && char(1) === '!' && char(2) === '[') {
694
+ // parseConditionalSections(p, errorHandler);
695
+ // } else
696
+ if (p.char() === '<' && p.char(1) === '!') {
697
+ switch (p.char(2)) {
698
+ case 'E':
699
+ if (p.char(3) === 'L') {
700
+ current = p.getMatch(g.elementdecl);
701
+ } else if (p.char(3) === 'N') {
702
+ current = p.getMatch(g.EntityDecl);
703
+ }
704
+ break;
705
+ case 'A':
706
+ current = p.getMatch(g.AttlistDecl);
707
+ break;
708
+ case 'N':
709
+ current = p.getMatch(g.NotationDecl);
710
+ break;
711
+ case '-':
712
+ current = p.getMatch(g.Comment);
713
+ break;
582
714
  }
715
+ } else if (p.char() === '<' && p.char(1) === '?') {
716
+ current = parsePI(p, errorHandler);
717
+ } else if (p.char() === '%') {
718
+ current = p.getMatch(g.PEReference);
719
+ } else {
720
+ return errorHandler.fatalError('Error detected in Markup declaration');
583
721
  }
584
- var lastMatch = matchs[len-1]
585
- domBuilder.startDTD(name, pubid, sysid);
586
- domBuilder.endDTD();
587
-
588
- return lastMatch.index+lastMatch[0].length
722
+ if (!current) {
723
+ return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
724
+ }
725
+ p.skipBlanks();
726
+ if (p.char() === ']') {
727
+ var internalSubset = source.substring(intSubsetStart, p.getIndex());
728
+ p.skip(1);
729
+ return internalSubset;
730
+ }
731
+ p.skipBlanks();
589
732
  }
733
+ return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
590
734
  }
591
- return -1;
592
735
  }
593
736
 
737
+ /**
738
+ * Called when the parser encounters an element starting with '<!'.
739
+ *
740
+ * @param {string} source
741
+ * The xml.
742
+ * @param {number} start
743
+ * the start index of the '<!'
744
+ * @param {DOMHandler} domBuilder
745
+ * @param {DOMHandler} errorHandler
746
+ * @param {boolean} isHTML
747
+ * @returns {number | never} The end index of the element.
748
+ * @throws {ParseError}
749
+ * In case the element is not well-formed.
750
+ */
751
+ function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
752
+ var p = parseUtils(source, start);
753
+
754
+ switch (p.char(2)) {
755
+ case '-':
756
+ // should be a comment
757
+ var comment = p.getMatch(g.Comment);
758
+ if (comment) {
759
+ domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
760
+ return p.getIndex();
761
+ } else {
762
+ return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
763
+ }
764
+ case '[':
765
+ // should be CDATA
766
+ var cdata = p.getMatch(g.CDSect);
767
+ if (cdata) {
768
+ if (!isHTML && !domBuilder.currentElement) {
769
+ return errorHandler.fatalError('CDATA outside of element');
770
+ }
771
+ domBuilder.startCDATA();
772
+ domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
773
+ domBuilder.endCDATA();
774
+ return p.getIndex();
775
+ } else {
776
+ return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
777
+ }
778
+ case 'D': {
779
+ // should be DOCTYPE
780
+ var doctype = {
781
+ name: undefined,
782
+ publicId: undefined,
783
+ systemId: undefined,
784
+ internalSubset: undefined,
785
+ };
786
+
787
+ if (!p.substringStartsWith(g.DOCTYPE_DECL_START)) {
788
+ return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
789
+ }
790
+ p.skip(g.DOCTYPE_DECL_START.length);
791
+ if (p.skipBlanks() < 1) {
792
+ return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
793
+ }
794
+
795
+ // Parse the DOCTYPE name
796
+ doctype.name = p.getMatch(g.Name);
797
+ if (!doctype.name)
798
+ return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
799
+ p.skipBlanks();
800
+
801
+ // Check for ExternalID
802
+ if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
803
+ var match = g.ExternalID_match.exec(p.substringFromIndex());
804
+ if (!match) {
805
+ return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
806
+ }
807
+ if (match.groups.SystemLiteralOnly !== undefined) {
808
+ doctype.systemId = match.groups.SystemLiteralOnly;
809
+ } else {
810
+ doctype.systemId = match.groups.SystemLiteral;
811
+ doctype.publicId = match.groups.PubidLiteral;
812
+ }
813
+ p.skip(match[0].length);
814
+ }
594
815
 
816
+ p.skipBlanks();
817
+ doctype.internalSubset = parseDoctypeInternalSubset(p, errorHandler);
818
+ p.skipBlanks();
819
+ if (p.char() !== '>') {
820
+ return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
821
+ }
822
+ p.skip(1);
823
+ domBuilder.startDTD(doctype.name, doctype.publicId, doctype.systemId, doctype.internalSubset);
824
+ domBuilder.endDTD();
825
+ return p.getIndex();
826
+ }
827
+ default:
828
+ return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
829
+ }
830
+ }
595
831
 
596
- function parseInstruction(source,start,domBuilder){
597
- var end = source.indexOf('?>',start);
598
- if(end){
599
- var match = source.substring(start,end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
600
- if(match){
601
- var len = match[0].length;
602
- domBuilder.processingInstruction(match[1], match[2]) ;
603
- return end+2;
604
- }else{//error
605
- return -1;
832
+ function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
833
+ var match = source.substring(start).match(g.PI);
834
+ if (!match) {
835
+ return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
836
+ }
837
+ if (match[1].toLowerCase() === 'xml') {
838
+ if (start > 0) {
839
+ return errorHandler.fatalError(
840
+ 'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
841
+ );
842
+ }
843
+ if (!g.XMLDecl.test(source.substring(start))) {
844
+ return errorHandler.fatalError('xml declaration is not well-formed');
606
845
  }
607
846
  }
608
- return -1;
847
+ domBuilder.processingInstruction(match[1], match[2]);
848
+ return start + match[0].length;
609
849
  }
610
850
 
611
- function ElementAttributes(){
612
- this.attributeNames = {}
851
+ function ElementAttributes() {
852
+ this.attributeNames = {};
613
853
  }
854
+
614
855
  ElementAttributes.prototype = {
615
- setTagName:function(tagName){
616
- if(!tagNamePattern.test(tagName)){
617
- throw new Error('invalid tagName:'+tagName)
856
+ setTagName: function (tagName) {
857
+ if (!g.QName_exact.test(tagName)) {
858
+ throw new Error('invalid tagName:' + tagName);
618
859
  }
619
- this.tagName = tagName
860
+ this.tagName = tagName;
620
861
  },
621
- addValue:function(qName, value, offset) {
622
- if(!tagNamePattern.test(qName)){
623
- throw new Error('invalid attribute:'+qName)
862
+ addValue: function (qName, value, offset) {
863
+ if (!g.QName_exact.test(qName)) {
864
+ throw new Error('invalid attribute:' + qName);
624
865
  }
625
866
  this.attributeNames[qName] = this.length;
626
- this[this.length++] = {qName:qName,value:value,offset:offset}
867
+ this[this.length++] = { qName: qName, value: value, offset: offset };
627
868
  },
628
- length:0,
629
- getLocalName:function(i){return this[i].localName},
630
- getLocator:function(i){return this[i].locator},
631
- getQName:function(i){return this[i].qName},
632
- getURI:function(i){return this[i].uri},
633
- getValue:function(i){return this[i].value}
634
- // ,getIndex:function(uri, localName)){
635
- // if(localName){
636
- //
637
- // }else{
638
- // var qName = uri
639
- // }
640
- // },
641
- // getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
642
- // getType:function(uri,localName){}
643
- // getType:function(i){},
644
- }
645
-
646
-
647
-
648
- function split(source,start){
649
- var match;
650
- var buf = [];
651
- var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
652
- reg.lastIndex = start;
653
- reg.exec(source);//skip <
654
- while(match = reg.exec(source)){
655
- buf.push(match);
656
- if(match[1])return buf;
657
- }
658
- }
869
+ length: 0,
870
+ getLocalName: function (i) {
871
+ return this[i].localName;
872
+ },
873
+ getLocator: function (i) {
874
+ return this[i].locator;
875
+ },
876
+ getQName: function (i) {
877
+ return this[i].qName;
878
+ },
879
+ getURI: function (i) {
880
+ return this[i].uri;
881
+ },
882
+ getValue: function (i) {
883
+ return this[i].value;
884
+ },
885
+ // ,getIndex:function(uri, localName)){
886
+ // if(localName){
887
+ //
888
+ // }else{
889
+ // var qName = uri
890
+ // }
891
+ // },
892
+ // getValue:function(){return this.getValue(this.getIndex.apply(this,arguments))},
893
+ // getType:function(uri,localName){}
894
+ // getType:function(i){},
895
+ };
659
896
 
660
897
  exports.XMLReader = XMLReader;
661
- exports.ParseError = ParseError;
898
+ exports.parseUtils = parseUtils;
899
+ exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;