@xmldom/xmldom 0.9.0-beta.1 → 0.9.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/sax.js CHANGED
@@ -1,246 +1,224 @@
1
- 'use strict'
1
+ 'use strict';
2
+
3
+ var conventions = require('./conventions');
4
+ var g = require('./grammar');
2
5
 
3
- var conventions = require("./conventions");
4
- var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
5
6
  var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
7
+ var isHTMLMimeType = conventions.isHTMLMimeType;
8
+ var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
6
9
  var NAMESPACE = conventions.NAMESPACE;
7
- var MIME_TYPE = conventions.MIME_TYPE;
8
-
9
- //[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
10
- //[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
11
- //[5] Name ::= NameStartChar (NameChar)*
12
- var nameStartChar = /[A-Z_a-z\xC0-\xD6\xD8-\xF6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]///\u10000-\uEFFFF
13
- var nameChar = new RegExp("[\\-\\.0-9"+nameStartChar.source.slice(1,-1)+"\\u00B7\\u0300-\\u036F\\u203F-\\u2040]");
14
- var tagNamePattern = new RegExp('^'+nameStartChar.source+nameChar.source+'*(?:\:'+nameStartChar.source+nameChar.source+'*)?$');
15
- //var tagNamePattern = /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
10
+ var ParseError = conventions.ParseError;
11
+
16
12
  //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
17
13
 
18
14
  //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
19
15
  //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
20
- var S_TAG = 0;//tag name offerring
21
- var S_ATTR = 1;//attr name offerring
22
- var S_ATTR_SPACE=2;//attr name end and space offer
23
- var S_EQ = 3;//=space?
24
- var S_ATTR_NOQUOT_VALUE = 4;//attr value(no quot value only)
25
- var S_ATTR_END = 5;//attr value end and no space(quot end)
26
- var S_TAG_SPACE = 6;//(attr value end || tag end ) && (space offer)
27
- var S_TAG_CLOSE = 7;//closed el<el />
28
-
29
- /**
30
- * Creates an error that will not be caught by XMLReader aka the SAX parser.
31
- *
32
- * @param {string} message
33
- * @param {any?} locator Optional, can provide details about the location in the source
34
- * @constructor
35
- */
36
- function ParseError(message, locator) {
37
- this.message = message
38
- this.locator = locator
39
- if(Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
40
- }
41
- ParseError.prototype = new Error();
42
- ParseError.prototype.name = ParseError.name
43
-
44
- function XMLReader(){
16
+ var S_TAG = 0; //tag name offerring
17
+ var S_ATTR = 1; //attr name offerring
18
+ var S_ATTR_SPACE = 2; //attr name end and space offer
19
+ var S_EQ = 3; //=space?
20
+ var S_ATTR_NOQUOT_VALUE = 4; //attr value(no quot value only)
21
+ var S_ATTR_END = 5; //attr value end and no space(quot end)
22
+ var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
23
+ var S_TAG_CLOSE = 7; //closed el<el />
45
24
 
46
- }
25
+ function XMLReader() {}
47
26
 
48
27
  XMLReader.prototype = {
49
- parse:function(source,defaultNSMap,entityMap){
28
+ parse: function (source, defaultNSMap, entityMap) {
50
29
  var domBuilder = this.domBuilder;
51
30
  domBuilder.startDocument();
52
- _copy(defaultNSMap ,defaultNSMap = {})
53
- parse(source,defaultNSMap,entityMap,
54
- domBuilder,this.errorHandler);
31
+ _copy(defaultNSMap, (defaultNSMap = {}));
32
+ parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
55
33
  domBuilder.endDocument();
56
- }
57
- }
58
- function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){
59
- var isHTML = MIME_TYPE.isHTML(domBuilder.mimeType);
34
+ },
35
+ };
36
+
37
+ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
38
+ var isHTML = isHTMLMimeType(domBuilder.mimeType);
39
+
60
40
  function fixedFromCharCode(code) {
61
41
  // String.prototype.fromCharCode does not supports
62
42
  // > 2 bytes unicode chars directly
63
43
  if (code > 0xffff) {
64
44
  code -= 0x10000;
65
- var surrogate1 = 0xd800 + (code >> 10)
66
- , surrogate2 = 0xdc00 + (code & 0x3ff);
45
+ var surrogate1 = 0xd800 + (code >> 10),
46
+ surrogate2 = 0xdc00 + (code & 0x3ff);
67
47
 
68
48
  return String.fromCharCode(surrogate1, surrogate2);
69
49
  } else {
70
50
  return String.fromCharCode(code);
71
51
  }
72
52
  }
73
- function entityReplacer(a){
74
- var k = a.slice(1,-1);
53
+
54
+ function entityReplacer(a) {
55
+ var k = a.slice(1, -1);
75
56
  if (Object.hasOwnProperty.call(entityMap, k)) {
76
57
  return entityMap[k];
77
- }else if(k.charAt(0) === '#'){
78
- return fixedFromCharCode(parseInt(k.substr(1).replace('x','0x')))
79
- }else{
80
- errorHandler.error('entity not found:'+a);
58
+ } else if (k.charAt(0) === '#') {
59
+ return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
60
+ } else {
61
+ errorHandler.error('entity not found:' + a);
81
62
  return a;
82
63
  }
83
64
  }
84
- function appendText(end){//has some bugs
85
- if(end>start){
86
- var xt = source.substring(start,end).replace(/&#?\w+;/g,entityReplacer);
87
- locator&&position(start);
88
- domBuilder.characters(xt,0,end-start);
89
- start = end
65
+
66
+ function appendText(end) {
67
+ //has some bugs
68
+ if (end > start) {
69
+ var xt = source.substring(start, end).replace(/&#?\w+;/g, entityReplacer);
70
+ locator && position(start);
71
+ domBuilder.characters(xt, 0, end - start);
72
+ start = end;
90
73
  }
91
74
  }
92
- function position(p,m){
93
- while(p>=lineEnd && (m = linePattern.exec(source))){
75
+
76
+ function position(p, m) {
77
+ while (p >= lineEnd && (m = linePattern.exec(source))) {
94
78
  lineStart = m.index;
95
79
  lineEnd = lineStart + m[0].length;
96
80
  locator.lineNumber++;
97
- //console.log('line++:',locator,startPos,endPos)
98
81
  }
99
- locator.columnNumber = p-lineStart+1;
82
+ locator.columnNumber = p - lineStart + 1;
100
83
  }
84
+
101
85
  var lineStart = 0;
102
86
  var lineEnd = 0;
103
- var linePattern = /.*(?:\r\n?|\n)|.*$/g
87
+ var linePattern = /.*(?:\r\n?|\n)|.*$/g;
104
88
  var locator = domBuilder.locator;
105
89
 
106
- var parseStack = [{currentNSMap:defaultNSMapCopy}]
90
+ var parseStack = [{ currentNSMap: defaultNSMapCopy }];
107
91
  var closeMap = {};
108
92
  var start = 0;
109
- while(true){
110
- try{
111
- var tagStart = source.indexOf('<',start);
112
- if(tagStart<0){
113
- if(!source.substr(start).match(/^\s*$/)){
93
+ while (true) {
94
+ try {
95
+ var tagStart = source.indexOf('<', start);
96
+ if (tagStart < 0) {
97
+ if (!source.substr(start).match(/^\s*$/)) {
114
98
  var doc = domBuilder.doc;
115
- var text = doc.createTextNode(source.substr(start));
116
- doc.appendChild(text);
117
- domBuilder.currentElement = text;
99
+ var text = doc.createTextNode(source.substr(start));
100
+ doc.appendChild(text);
101
+ domBuilder.currentElement = text;
118
102
  }
119
103
  return;
120
104
  }
121
- if(tagStart>start){
105
+ if (tagStart > start) {
122
106
  appendText(tagStart);
123
107
  }
124
- switch(source.charAt(tagStart+1)){
125
- case '/':
126
- var end = source.indexOf('>',tagStart+3);
127
- var tagName = source.substring(tagStart + 2, end).replace(/[ \t\n\r]+$/g, '');
128
- var config = parseStack.pop();
129
- if(end<0){
130
-
131
- tagName = source.substring(tagStart+2).replace(/[\s<].*/,'');
132
- errorHandler.error("end tag name: "+tagName+' is not complete:'+config.tagName);
133
- end = tagStart+1+tagName.length;
134
- }else if(tagName.match(/\s</)){
135
- tagName = tagName.replace(/[\s<].*/,'');
136
- errorHandler.error("end tag name: "+tagName+' maybe not complete');
137
- end = tagStart+1+tagName.length;
138
- }
139
- var localNSMap = config.localNSMap;
140
- var endMatch = config.tagName == tagName;
141
- var endIgnoreCaseMach = endMatch || config.tagName&&config.tagName.toLowerCase() == tagName.toLowerCase()
142
- if(endIgnoreCaseMach){
143
- domBuilder.endElement(config.uri,config.localName,tagName);
144
- if(localNSMap){
145
- for(var prefix in localNSMap){
146
- domBuilder.endPrefixMapping(prefix) ;
147
- }
108
+ switch (source.charAt(tagStart + 1)) {
109
+ case '/':
110
+ var config = parseStack.pop();
111
+ var end = source.indexOf('>', tagStart + 3);
112
+ var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
113
+ var tagNameMatch = g.QName_group.exec(tagNameRaw);
114
+ // for the root level the config does not contain the tagName
115
+ var tagName =
116
+ tagNameMatch && tagNameMatch[1] ? tagNameMatch[1] : config.tagName || domBuilder.doc.documentElement.tagName;
117
+ if (end < 0) {
118
+ errorHandler.error('end tag name: ' + tagName + ' is not complete');
119
+ end = tagStart + 1 + tagName.length;
120
+ } else if (tagNameRaw.match(/</) && !isHTML) {
121
+ errorHandler.error('end tag name: ' + tagName + ' maybe not complete');
148
122
  }
149
- if(!endMatch){
150
- errorHandler.fatalError("end tag name: "+tagName+' is not match the current start tagName:'+config.tagName ); // No known test case
123
+ var localNSMap = config.localNSMap;
124
+ var endMatch = config.tagName == tagName;
125
+ var endIgnoreCaseMach = endMatch || (config.tagName && config.tagName.toLowerCase() == tagName.toLowerCase());
126
+ if (endIgnoreCaseMach) {
127
+ domBuilder.endElement(config.uri, config.localName, tagName);
128
+ if (localNSMap) {
129
+ for (var prefix in localNSMap) {
130
+ if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
131
+ domBuilder.endPrefixMapping(prefix);
132
+ }
133
+ }
134
+ }
135
+ if (!endMatch) {
136
+ // No known test case
137
+ return errorHandler.fatalError(
138
+ 'end tag name: ' + tagName + ' is not match the current start tagName:' + config.tagName
139
+ );
140
+ }
141
+ } else {
142
+ parseStack.push(config);
151
143
  }
152
- }else{
153
- parseStack.push(config)
154
- }
155
144
 
156
- end++;
157
- break;
145
+ end++;
146
+ break;
158
147
  // end elment
159
- case '?':// <?...?>
160
- locator&&position(tagStart);
161
- end = parseInstruction(source,tagStart,domBuilder);
162
- break;
163
- case '!':// <!doctype,<![CDATA,<!--
164
- locator&&position(tagStart);
165
- end = parseDCC(source,tagStart,domBuilder,errorHandler);
166
- break;
167
- default:
168
- locator&&position(tagStart);
169
- var el = new ElementAttributes();
170
- var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
171
- //elStartEnd
172
- var end = parseElementStartPart(
173
- source,
174
- tagStart,
175
- el,
176
- currentNSMap,
177
- entityReplacer,
178
- errorHandler,
179
- isHTML
180
- )
181
- var len = el.length;
182
-
183
-
184
- if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
185
- el.closed = true;
186
- if(!isHTML){
187
- errorHandler.warning('unclosed xml attribute');
188
- }
189
- }
190
- if(locator && len){
191
- var locator2 = copyLocator(locator,{});
192
- //try{//attribute position fixed
193
- for(var i = 0;i<len;i++){
194
- var a = el[i];
195
- position(a.offset);
196
- a.locator = copyLocator(locator,{});
197
- }
198
- domBuilder.locator = locator2
199
- if(appendElement(el,domBuilder,currentNSMap)){
200
- parseStack.push(el)
148
+ case '?': // <?...?>
149
+ locator && position(tagStart);
150
+ end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
151
+ break;
152
+ case '!': // <!doctype,<![CDATA,<!--
153
+ locator && position(tagStart);
154
+ end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler);
155
+ break;
156
+ default:
157
+ locator && position(tagStart);
158
+ var el = new ElementAttributes();
159
+ var currentNSMap = parseStack[parseStack.length - 1].currentNSMap;
160
+ //elStartEnd
161
+ var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
162
+ var len = el.length;
163
+
164
+ if (!el.closed && fixSelfClosed(source, end, el.tagName, closeMap)) {
165
+ el.closed = true;
166
+ if (!isHTML) {
167
+ errorHandler.warning('unclosed xml attribute');
168
+ }
201
169
  }
202
- domBuilder.locator = locator;
203
- }else{
204
- if(appendElement(el,domBuilder,currentNSMap)){
205
- parseStack.push(el)
170
+ if (locator && len) {
171
+ var locator2 = copyLocator(locator, {});
172
+ //try{//attribute position fixed
173
+ for (var i = 0; i < len; i++) {
174
+ var a = el[i];
175
+ position(a.offset);
176
+ a.locator = copyLocator(locator, {});
177
+ }
178
+ domBuilder.locator = locator2;
179
+ if (appendElement(el, domBuilder, currentNSMap)) {
180
+ parseStack.push(el);
181
+ }
182
+ domBuilder.locator = locator;
183
+ } else {
184
+ if (appendElement(el, domBuilder, currentNSMap)) {
185
+ parseStack.push(el);
186
+ }
206
187
  }
207
- }
208
188
 
209
- if (isHTML && !el.closed) {
210
- end = parseHtmlSpecialContent(source,end,el.tagName,entityReplacer,domBuilder)
211
- } else {
212
- end++;
213
- }
189
+ if (isHTML && !el.closed) {
190
+ end = parseHtmlSpecialContent(source, end, el.tagName, entityReplacer, domBuilder);
191
+ } else {
192
+ end++;
193
+ }
214
194
  }
215
- }catch(e){
195
+ } catch (e) {
216
196
  if (e instanceof ParseError) {
217
197
  throw e;
218
198
  }
219
- errorHandler.error('element parse error: '+e)
199
+ errorHandler.error('element parse error: ' + e);
220
200
  end = -1;
221
201
  }
222
- if(end>start){
202
+ if (end > start) {
223
203
  start = end;
224
- }else{
204
+ } else {
225
205
  //TODO: 这里有可能sax回退,有位置错误风险
226
- appendText(Math.max(tagStart,start)+1);
206
+ appendText(Math.max(tagStart, start) + 1);
227
207
  }
228
208
  }
229
209
  }
230
- function copyLocator(f,t){
210
+
211
+ function copyLocator(f, t) {
231
212
  t.lineNumber = f.lineNumber;
232
213
  t.columnNumber = f.columnNumber;
233
214
  return t;
234
215
  }
235
216
 
236
217
  /**
237
- * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack);
238
- * @return end of the elementStartPart(end of elementEndPart for selfClosed el)
218
+ * @returns end of the elementStartPart(end of elementEndPart for selfClosed el)
219
+ * @see {@link #appendElement}
239
220
  */
240
- function parseElementStartPart(
241
- source,start,el,currentNSMap,entityReplacer,errorHandler, isHTML
242
- ){
243
-
221
+ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
244
222
  /**
245
223
  * @param {string} qname
246
224
  * @param {string} value
@@ -248,7 +226,7 @@ function parseElementStartPart(
248
226
  */
249
227
  function addAttribute(qname, value, startIndex) {
250
228
  if (el.attributeNames.hasOwnProperty(qname)) {
251
- errorHandler.fatalError('Attribute ' + qname + ' redefined')
229
+ return errorHandler.fatalError('Attribute ' + qname + ' redefined');
252
230
  }
253
231
  el.addValue(
254
232
  qname,
@@ -258,404 +236,635 @@ function parseElementStartPart(
258
236
  // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
259
237
  value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
260
238
  startIndex
261
- )
239
+ );
262
240
  }
241
+
263
242
  var attrName;
264
243
  var value;
265
244
  var p = ++start;
266
- var s = S_TAG;//status
267
- while(true){
245
+ var s = S_TAG; //status
246
+ while (true) {
268
247
  var c = source.charAt(p);
269
- switch(c){
270
- case '=':
271
- if(s === S_ATTR){//attrName
272
- attrName = source.slice(start,p);
273
- s = S_EQ;
274
- }else if(s === S_ATTR_SPACE){
275
- s = S_EQ;
276
- }else{
277
- //fatalError: equal must after attrName or space after attrName
278
- throw new Error('attribute equal must after attrName'); // No known test case
279
- }
280
- break;
281
- case '\'':
282
- case '"':
283
- if(s === S_EQ || s === S_ATTR //|| s == S_ATTR_SPACE
284
- ){//equal
285
- if(s === S_ATTR){
286
- errorHandler.warning('attribute value must after "="')
287
- attrName = source.slice(start,p)
248
+ switch (c) {
249
+ case '=':
250
+ if (s === S_ATTR) {
251
+ //attrName
252
+ attrName = source.slice(start, p);
253
+ s = S_EQ;
254
+ } else if (s === S_ATTR_SPACE) {
255
+ s = S_EQ;
256
+ } else {
257
+ //fatalError: equal must after attrName or space after attrName
258
+ throw new Error('attribute equal must after attrName'); // No known test case
288
259
  }
289
- start = p+1;
290
- p = source.indexOf(c,start)
291
- if(p>0){
260
+ break;
261
+ case "'":
262
+ case '"':
263
+ if (
264
+ s === S_EQ ||
265
+ s === S_ATTR //|| s == S_ATTR_SPACE
266
+ ) {
267
+ //equal
268
+ if (s === S_ATTR) {
269
+ errorHandler.warning('attribute value must after "="');
270
+ attrName = source.slice(start, p);
271
+ }
272
+ start = p + 1;
273
+ p = source.indexOf(c, start);
274
+ if (p > 0) {
275
+ value = source.slice(start, p);
276
+ addAttribute(attrName, value, start - 1);
277
+ s = S_ATTR_END;
278
+ } else {
279
+ //fatalError: no end quot match
280
+ throw new Error("attribute value no end '" + c + "' match");
281
+ }
282
+ } else if (s == S_ATTR_NOQUOT_VALUE) {
292
283
  value = source.slice(start, p);
293
- addAttribute(attrName, value, start-1);
284
+ addAttribute(attrName, value, start);
285
+ errorHandler.warning('attribute "' + attrName + '" missed start quot(' + c + ')!!');
286
+ start = p + 1;
294
287
  s = S_ATTR_END;
295
- }else{
296
- //fatalError: no end quot match
297
- throw new Error('attribute value no end \''+c+'\' match');
288
+ } else {
289
+ //fatalError: no equal before
290
+ throw new Error('attribute value must after "="'); // No known test case
298
291
  }
299
- }else if(s == S_ATTR_NOQUOT_VALUE){
300
- value = source.slice(start, p);
301
- addAttribute(attrName, value, start);
302
- errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
303
- start = p+1;
304
- s = S_ATTR_END
305
- }else{
306
- //fatalError: no equal before
307
- throw new Error('attribute value must after "="'); // No known test case
308
- }
309
- break;
310
- case '/':
311
- switch(s){
312
- case S_TAG:
313
- el.setTagName(source.slice(start,p));
314
- case S_ATTR_END:
315
- case S_TAG_SPACE:
316
- case S_TAG_CLOSE:
317
- s =S_TAG_CLOSE;
318
- el.closed = true;
319
- case S_ATTR_NOQUOT_VALUE:
320
- case S_ATTR:
321
- case S_ATTR_SPACE:
322
292
  break;
323
- //case S_EQ:
324
- default:
325
- throw new Error("attribute invalid close char('/')") // No known test case
326
- }
327
- break;
328
- case ''://end document
329
- errorHandler.error('unexpected end of input');
330
- if(s == S_TAG){
331
- el.setTagName(source.slice(start,p));
332
- }
333
- return p;
334
- case '>':
335
- switch(s){
336
- case S_TAG:
337
- el.setTagName(source.slice(start,p));
338
- case S_ATTR_END:
339
- case S_TAG_SPACE:
340
- case S_TAG_CLOSE:
341
- break;//normal
342
- case S_ATTR_NOQUOT_VALUE://Compatible state
343
- case S_ATTR:
344
- value = source.slice(start,p);
345
- if(value.slice(-1) === '/'){
346
- el.closed = true;
347
- value = value.slice(0,-1)
348
- }
349
- case S_ATTR_SPACE:
350
- if(s === S_ATTR_SPACE){
351
- value = attrName;
352
- }
353
- if(s == S_ATTR_NOQUOT_VALUE){
354
- errorHandler.warning('attribute "'+value+'" missed quot(")!');
355
- addAttribute(attrName, value, start)
356
- }else{
357
- if(!isHTML){
358
- errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
359
- }
360
- addAttribute(value, value, start)
293
+ case '/':
294
+ switch (s) {
295
+ case S_TAG:
296
+ el.setTagName(source.slice(start, p));
297
+ case S_ATTR_END:
298
+ case S_TAG_SPACE:
299
+ case S_TAG_CLOSE:
300
+ s = S_TAG_CLOSE;
301
+ el.closed = true;
302
+ case S_ATTR_NOQUOT_VALUE:
303
+ case S_ATTR:
304
+ break;
305
+ case S_ATTR_SPACE:
306
+ el.closed = true;
307
+ break;
308
+ //case S_EQ:
309
+ default:
310
+ throw new Error("attribute invalid close char('/')"); // No known test case
361
311
  }
362
312
  break;
363
- case S_EQ:
364
- throw new Error('attribute value missed!!');
365
- }
366
- // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
367
- return p;
368
- /*xml space '\x20' | #x9 | #xD | #xA; */
369
- case '\u0080':
370
- c = ' ';
371
- default:
372
- if(c<= ' '){//space
373
- switch(s){
374
- case S_TAG:
375
- el.setTagName(source.slice(start,p));//tagName
376
- s = S_TAG_SPACE;
377
- break;
378
- case S_ATTR:
379
- attrName = source.slice(start,p)
380
- s = S_ATTR_SPACE;
381
- break;
382
- case S_ATTR_NOQUOT_VALUE:
383
- var value = source.slice(start, p);
384
- errorHandler.warning('attribute "'+value+'" missed quot(")!!');
385
- addAttribute(attrName, value, start)
386
- case S_ATTR_END:
387
- s = S_TAG_SPACE;
388
- break;
389
- //case S_TAG_SPACE:
390
- //case S_EQ:
391
- //case S_ATTR_SPACE:
392
- // void();break;
393
- //case S_TAG_CLOSE:
394
- //ignore warning
313
+ case '': //end document
314
+ errorHandler.error('unexpected end of input');
315
+ if (s == S_TAG) {
316
+ el.setTagName(source.slice(start, p));
395
317
  }
396
- }else{//not space
397
- //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
398
- //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
399
- switch(s){
400
- //case S_TAG:void();break;
401
- //case S_ATTR:void();break;
402
- //case S_ATTR_NOQUOT_VALUE:void();break;
403
- case S_ATTR_SPACE:
404
- var tagName = el.tagName;
405
- if (!isHTML) {
406
- errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
318
+ return p;
319
+ case '>':
320
+ switch (s) {
321
+ case S_TAG:
322
+ el.setTagName(source.slice(start, p));
323
+ case S_ATTR_END:
324
+ case S_TAG_SPACE:
325
+ case S_TAG_CLOSE:
326
+ break; //normal
327
+ case S_ATTR_NOQUOT_VALUE: //Compatible state
328
+ case S_ATTR:
329
+ value = source.slice(start, p);
330
+ if (value.slice(-1) === '/') {
331
+ el.closed = true;
332
+ value = value.slice(0, -1);
333
+ }
334
+ case S_ATTR_SPACE:
335
+ if (s === S_ATTR_SPACE) {
336
+ value = attrName;
337
+ }
338
+ if (s == S_ATTR_NOQUOT_VALUE) {
339
+ errorHandler.warning('attribute "' + value + '" missed quot(")!');
340
+ addAttribute(attrName, value, start);
341
+ } else {
342
+ if (!isHTML) {
343
+ errorHandler.warning('attribute "' + value + '" missed value!! "' + value + '" instead!!');
344
+ }
345
+ addAttribute(value, value, start);
346
+ }
347
+ break;
348
+ case S_EQ:
349
+ throw new Error('attribute value missed!!');
350
+ }
351
+ return p;
352
+ /*xml space '\x20' | #x9 | #xD | #xA; */
353
+ case '\u0080':
354
+ c = ' ';
355
+ default:
356
+ if (c <= ' ') {
357
+ //space
358
+ switch (s) {
359
+ case S_TAG:
360
+ el.setTagName(source.slice(start, p)); //tagName
361
+ s = S_TAG_SPACE;
362
+ break;
363
+ case S_ATTR:
364
+ attrName = source.slice(start, p);
365
+ s = S_ATTR_SPACE;
366
+ break;
367
+ case S_ATTR_NOQUOT_VALUE:
368
+ var value = source.slice(start, p);
369
+ errorHandler.warning('attribute "' + value + '" missed quot(")!!');
370
+ addAttribute(attrName, value, start);
371
+ case S_ATTR_END:
372
+ s = S_TAG_SPACE;
373
+ break;
374
+ //case S_TAG_SPACE:
375
+ //case S_EQ:
376
+ //case S_ATTR_SPACE:
377
+ // void();break;
378
+ //case S_TAG_CLOSE:
379
+ //ignore warning
380
+ }
381
+ } else {
382
+ //not space
383
+ //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
384
+ //S_ATTR_SPACE, S_ATTR_END, S_TAG_SPACE, S_TAG_CLOSE
385
+ switch (s) {
386
+ //case S_TAG:void();break;
387
+ //case S_ATTR:void();break;
388
+ //case S_ATTR_NOQUOT_VALUE:void();break;
389
+ case S_ATTR_SPACE:
390
+ if (!isHTML) {
391
+ errorHandler.warning('attribute "' + attrName + '" missed value!! "' + attrName + '" instead2!!');
392
+ }
393
+ addAttribute(attrName, attrName, start);
394
+ start = p;
395
+ s = S_ATTR;
396
+ break;
397
+ case S_ATTR_END:
398
+ errorHandler.warning('attribute space is required"' + attrName + '"!!');
399
+ case S_TAG_SPACE:
400
+ s = S_ATTR;
401
+ start = p;
402
+ break;
403
+ case S_EQ:
404
+ s = S_ATTR_NOQUOT_VALUE;
405
+ start = p;
406
+ break;
407
+ case S_TAG_CLOSE:
408
+ throw new Error("elements closed character '/' and '>' must be connected to");
407
409
  }
408
- addAttribute(attrName, attrName, start);
409
- start = p;
410
- s = S_ATTR;
411
- break;
412
- case S_ATTR_END:
413
- errorHandler.warning('attribute space is required"'+attrName+'"!!')
414
- case S_TAG_SPACE:
415
- s = S_ATTR;
416
- start = p;
417
- break;
418
- case S_EQ:
419
- s = S_ATTR_NOQUOT_VALUE;
420
- start = p;
421
- break;
422
- case S_TAG_CLOSE:
423
- throw new Error("elements closed character '/' and '>' must be connected to");
424
410
  }
425
- }
426
- }//end outer switch
411
+ } //end outer switch
427
412
  //console.log('p++',p)
428
413
  p++;
429
414
  }
430
415
  }
416
+
431
417
  /**
432
- * @return true if has new namespace define
418
+ * @returns `true` if a new namespace has been defined.
433
419
  */
434
- function appendElement(el,domBuilder,currentNSMap){
420
+ function appendElement(el, domBuilder, currentNSMap) {
435
421
  var tagName = el.tagName;
436
422
  var localNSMap = null;
437
423
  //var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
438
424
  var i = el.length;
439
- while(i--){
425
+ while (i--) {
440
426
  var a = el[i];
441
427
  var qName = a.qName;
442
428
  var value = a.value;
443
429
  var nsp = qName.indexOf(':');
444
- if(nsp>0){
445
- var prefix = a.prefix = qName.slice(0,nsp);
446
- var localName = qName.slice(nsp+1);
447
- var nsPrefix = prefix === 'xmlns' && localName
448
- }else{
430
+ if (nsp > 0) {
431
+ var prefix = (a.prefix = qName.slice(0, nsp));
432
+ var localName = qName.slice(nsp + 1);
433
+ var nsPrefix = prefix === 'xmlns' && localName;
434
+ } else {
449
435
  localName = qName;
450
- prefix = null
451
- nsPrefix = qName === 'xmlns' && ''
436
+ prefix = null;
437
+ nsPrefix = qName === 'xmlns' && '';
452
438
  }
453
439
  //can not set prefix,because prefix !== ''
454
- a.localName = localName ;
440
+ a.localName = localName;
455
441
  //prefix == null for no ns prefix attribute
456
- if(nsPrefix !== false){//hack!!
457
- if(localNSMap == null){
458
- localNSMap = {}
442
+ if (nsPrefix !== false) {
443
+ //hack!!
444
+ if (localNSMap == null) {
445
+ localNSMap = {};
459
446
  //console.log(currentNSMap,0)
460
- _copy(currentNSMap,currentNSMap={})
447
+ _copy(currentNSMap, (currentNSMap = {}));
461
448
  //console.log(currentNSMap,1)
462
449
  }
463
450
  currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
464
- a.uri = NAMESPACE.XMLNS
465
- domBuilder.startPrefixMapping(nsPrefix, value)
451
+ a.uri = NAMESPACE.XMLNS;
452
+ domBuilder.startPrefixMapping(nsPrefix, value);
466
453
  }
467
454
  }
468
455
  var i = el.length;
469
- while(i--){
456
+ while (i--) {
470
457
  a = el[i];
471
- var prefix = a.prefix;
472
- if(prefix){//no prefix attribute has no namespace
473
- if(prefix === 'xml'){
458
+ if (a.prefix) {
459
+ //no prefix attribute has no namespace
460
+ if (a.prefix === 'xml') {
474
461
  a.uri = NAMESPACE.XML;
475
- }if(prefix !== 'xmlns'){
476
- a.uri = currentNSMap[prefix || '']
462
+ }
463
+ if (a.prefix !== 'xmlns') {
464
+ a.uri = currentNSMap[a.prefix];
477
465
  }
478
466
  }
479
467
  }
480
468
  var nsp = tagName.indexOf(':');
481
- if(nsp>0){
482
- prefix = el.prefix = tagName.slice(0,nsp);
483
- localName = el.localName = tagName.slice(nsp+1);
484
- }else{
485
- prefix = null;//important!!
469
+ if (nsp > 0) {
470
+ prefix = el.prefix = tagName.slice(0, nsp);
471
+ localName = el.localName = tagName.slice(nsp + 1);
472
+ } else {
473
+ prefix = null; //important!!
486
474
  localName = el.localName = tagName;
487
475
  }
488
476
  //no prefix element has default namespace
489
- var ns = el.uri = currentNSMap[prefix || ''];
490
- domBuilder.startElement(ns,localName,tagName,el);
477
+ var ns = (el.uri = currentNSMap[prefix || '']);
478
+ domBuilder.startElement(ns, localName, tagName, el);
491
479
  //endPrefixMapping and startPrefixMapping have not any help for dom builder
492
480
  //localNSMap = null
493
- if(el.closed){
494
- domBuilder.endElement(ns,localName,tagName);
495
- if(localNSMap){
496
- for(prefix in localNSMap){
497
- domBuilder.endPrefixMapping(prefix)
481
+ if (el.closed) {
482
+ domBuilder.endElement(ns, localName, tagName);
483
+ if (localNSMap) {
484
+ for (prefix in localNSMap) {
485
+ if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
486
+ domBuilder.endPrefixMapping(prefix);
487
+ }
498
488
  }
499
489
  }
500
- }else{
490
+ } else {
501
491
  el.currentNSMap = currentNSMap;
502
492
  el.localNSMap = localNSMap;
503
493
  //parseStack.push(el);
504
494
  return true;
505
495
  }
506
496
  }
507
- function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
497
+
498
+ function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
508
499
  // https://html.spec.whatwg.org/#raw-text-elements
509
500
  // https://html.spec.whatwg.org/#escapable-raw-text-elements
510
501
  // https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
511
502
  // TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
512
503
  var isEscapableRaw = isHTMLEscapableRawTextElement(tagName);
513
- if(isEscapableRaw || isHTMLRawTextElement(tagName)){
514
- var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
515
- var text = source.substring(elStartEnd+1,elEndStart);
504
+ if (isEscapableRaw || isHTMLRawTextElement(tagName)) {
505
+ var elEndStart = source.indexOf('</' + tagName + '>', elStartEnd);
506
+ var text = source.substring(elStartEnd + 1, elEndStart);
516
507
 
517
- if(isEscapableRaw){
518
- text = text.replace(/&#?\w+;/g,entityReplacer);
508
+ if (isEscapableRaw) {
509
+ text = text.replace(/&#?\w+;/g, entityReplacer);
519
510
  }
520
- domBuilder.characters(text,0,text.length);
521
- return elEndStart;
511
+ domBuilder.characters(text, 0, text.length);
512
+ return elEndStart;
522
513
  }
523
- return elStartEnd+1;
514
+ return elStartEnd + 1;
524
515
  }
525
- function fixSelfClosed(source,elStartEnd,tagName,closeMap){
516
+
517
+ function fixSelfClosed(source, elStartEnd, tagName, closeMap) {
526
518
  //if(tagName in closeMap){
527
519
  var pos = closeMap[tagName];
528
- if(pos == null){
520
+ if (pos == null) {
529
521
  //console.log(tagName)
530
- pos = source.lastIndexOf('</'+tagName+'>')
531
- if(pos<elStartEnd){//忘记闭合
532
- pos = source.lastIndexOf('</'+tagName)
522
+ pos = source.lastIndexOf('</' + tagName + '>');
523
+ if (pos < elStartEnd) {
524
+ //忘记闭合
525
+ pos = source.lastIndexOf('</' + tagName);
533
526
  }
534
- closeMap[tagName] =pos
527
+ closeMap[tagName] = pos;
535
528
  }
536
- return pos<elStartEnd;
529
+ return pos < elStartEnd;
537
530
  //}
538
531
  }
539
- function _copy(source,target){
540
- for(var n in source){target[n] = source[n]}
532
+
533
+ function _copy(source, target) {
534
+ for (var n in source) {
535
+ if (Object.prototype.hasOwnProperty.call(source, n)) {
536
+ target[n] = source[n];
537
+ }
538
+ }
541
539
  }
542
- function parseDCC(source,start,domBuilder,errorHandler){//sure start with '<!'
543
- var next= source.charAt(start+2)
544
- switch(next){
545
- case '-':
546
- if(source.charAt(start + 3) === '-'){
547
- var end = source.indexOf('-->',start+4);
548
- //append comment source.substring(4,end)//<!--
549
- if(end>start){
550
- domBuilder.comment(source,start+4,end-start-4);
551
- return end+3;
552
- }else{
553
- errorHandler.error("Unclosed comment");
554
- return -1;
540
+
541
/**
 * @typedef ParseUtils
 * @property {function(relativeIndex: number?): string} char
 * Provides look ahead access to a single character relative to the current index.
 * Returns an empty string when that position is outside of the source.
 * @property {function(): number} getIndex
 * Provides read-only access to the current index.
 * @property {function(reg: RegExp): string | null} getMatch
 * Applies the provided regular expression enforcing that it starts at the current index and
 * returns the complete matching string,
 * and moves the current index by the length of the matching string.
 * Returns `null` (without moving the index) when there is no match.
 * @property {function(): string} getSource
 * Provides read-only access to the complete source.
 * @property {function(places: number?): void} skip
 * Moves the current index by `places` (defaults to 1 when omitted, `0` is a no-op).
 * @property {function(): number} skipBlanks
 * Moves the current index by the amount of white space that directly follows the current index
 * and returns the amount of whitespace chars skipped (0..n),
 * or -1 if the end of the source was reached.
 * @property {function(): string} substringFromIndex
 * Creates a substring from the current index to the end of `source`.
 * @property {function(compareWith: string): boolean} substringStartsWith
 * Checks if `source` continues with `compareWith`,
 * starting from the current index.
 * @see {@link parseUtils}
 */

/**
 * A temporary scope for parsing and look ahead operations in `source`,
 * starting from index `start`.
 *
 * Some operations move the current index by a number of positions,
 * after which `getIndex` returns the new index.
 *
 * @param {string} source
 * @param {number} start
 * @returns {ParseUtils}
 */
function parseUtils(source, start) {
	var index = start;

	function char(n) {
		return source.charAt(index + (n || 0));
	}

	function skip(n) {
		// Only fall back to 1 when no amount was passed: `skip(0)` has to be a no-op,
		// otherwise `getMatch` would move the index after a zero-length match.
		index += n == null ? 1 : n;
	}

	function skipBlanks() {
		var blanks = 0;
		while (index < source.length) {
			var c = char();
			if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
				return blanks;
			}
			blanks++;
			skip();
		}
		// the end of the source was reached, independent of how many blanks were skipped
		return -1;
	}

	function substringFromIndex() {
		return source.substring(index);
	}

	function substringStartsWith(text) {
		return source.substring(index, index + text.length) === text;
	}

	function getMatch(reg) {
		// prefix the expression with `^`, so it can only match directly at the current index
		var expr = g.reg('^', reg);
		var match = expr.exec(substringFromIndex());
		if (match) {
			skip(match[0].length);
			return match[0];
		}
		return null;
	}

	return {
		char: char,
		getIndex: function () {
			return index;
		},
		getMatch: getMatch,
		getSource: function () {
			return source;
		},
		skip: skip,
		skipBlanks: skipBlanks,
		substringFromIndex: substringFromIndex,
		substringStartsWith: substringStartsWith,
	};
}
634
+
635
/**
 * Parses the optional internal subset (`[...]`) of a doctype declaration.
 *
 * When the character at the current index of `p` is not `[`, nothing is consumed.
 * Otherwise everything up to and including the closing `]` is consumed:
 * element, entity, attribute list and notation declarations, comments,
 * processing instructions and parameter entity references, separated by optional white space.
 * An empty internal subset (`[]` or only white space) is accepted.
 *
 * Every problem is reported by calling `errorHandler.fatalError`;
 * if that call returns instead of throwing, its return value is returned.
 *
 * @param {ParseUtils} p
 * @param {DOMHandler} errorHandler
 * @returns {string | undefined}
 * The text between `[` and `]` (including leading and trailing white space),
 * or `undefined` if there is no internal subset.
 */
function parseDoctypeInternalSubset(p, errorHandler) {
	/**
	 * Consumes a processing instruction located at the current index.
	 * An xml declaration is rejected, since it is not allowed at this position.
	 *
	 * @param {ParseUtils} p
	 * @param {DOMHandler} errorHandler
	 * @returns {string}
	 * The complete text matched by `g.PI`.
	 */
	function parsePI(p, errorHandler) {
		var match = g.PI.exec(p.substringFromIndex());
		if (!match) {
			return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
		}
		if (match[1].toLowerCase() === 'xml') {
			return errorHandler.fatalError(
				'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
			);
		}
		p.skip(match[0].length);
		return match[0];
	}

	if (p.char() !== '[') {
		// there is no internal subset
		return undefined;
	}
	var source = p.getSource();
	p.skip(1);
	var intSubsetStart = p.getIndex();
	while (p.getIndex() < source.length) {
		if (p.skipBlanks() < 0) {
			// the source ended without a closing `]`
			break;
		}
		// Checked before attempting to parse a declaration,
		// so an empty internal subset is accepted as well.
		if (p.char() === ']') {
			var internalSubset = source.substring(intSubsetStart, p.getIndex());
			p.skip(1);
			return internalSubset;
		}
		var current = null;
		// Conditional sections (`<![`) are only allowed in the external subset,
		// so they are not handled here.
		if (p.char() === '<' && p.char(1) === '!') {
			switch (p.char(2)) {
				case 'E':
					if (p.char(3) === 'L') {
						current = p.getMatch(g.elementdecl);
					} else if (p.char(3) === 'N') {
						current = p.getMatch(g.EntityDecl);
					}
					break;
				case 'A':
					current = p.getMatch(g.AttlistDecl);
					break;
				case 'N':
					current = p.getMatch(g.NotationDecl);
					break;
				case '-':
					current = p.getMatch(g.Comment);
					break;
			}
		} else if (p.char() === '<' && p.char(1) === '?') {
			current = parsePI(p, errorHandler);
		} else if (p.char() === '%') {
			current = p.getMatch(g.PEReference);
		} else {
			return errorHandler.fatalError('Error detected in Markup declaration');
		}
		if (!current) {
			return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
		}
	}
	return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
}
593
711
 
712
/**
 * Called when the parser encounters an element starting with '<!'.
 *
 * The character following `<!` decides what is parsed:
 * `-` a comment, `[` a CDATA section and `D` a doctype declaration.
 * The result is reported to `domBuilder` (`comment`, `startCDATA`/`characters`/`endCDATA`
 * or `startDTD`/`endDTD`), every problem is reported via `errorHandler.fatalError`.
 *
 * @param {string} source
 * The xml.
 * @param {number} start
 * the start index of the '<!'
 * @param {DOMHandler} domBuilder
 * @param {DOMHandler} errorHandler
 * @returns {number | never} The end index of the element.
 * @throws {ParseError}
 * In case the element is not well-formed
 * (presumably thrown by `errorHandler.fatalError`, verify against the handler in use).
 */
function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler) {
	var p = parseUtils(source, start);
	var marker = p.char(2);

	if (marker === '-') {
		// should be a comment
		var comment = p.getMatch(g.Comment);
		if (!comment) {
			return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
		}
		// pass the offset and length of the text between the comment delimiters
		var commentLength = comment.length - g.COMMENT_START.length - g.COMMENT_END.length;
		domBuilder.comment(comment, g.COMMENT_START.length, commentLength);
		return p.getIndex();
	}

	if (marker === '[') {
		// should be CDATA
		var cdata = p.getMatch(g.CDSect);
		if (!cdata) {
			return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
		}
		// pass the offset and length of the text between the CDATA delimiters
		var cdataLength = cdata.length - g.CDATA_START.length - g.CDATA_END.length;
		domBuilder.startCDATA();
		domBuilder.characters(cdata, g.CDATA_START.length, cdataLength);
		domBuilder.endCDATA();
		return p.getIndex();
	}

	if (marker !== 'D') {
		return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
	}

	// should be DOCTYPE
	if (!p.substringStartsWith(g.DOCTYPE_DECL_START)) {
		return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
	}
	p.skip(g.DOCTYPE_DECL_START.length);
	if (p.skipBlanks() < 1) {
		return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
	}

	// Parse the DOCTYPE name
	var name = p.getMatch(g.Name);
	if (!name) {
		return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
	}
	p.skipBlanks();

	// Check for ExternalID
	var publicId;
	var systemId;
	if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
		var match = g.ExternalID_match.exec(p.substringFromIndex());
		if (!match) {
			return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
		}
		var literals = match.groups;
		if (literals.SystemLiteralOnly !== undefined) {
			systemId = literals.SystemLiteralOnly;
		} else {
			systemId = literals.SystemLiteral;
			publicId = literals.PubidLiteral;
		}
		p.skip(match[0].length);
	}

	p.skipBlanks();
	var internalSubset = parseDoctypeInternalSubset(p, errorHandler);
	p.skipBlanks();
	if (p.char() !== '>') {
		return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
	}
	p.skip(1);
	domBuilder.startDTD(name, publicId, systemId, internalSubset);
	domBuilder.endDTD();
	return p.getIndex();
}
595
802
 
596
/**
 * Parses the processing instruction (or xml declaration) starting at index `start`
 * and reports its target and data to `domBuilder.processingInstruction`.
 *
 * A target that equals `xml` (ignoring case) is treated as an xml declaration:
 * it is only accepted at index 0 and has to match `g.XMLDecl`.
 * Every problem is reported via `errorHandler.fatalError`;
 * if that call returns instead of throwing, its return value is returned.
 *
 * @param {string} source
 * The complete source.
 * @param {number} start
 * The index at which the processing instruction starts.
 * @param {DOMHandler} domBuilder
 * @param {DOMHandler} errorHandler
 * @returns {number}
 * The index directly after the processing instruction.
 */
function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
	var remaining = source.substring(start);
	var match = remaining.match(g.PI);
	if (!match) {
		return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
	}
	var target = match[1];
	var isXmlDeclaration = target.toLowerCase() === 'xml';
	if (isXmlDeclaration && start > 0) {
		return errorHandler.fatalError(
			'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
		);
	}
	if (isXmlDeclaration && !g.XMLDecl.test(remaining)) {
		return errorHandler.fatalError('xml declaration is not well-formed');
	}
	domBuilder.processingInstruction(target, match[2]);
	return start + match[0].length;
}
610
821
 
611
/**
 * Array-like collection of the attributes of a single element.
 *
 * Attributes are stored under the numeric keys `0..length-1`,
 * `attributeNames` maps each qualified name to the index it was added at.
 */
function ElementAttributes() {
	this.attributeNames = {};
}

ElementAttributes.prototype = {
	/**
	 * Stores the tag name of the element.
	 *
	 * @param {string} tagName
	 * @throws {Error} If `tagName` does not match `g.QName_exact`.
	 */
	setTagName: function (tagName) {
		var isValid = g.QName_exact.test(tagName);
		if (!isValid) {
			throw new Error('invalid tagName:' + tagName);
		}
		this.tagName = tagName;
	},
	/**
	 * Appends an attribute and registers its index in `attributeNames`.
	 *
	 * @param {string} qName
	 * @param {string} value
	 * @param {number} offset
	 * @throws {Error} If `qName` does not match `g.QName_exact`.
	 */
	addValue: function (qName, value, offset) {
		var isValid = g.QName_exact.test(qName);
		if (!isValid) {
			throw new Error('invalid attribute:' + qName);
		}
		var position = this.length;
		this.attributeNames[qName] = position;
		this.length = position + 1;
		this[position] = { qName: qName, value: value, offset: offset };
	},
	// shared initial value, `addValue` creates the own `length` of an instance
	length: 0,
	getLocalName: function (index) {
		var attribute = this[index];
		return attribute.localName;
	},
	getLocator: function (index) {
		var attribute = this[index];
		return attribute.locator;
	},
	getQName: function (index) {
		var attribute = this[index];
		return attribute.qName;
	},
	getURI: function (index) {
		var attribute = this[index];
		return attribute.uri;
	},
	getValue: function (index) {
		var attribute = this[index];
		return attribute.value;
	},
};
659
867
 
660
868
  exports.XMLReader = XMLReader;
661
- exports.ParseError = ParseError;
869
+ exports.parseUtils = parseUtils;
870
+ exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;