@xmldom/xmldom 0.9.0-beta.8 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/sax.js CHANGED
@@ -1,25 +1,17 @@
1
1
  'use strict';
2
2
 
3
3
  var conventions = require('./conventions');
4
- var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
4
+ var g = require('./grammar');
5
+ var errors = require('./errors');
6
+
5
7
  var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
8
+ var isHTMLMimeType = conventions.isHTMLMimeType;
9
+ var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
10
+ var hasOwn = conventions.hasOwn;
6
11
  var NAMESPACE = conventions.NAMESPACE;
7
- var MIME_TYPE = conventions.MIME_TYPE;
8
-
9
- //[5] Name ::= NameStartChar (NameChar)*
10
- // https://www.w3.org/TR/REC-xml/#NT-Name
11
- // https://www.w3.org/TR/xml-names/#ns-qualnames
12
- // roughly matches /^[a-zA-Z_][\w\-\.]*(?:\:[a-zA-Z_][\w\-\.]*)?$/
13
- // which means we currently do not allow : as the first character in a tag name
14
- var tagNamePattern = new RegExp(
15
- '^' +
16
- conventions.QNAME_START_CHAR.source +
17
- conventions.NAME_CHAR.source +
18
- '*(?:' +
19
- conventions.NAME_START_CHAR.source +
20
- conventions.NAME_CHAR.source +
21
- '*)?$'
22
- );
12
+ var ParseError = errors.ParseError;
13
+ var DOMException = errors.DOMException;
14
+
23
15
  //var handlers = 'resolveEntity,getExternalSubset,characters,endDocument,endElement,endPrefixMapping,ignorableWhitespace,processingInstruction,setDocumentLocator,skippedEntity,startDocument,startElement,startPrefixMapping,notationDecl,unparsedEntityDecl,error,fatalError,warning,attributeDecl,elementDecl,externalEntityDecl,internalEntityDecl,comment,endCDATA,endDTD,endEntity,startCDATA,startDTD,startEntity'.split(',')
24
16
 
25
17
  //S_TAG, S_ATTR, S_EQ, S_ATTR_NOQUOT_VALUE
@@ -33,34 +25,33 @@ var S_ATTR_END = 5; //attr value end and no space(quot end)
33
25
  var S_TAG_SPACE = 6; //(attr value end || tag end ) && (space offer)
34
26
  var S_TAG_CLOSE = 7; //closed el<el />
35
27
 
36
- /**
37
- * Creates an error that will not be caught by XMLReader aka the SAX parser.
38
- *
39
- * @param {string} message
40
- * @param {any?} locator Optional, can provide details about the location in the source
41
- * @constructor
42
- */
43
- function ParseError(message, locator) {
44
- this.message = message;
45
- this.locator = locator;
46
- if (Error.captureStackTrace) Error.captureStackTrace(this, ParseError);
47
- }
48
- ParseError.prototype = new Error();
49
- ParseError.prototype.name = ParseError.name;
50
-
51
28
  function XMLReader() {}
52
29
 
53
30
  XMLReader.prototype = {
54
31
  parse: function (source, defaultNSMap, entityMap) {
55
32
  var domBuilder = this.domBuilder;
56
33
  domBuilder.startDocument();
57
- _copy(defaultNSMap, (defaultNSMap = {}));
34
+ _copy(defaultNSMap, (defaultNSMap = Object.create(null)));
58
35
  parse(source, defaultNSMap, entityMap, domBuilder, this.errorHandler);
59
36
  domBuilder.endDocument();
60
37
  },
61
38
  };
39
+
40
+ /**
41
+ * Detecting everything that might be a reference,
42
+ * including those without ending `;`, since those are allowed in HTML.
43
+ * The entityReplacer takes care of verifying and transforming each occurrence,
44
+ * and reports to the errorHandler on those that are not OK,
45
+ * depending on the context.
46
+ */
47
+ var ENTITY_REG = /&#?\w+;?/g;
48
+
62
49
  function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
63
- var isHTML = MIME_TYPE.isHTML(domBuilder.mimeType);
50
+ var isHTML = isHTMLMimeType(domBuilder.mimeType);
51
+ if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
52
+ return errorHandler.fatalError('Unicode replacement character detected, source encoding issues?');
53
+ }
54
+
64
55
  function fixedFromCharCode(code) {
65
56
  // String.fromCharCode does not support
66
57
  // > 2 bytes unicode chars directly
@@ -74,9 +65,20 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
74
65
  return String.fromCharCode(code);
75
66
  }
76
67
  }
68
+
77
69
  function entityReplacer(a) {
78
- var k = a.slice(1, -1);
79
- if (Object.hasOwnProperty.call(entityMap, k)) {
70
+ var complete = a[a.length - 1] === ';' ? a : a + ';';
71
+ if (!isHTML && complete !== a) {
72
+ errorHandler.error('EntityRef: expecting ;');
73
+ return a;
74
+ }
75
+ var match = g.Reference.exec(complete);
76
+ if (!match || match[0].length !== complete.length) {
77
+ errorHandler.error('entity not matching Reference production: ' + a);
78
+ return a;
79
+ }
80
+ var k = complete.slice(1, -1);
81
+ if (hasOwn(entityMap, k)) {
80
82
  return entityMap[k];
81
83
  } else if (k.charAt(0) === '#') {
82
84
  return fixedFromCharCode(parseInt(k.substr(1).replace('x', '0x')));
@@ -85,15 +87,17 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
85
87
  return a;
86
88
  }
87
89
  }
90
+
88
91
  function appendText(end) {
89
92
  //has some bugs
90
93
  if (end > start) {
91
- var xt = source.substring(start, end).replace(/&#?\w+;/g, entityReplacer);
94
+ var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
92
95
  locator && position(start);
93
96
  domBuilder.characters(xt, 0, end - start);
94
97
  start = end;
95
98
  }
96
99
  }
100
+
97
101
  function position(p, m) {
98
102
  while (p >= lineEnd && (m = linePattern.exec(source))) {
99
103
  lineStart = m.index;
@@ -102,74 +106,90 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
102
106
  }
103
107
  locator.columnNumber = p - lineStart + 1;
104
108
  }
109
+
105
110
  var lineStart = 0;
106
111
  var lineEnd = 0;
107
112
  var linePattern = /.*(?:\r\n?|\n)|.*$/g;
108
113
  var locator = domBuilder.locator;
109
114
 
110
115
  var parseStack = [{ currentNSMap: defaultNSMapCopy }];
111
- var closeMap = {};
116
+ var unclosedTags = [];
112
117
  var start = 0;
113
118
  while (true) {
114
119
  try {
115
120
  var tagStart = source.indexOf('<', start);
116
121
  if (tagStart < 0) {
117
- if (!source.substr(start).match(/^\s*$/)) {
122
+ if (!isHTML && unclosedTags.length > 0) {
123
+ return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
124
+ }
125
+ if (!source.substring(start).match(/^\s*$/)) {
118
126
  var doc = domBuilder.doc;
119
127
  var text = doc.createTextNode(source.substr(start));
128
+ if (doc.documentElement) {
129
+ return errorHandler.error('Extra content at the end of the document');
130
+ }
120
131
  doc.appendChild(text);
121
132
  domBuilder.currentElement = text;
122
133
  }
123
134
  return;
124
135
  }
125
136
  if (tagStart > start) {
137
+ var fromSource = source.substring(start, tagStart);
138
+ if (!isHTML && unclosedTags.length === 0) {
139
+ fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
140
+ fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
141
+ }
126
142
  appendText(tagStart);
127
143
  }
128
144
  switch (source.charAt(tagStart + 1)) {
129
145
  case '/':
130
- var config = parseStack.pop();
131
- var end = source.indexOf('>', tagStart + 3);
146
+ var end = source.indexOf('>', tagStart + 2);
132
147
  var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
133
- var tagNameMatch = new RegExp('(' + tagNamePattern.source.slice(0, -1) + ')').exec(tagNameRaw);
134
- // for the root level the config does not contain the tagName
135
- var tagName =
136
- tagNameMatch && tagNameMatch[1] ? tagNameMatch[1] : config.tagName || domBuilder.doc.documentElement.tagName;
137
- if (end < 0) {
138
- errorHandler.error('end tag name: ' + tagName + ' is not complete');
139
- end = tagStart + 1 + tagName.length;
140
- } else if (tagNameRaw.match(/</) && !isHTML) {
141
- errorHandler.error('end tag name: ' + tagName + ' maybe not complete');
148
+ if (!tagNameRaw) {
149
+ return errorHandler.fatalError('end tag name missing');
150
+ }
151
+ var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
152
+ if (!tagNameMatch) {
153
+ return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
154
+ }
155
+ if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
156
+ // not enough information to provide a helpful error message,
157
+ // but parsing will throw since there is no root element
158
+ return;
159
+ }
160
+ var currentTagName =
161
+ unclosedTags[unclosedTags.length - 1] ||
162
+ domBuilder.currentElement.tagName ||
163
+ domBuilder.doc.documentElement.tagName ||
164
+ '';
165
+ if (currentTagName !== tagNameMatch[1]) {
166
+ var tagNameLower = tagNameMatch[1].toLowerCase();
167
+ if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
168
+ return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
169
+ }
142
170
  }
171
+ var config = parseStack.pop();
172
+ unclosedTags.pop();
143
173
  var localNSMap = config.localNSMap;
144
- var endMatch = config.tagName == tagName;
145
- var endIgnoreCaseMach = endMatch || (config.tagName && config.tagName.toLowerCase() == tagName.toLowerCase());
146
- if (endIgnoreCaseMach) {
147
- domBuilder.endElement(config.uri, config.localName, tagName);
148
- if (localNSMap) {
149
- for (var prefix in localNSMap) {
150
- if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
151
- domBuilder.endPrefixMapping(prefix);
152
- }
174
+ domBuilder.endElement(config.uri, config.localName, currentTagName);
175
+ if (localNSMap) {
176
+ for (var prefix in localNSMap) {
177
+ if (hasOwn(localNSMap, prefix)) {
178
+ domBuilder.endPrefixMapping(prefix);
153
179
  }
154
180
  }
155
- if (!endMatch) {
156
- // No known test case
157
- errorHandler.fatalError('end tag name: ' + tagName + ' is not match the current start tagName:' + config.tagName);
158
- }
159
- } else {
160
- parseStack.push(config);
161
181
  }
162
182
 
163
183
  end++;
164
184
  break;
165
- // end elment
185
+ // end element
166
186
  case '?': // <?...?>
167
187
  locator && position(tagStart);
168
- end = parseInstruction(source, tagStart, domBuilder);
188
+ end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
169
189
  break;
170
190
  case '!': // <!doctype,<![CDATA,<!--
171
191
  locator && position(tagStart);
172
- end = parseDCC(source, tagStart, domBuilder, errorHandler);
192
+ end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
173
193
  break;
174
194
  default:
175
195
  locator && position(tagStart);
@@ -179,10 +199,11 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
179
199
  var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
180
200
  var len = el.length;
181
201
 
182
- if (!el.closed && fixSelfClosed(source, end, el.tagName, closeMap)) {
183
- el.closed = true;
184
- if (!isHTML) {
185
- errorHandler.warning('unclosed xml attribute');
202
+ if (!el.closed) {
203
+ if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
204
+ el.closed = true;
205
+ } else {
206
+ unclosedTags.push(el.tagName);
186
207
  }
187
208
  }
188
209
  if (locator && len) {
@@ -213,6 +234,8 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
213
234
  } catch (e) {
214
235
  if (e instanceof ParseError) {
215
236
  throw e;
237
+ } else if (e instanceof DOMException) {
238
+ throw new ParseError(e.name + ': ' + e.message, domBuilder.locator, e);
216
239
  }
217
240
  errorHandler.error('element parse error: ' + e);
218
241
  end = -1;
@@ -220,11 +243,12 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
220
243
  if (end > start) {
221
244
  start = end;
222
245
  } else {
223
- //TODO: 这里有可能sax回退,有位置错误风险
246
+ //Possible sax fallback here, risk of positional error
224
247
  appendText(Math.max(tagStart, start) + 1);
225
248
  }
226
249
  }
227
250
  }
251
+
228
252
  function copyLocator(f, t) {
229
253
  t.lineNumber = f.lineNumber;
230
254
  t.columnNumber = f.columnNumber;
@@ -232,8 +256,9 @@ function copyLocator(f, t) {
232
256
  }
233
257
 
234
258
  /**
235
- * @see #appendElement(source,elStartEnd,el,selfClosed,entityReplacer,domBuilder,parseStack);
236
- * @return end of the elementStartPart(end of elementEndPart for selfClosed el)
259
+ * @returns
260
+ * end of the elementStartPart(end of elementEndPart for selfClosed el)
261
+ * @see {@link #appendElement}
237
262
  */
238
263
  function parseElementStartPart(source, start, el, currentNSMap, entityReplacer, errorHandler, isHTML) {
239
264
  /**
@@ -242,8 +267,11 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
242
267
  * @param {number} startIndex
243
268
  */
244
269
  function addAttribute(qname, value, startIndex) {
245
- if (el.attributeNames.hasOwnProperty(qname)) {
246
- errorHandler.fatalError('Attribute ' + qname + ' redefined');
270
+ if (hasOwn(el.attributeNames, qname)) {
271
+ return errorHandler.fatalError('Attribute ' + qname + ' redefined');
272
+ }
273
+ if (!isHTML && value.indexOf('<') >= 0) {
274
+ return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
247
275
  }
248
276
  el.addValue(
249
277
  qname,
@@ -251,10 +279,11 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
251
279
  // since the xmldom sax parser does not "interpret" DTD the following is not implemented:
252
280
  // - recursive replacement of (DTD) entity references
253
281
  // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
254
- value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
282
+ value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
255
283
  startIndex
256
284
  );
257
285
  }
286
+
258
287
  var attrName;
259
288
  var value;
260
289
  var p = ++start;
@@ -362,9 +391,10 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
362
391
  }
363
392
  break;
364
393
  case S_EQ:
365
- throw new Error('attribute value missed!!');
394
+ if (!isHTML) {
395
+ return errorHandler.fatalError('AttValue: \' or " expected');
396
+ }
366
397
  }
367
- // console.log(tagName,tagNamePattern,tagNamePattern.test(tagName))
368
398
  return p;
369
399
  /*xml space '\x20' | #x9 | #xD | #xA; */
370
400
  case '\u0080':
@@ -426,17 +456,17 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
426
456
  }
427
457
  }
428
458
  } //end outer switch
429
- //console.log('p++',p)
430
459
  p++;
431
460
  }
432
461
  }
462
+
433
463
  /**
434
- * @return true if has new namespace define
464
+ * @returns
465
+ * `true` if a new namespace has been defined.
435
466
  */
436
467
  function appendElement(el, domBuilder, currentNSMap) {
437
468
  var tagName = el.tagName;
438
469
  var localNSMap = null;
439
- //var currentNSMap = parseStack[parseStack.length-1].currentNSMap;
440
470
  var i = el.length;
441
471
  while (i--) {
442
472
  var a = el[i];
@@ -458,10 +488,8 @@ function appendElement(el, domBuilder, currentNSMap) {
458
488
  if (nsPrefix !== false) {
459
489
  //hack!!
460
490
  if (localNSMap == null) {
461
- localNSMap = {};
462
- //console.log(currentNSMap,0)
463
- _copy(currentNSMap, (currentNSMap = {}));
464
- //console.log(currentNSMap,1)
491
+ localNSMap = Object.create(null);
492
+ _copy(currentNSMap, (currentNSMap = Object.create(null)));
465
493
  }
466
494
  currentNSMap[nsPrefix] = localNSMap[nsPrefix] = value;
467
495
  a.uri = NAMESPACE.XMLNS;
@@ -498,7 +526,7 @@ function appendElement(el, domBuilder, currentNSMap) {
498
526
  domBuilder.endElement(ns, localName, tagName);
499
527
  if (localNSMap) {
500
528
  for (prefix in localNSMap) {
501
- if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
529
+ if (hasOwn(localNSMap, prefix)) {
502
530
  domBuilder.endPrefixMapping(prefix);
503
531
  }
504
532
  }
@@ -510,6 +538,7 @@ function appendElement(el, domBuilder, currentNSMap) {
510
538
  return true;
511
539
  }
512
540
  }
541
+
513
542
  function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, domBuilder) {
514
543
  // https://html.spec.whatwg.org/#raw-text-elements
515
544
  // https://html.spec.whatwg.org/#escapable-raw-text-elements
@@ -521,117 +550,321 @@ function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, do
521
550
  var text = source.substring(elStartEnd + 1, elEndStart);
522
551
 
523
552
  if (isEscapableRaw) {
524
- text = text.replace(/&#?\w+;/g, entityReplacer);
553
+ text = text.replace(ENTITY_REG, entityReplacer);
525
554
  }
526
555
  domBuilder.characters(text, 0, text.length);
527
556
  return elEndStart;
528
557
  }
529
558
  return elStartEnd + 1;
530
559
  }
531
- function fixSelfClosed(source, elStartEnd, tagName, closeMap) {
532
- //if(tagName in closeMap){
533
- var pos = closeMap[tagName];
534
- if (pos == null) {
535
- //console.log(tagName)
536
- pos = source.lastIndexOf('</' + tagName + '>');
537
- if (pos < elStartEnd) {
538
- //忘记闭合
539
- pos = source.lastIndexOf('</' + tagName);
540
- }
541
- closeMap[tagName] = pos;
542
- }
543
- return pos < elStartEnd;
544
- //}
545
- }
546
560
 
547
561
  function _copy(source, target) {
548
562
  for (var n in source) {
549
- if (Object.prototype.hasOwnProperty.call(source, n)) {
563
+ if (hasOwn(source, n)) {
550
564
  target[n] = source[n];
551
565
  }
552
566
  }
553
567
  }
554
568
 
555
- function parseDCC(source, start, domBuilder, errorHandler) {
556
- //sure start with '<!'
557
- var next = source.charAt(start + 2);
558
- switch (next) {
559
- case '-':
560
- if (source.charAt(start + 3) === '-') {
561
- var end = source.indexOf('-->', start + 4);
562
- //append comment source.substring(4,end)//<!--
563
- if (end > start) {
564
- domBuilder.comment(source, start + 4, end - start - 4);
565
- return end + 3;
566
- } else {
567
- errorHandler.error('Unclosed comment');
568
- return -1;
569
+ /**
570
+ * @typedef ParseUtils
571
+ * @property {function(relativeIndex: number?): string | undefined} char
572
+ * Provides look ahead access to a single character relative to the current index.
573
+ * @property {function(): number} getIndex
574
+ * Provides read-only access to the current index.
575
+ * @property {function(reg: RegExp): string | null} getMatch
576
+ * Applies the provided regular expression enforcing that it starts at the current index and
577
+ * returns the complete matching string,
578
+ * and moves the current index by the length of the matching string.
579
+ * @property {function(): string} getSource
580
+ * Provides read-only access to the complete source.
581
+ * @property {function(places: number?): void} skip
582
+ * moves the current index by places (defaults to 1)
583
+ * @property {function(): number} skipBlanks
584
+ * Moves the current index by the amount of white space that directly follows the current index
585
+ * and returns the amount of whitespace chars skipped (0..n),
586
+ * or -1 if the end of the source was reached.
587
+ * @property {function(): string} substringFromIndex
588
+ * creates a substring from the current index to the end of `source`
589
+ * @property {function(compareWith: string): boolean} substringStartsWith
590
+ * Checks if source contains `compareWith`,
591
+ * starting from the current index.
592
+ * @see {@link parseUtils}
593
+ */
594
+
595
+ /**
596
+ * A temporary scope for parsing and look ahead operations in `source`,
597
+ * starting from index `start`.
598
+ *
599
+ * Some operations move the current index by a number of positions,
600
+ * after which `getIndex` returns the new index.
601
+ *
602
+ * @param {string} source
603
+ * @param {number} start
604
+ * @returns {ParseUtils}
605
+ */
606
+ function parseUtils(source, start) {
607
+ var index = start;
608
+
609
+ function char(n) {
610
+ n = n || 0;
611
+ return source.charAt(index + n);
612
+ }
613
+
614
+ function skip(n) {
615
+ n = n || 1;
616
+ index += n;
617
+ }
618
+
619
+ function skipBlanks() {
620
+ var blanks = 0;
621
+ while (index < source.length) {
622
+ var c = char();
623
+ if (c !== ' ' && c !== '\n' && c !== '\t' && c !== '\r') {
624
+ return blanks;
625
+ }
626
+ blanks++;
627
+ skip();
628
+ }
629
+ return -1;
630
+ }
631
+ function substringFromIndex() {
632
+ return source.substring(index);
633
+ }
634
+ function substringStartsWith(text) {
635
+ return source.substring(index, index + text.length) === text;
636
+ }
637
+
638
+ function getMatch(args) {
639
+ var expr = g.reg('^', args);
640
+ var match = expr.exec(substringFromIndex());
641
+ if (match) {
642
+ skip(match[0].length);
643
+ return match[0];
644
+ }
645
+ return null;
646
+ }
647
+ return {
648
+ char: char,
649
+ getIndex: function () {
650
+ return index;
651
+ },
652
+ getMatch: getMatch,
653
+ getSource: function () {
654
+ return source;
655
+ },
656
+ skip: skip,
657
+ skipBlanks: skipBlanks,
658
+ substringFromIndex: substringFromIndex,
659
+ substringStartsWith: substringStartsWith,
660
+ };
661
+ }
662
+
663
+ /**
664
+ * @param {ParseUtils} p
665
+ * @param {DOMHandler} errorHandler
666
+ * @returns {string}
667
+ */
668
+ function parseDoctypeInternalSubset(p, errorHandler) {
669
+ /**
670
+ * @param {ParseUtils} p
671
+ * @param {DOMHandler} errorHandler
672
+ * @returns {string}
673
+ */
674
+ function parsePI(p, errorHandler) {
675
+ var match = g.PI.exec(p.substringFromIndex());
676
+ if (!match) {
677
+ return errorHandler.fatalError('processing instruction is not well-formed at position ' + p.getIndex());
678
+ }
679
+ if (match[1].toLowerCase() === 'xml') {
680
+ return errorHandler.fatalError(
681
+ 'xml declaration is only allowed at the start of the document, but found at position ' + p.getIndex()
682
+ );
683
+ }
684
+ p.skip(match[0].length);
685
+ return match[0];
686
+ }
687
+ // Parse internal subset
688
+ var source = p.getSource();
689
+ if (p.char() === '[') {
690
+ p.skip(1);
691
+ var intSubsetStart = p.getIndex();
692
+ while (p.getIndex() < source.length) {
693
+ p.skipBlanks();
694
+ if (p.char() === ']') {
695
+ var internalSubset = source.substring(intSubsetStart, p.getIndex());
696
+ p.skip(1);
697
+ return internalSubset;
698
+ }
699
+ var current = null;
700
+ // Only in external subset
701
+ // if (char() === '<' && char(1) === '!' && char(2) === '[') {
702
+ // parseConditionalSections(p, errorHandler);
703
+ // } else
704
+ if (p.char() === '<' && p.char(1) === '!') {
705
+ switch (p.char(2)) {
706
+ case 'E': // ELEMENT | ENTITY
707
+ if (p.char(3) === 'L') {
708
+ current = p.getMatch(g.elementdecl);
709
+ } else if (p.char(3) === 'N') {
710
+ current = p.getMatch(g.EntityDecl);
711
+ }
712
+ break;
713
+ case 'A': // ATTRIBUTE
714
+ current = p.getMatch(g.AttlistDecl);
715
+ break;
716
+ case 'N': // NOTATION
717
+ current = p.getMatch(g.NotationDecl);
718
+ break;
719
+ case '-': // COMMENT
720
+ current = p.getMatch(g.Comment);
721
+ break;
569
722
  }
723
+ } else if (p.char() === '<' && p.char(1) === '?') {
724
+ current = parsePI(p, errorHandler);
725
+ } else if (p.char() === '%') {
726
+ current = p.getMatch(g.PEReference);
570
727
  } else {
571
- //error
572
- return -1;
728
+ return errorHandler.fatalError('Error detected in Markup declaration');
573
729
  }
574
- default:
575
- if (source.substr(start + 3, 6) == 'CDATA[') {
576
- var end = source.indexOf(']]>', start + 9);
730
+ if (!current) {
731
+ return errorHandler.fatalError('Error in internal subset at position ' + p.getIndex());
732
+ }
733
+ }
734
+ return errorHandler.fatalError('doctype internal subset is not well-formed, missing ]');
735
+ }
736
+ }
737
+
738
+ /**
739
+ * Called when the parser encounters an element starting with '<!'.
740
+ *
741
+ * @param {string} source
742
+ * The xml.
743
+ * @param {number} start
744
+ * the start index of the '<!'
745
+ * @param {DOMHandler} domBuilder
746
+ * @param {DOMHandler} errorHandler
747
+ * @param {boolean} isHTML
748
+ * @returns {number | never}
749
+ * The end index of the element.
750
+ * @throws {ParseError}
751
+ * In case the element is not well-formed.
752
+ */
753
+ function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
754
+ var p = parseUtils(source, start);
755
+
756
+ switch (p.char(2)) {
757
+ case '-':
758
+ // should be a comment
759
+ var comment = p.getMatch(g.Comment);
760
+ if (comment) {
761
+ domBuilder.comment(comment, g.COMMENT_START.length, comment.length - g.COMMENT_START.length - g.COMMENT_END.length);
762
+ return p.getIndex();
763
+ } else {
764
+ return errorHandler.fatalError('comment is not well-formed at position ' + p.getIndex());
765
+ }
766
+ case '[':
767
+ // should be CDATA
768
+ var cdata = p.getMatch(g.CDSect);
769
+ if (cdata) {
770
+ if (!isHTML && !domBuilder.currentElement) {
771
+ return errorHandler.fatalError('CDATA outside of element');
772
+ }
577
773
  domBuilder.startCDATA();
578
- domBuilder.characters(source, start + 9, end - start - 9);
774
+ domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
579
775
  domBuilder.endCDATA();
580
- return end + 3;
776
+ return p.getIndex();
777
+ } else {
778
+ return errorHandler.fatalError('Invalid CDATA starting at position ' + start);
581
779
  }
582
- //<!DOCTYPE
583
- //startDTD(java.lang.String name, java.lang.String publicId, java.lang.String systemId)
584
- var matchs = split(source, start);
585
- var len = matchs.length;
586
- if (len > 1 && /!doctype/i.test(matchs[0][0])) {
587
- var name = matchs[1][0];
588
- var pubid = false;
589
- var sysid = false;
590
- if (len > 3) {
591
- if (/^public$/i.test(matchs[2][0])) {
592
- pubid = matchs[3][0];
593
- sysid = len > 4 && matchs[4][0];
594
- } else if (/^system$/i.test(matchs[2][0])) {
595
- sysid = matchs[3][0];
596
- }
780
+ case 'D': {
781
+ // should be DOCTYPE
782
+ if (domBuilder.doc && domBuilder.doc.documentElement) {
783
+ return errorHandler.fatalError('Doctype not allowed inside or after documentElement at position ' + p.getIndex());
784
+ }
785
+ if (!p.substringStartsWith(g.DOCTYPE_DECL_START)) {
786
+ return errorHandler.fatalError('Expected ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
787
+ }
788
+ p.skip(g.DOCTYPE_DECL_START.length);
789
+ if (p.skipBlanks() < 1) {
790
+ return errorHandler.fatalError('Expected whitespace after ' + g.DOCTYPE_DECL_START + ' at position ' + p.getIndex());
791
+ }
792
+
793
+ var doctype = {
794
+ name: undefined,
795
+ publicId: undefined,
796
+ systemId: undefined,
797
+ internalSubset: undefined,
798
+ };
799
+ // Parse the DOCTYPE name
800
+ doctype.name = p.getMatch(g.Name);
801
+ if (!doctype.name)
802
+ return errorHandler.fatalError('doctype name missing or contains unexpected characters at position ' + p.getIndex());
803
+ p.skipBlanks();
804
+
805
+ // Check for ExternalID
806
+ if (p.substringStartsWith(g.PUBLIC) || p.substringStartsWith(g.SYSTEM)) {
807
+ var match = g.ExternalID_match.exec(p.substringFromIndex());
808
+ if (!match) {
809
+ return errorHandler.fatalError('doctype external id is not well-formed at position ' + p.getIndex());
810
+ }
811
+ if (match.groups.SystemLiteralOnly !== undefined) {
812
+ doctype.systemId = match.groups.SystemLiteralOnly;
813
+ } else {
814
+ doctype.systemId = match.groups.SystemLiteral;
815
+ doctype.publicId = match.groups.PubidLiteral;
597
816
  }
598
- var lastMatch = matchs[len - 1];
599
- domBuilder.startDTD(name, pubid, sysid);
600
- domBuilder.endDTD();
817
+ p.skip(match[0].length);
818
+ }
601
819
 
602
- return lastMatch.index + lastMatch[0].length;
820
+ p.skipBlanks();
821
+ doctype.internalSubset = parseDoctypeInternalSubset(p, errorHandler);
822
+ p.skipBlanks();
823
+ if (p.char() !== '>') {
824
+ return errorHandler.fatalError('doctype not terminated with > at position ' + p.getIndex());
603
825
  }
826
+ p.skip(1);
827
+ domBuilder.startDTD(doctype.name, doctype.publicId, doctype.systemId, doctype.internalSubset);
828
+ domBuilder.endDTD();
829
+ return p.getIndex();
830
+ }
831
+ default:
832
+ return errorHandler.fatalError('Not well-formed XML starting with "<!" at position ' + start);
604
833
  }
605
- return -1;
606
834
  }
607
835
 
608
- function parseInstruction(source, start, domBuilder) {
609
- var end = source.indexOf('?>', start);
610
- if (end) {
611
- var match = source.substring(start, end).match(/^<\?(\S*)\s*([\s\S]*?)\s*$/);
612
- if (match) {
613
- domBuilder.processingInstruction(match[1], match[2]);
614
- return end + 2;
615
- } else {
616
- //error
617
- return -1;
836
+ function parseProcessingInstruction(source, start, domBuilder, errorHandler) {
837
+ var match = source.substring(start).match(g.PI);
838
+ if (!match) {
839
+ return errorHandler.fatalError('Invalid processing instruction starting at position ' + start);
840
+ }
841
+ if (match[1].toLowerCase() === 'xml') {
842
+ if (start > 0) {
843
+ return errorHandler.fatalError(
844
+ 'processing instruction at position ' + start + ' is an xml declaration which is only at the start of the document'
845
+ );
846
+ }
847
+ if (!g.XMLDecl.test(source.substring(start))) {
848
+ return errorHandler.fatalError('xml declaration is not well-formed');
618
849
  }
619
850
  }
620
- return -1;
851
+ domBuilder.processingInstruction(match[1], match[2]);
852
+ return start + match[0].length;
621
853
  }
622
854
 
623
855
  function ElementAttributes() {
624
- this.attributeNames = {};
856
+ this.attributeNames = Object.create(null);
625
857
  }
858
+
626
859
  ElementAttributes.prototype = {
627
860
  setTagName: function (tagName) {
628
- if (!tagNamePattern.test(tagName)) {
861
+ if (!g.QName_exact.test(tagName)) {
629
862
  throw new Error('invalid tagName:' + tagName);
630
863
  }
631
864
  this.tagName = tagName;
632
865
  },
633
866
  addValue: function (qName, value, offset) {
634
- if (!tagNamePattern.test(qName)) {
867
+ if (!g.QName_exact.test(qName)) {
635
868
  throw new Error('invalid attribute:' + qName);
636
869
  }
637
870
  this.attributeNames[qName] = this.length;
@@ -665,17 +898,6 @@ ElementAttributes.prototype = {
665
898
  // getType:function(i){},
666
899
  };
667
900
 
668
- function split(source, start) {
669
- var match;
670
- var buf = [];
671
- var reg = /'[^']+'|"[^"]+"|[^\s<>\/=]+=?|(\/?\s*>|<)/g;
672
- reg.lastIndex = start;
673
- reg.exec(source); //skip <
674
- while ((match = reg.exec(source))) {
675
- buf.push(match);
676
- if (match[1]) return buf;
677
- }
678
- }
679
-
680
901
  exports.XMLReader = XMLReader;
681
- exports.ParseError = ParseError;
902
+ exports.parseUtils = parseUtils;
903
+ exports.parseDoctypeCommentOrCData = parseDoctypeCommentOrCData;