@xmldom/xmldom 0.9.0-beta.10 → 0.9.0-beta.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,22 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [0.9.0-beta.11](https://github.com/xmldom/xmldom/compare/0.9.0-beta.10...0.9.0-beta.11)
8
+
9
+ ### Fixed
10
+
11
+ - report more non well-formed cases [`#519`](https://github.com/xmldom/xmldom/pull/519) / [`#45`](https://github.com/xmldom/xmldom/issues/45) / [`#125`](https://github.com/xmldom/xmldom/issues/125) / [`#467`](https://github.com/xmldom/xmldom/issues/467)
12
+ BREAKING-CHANGE: Reports more documents that are not well-formed as fatalError
13
+ and drops the broken support for optional and unclosed tags in HTML.
14
+
15
+ ### Other
16
+
17
+ - Translate/drop non English comments [`#518`](https://github.com/xmldom/xmldom/pull/518)
18
+ - use node v16 for development [`#517`](https://github.com/xmldom/xmldom/pull/517)
19
+
20
+ Thank you, [@brodybits](https://github.com/brodybits), [@cbettinger](https://github.com/cbettinger), [@josecarlosrx](https://github.com/josecarlosrx), for your contributions
21
+
22
+
7
23
  ## [0.9.0-beta.10](https://github.com/xmldom/xmldom/compare/0.9.0-beta.9...0.9.0-beta.10)
8
24
 
9
25
  ### Fixed
package/lib/dom-parser.js CHANGED
@@ -200,7 +200,7 @@ function DOMParser(options) {
200
200
  * @param {string} [mimeType='application/xml']
201
201
  * the mimeType or contentType of the document to be created determines the `type` of document
202
202
  * created (XML or HTML)
203
- * @returns The `Document` node.
203
+ * @returns {Document} The `Document` node.
204
204
  * @throws {ParseError}
205
205
  * for any `fatalError` or anything that is thrown by `onError`
206
206
  * @throws {TypeError}
package/lib/grammar.js CHANGED
@@ -113,6 +113,16 @@ function regg(args) {
113
113
  // https://www.w3.org/TR/xml11/#NT-document
114
114
  // `[1] document ::= ( prolog element Misc* ) - ( Char* RestrictedChar Char* )`
115
115
 
116
+ /**
117
+ * A character usually appearing in wrongly converted strings.
118
+ *
119
+ * @type {string}
120
+ * @see https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
121
+ * @see https://nodejs.dev/en/api/v18/buffer/#buffers-and-character-encodings
122
+ * @see https://www.unicode.org/faq/utf_bom.html#BOM
123
+ * @readonly
124
+ */
125
+ var UNICODE_REPLACEMENT_CHARACTER = '\uFFFD';
116
126
  // https://www.w3.org/TR/xml/#NT-Char
117
127
  // any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
118
128
  // `[2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]`
@@ -500,6 +510,7 @@ exports.ExternalID = ExternalID;
500
510
  exports.ExternalID_match = ExternalID_match;
501
511
  exports.Name = Name;
502
512
  exports.NotationDecl = NotationDecl;
513
+ exports.Reference = Reference;
503
514
  exports.PEReference = PEReference;
504
515
  exports.PI = PI;
505
516
  exports.PUBLIC = PUBLIC;
@@ -512,5 +523,6 @@ exports.SChar_s = SChar_s;
512
523
  exports.S_OPT = S_OPT;
513
524
  exports.SYSTEM = SYSTEM;
514
525
  exports.SystemLiteral = SystemLiteral;
526
+ exports.UNICODE_REPLACEMENT_CHARACTER = UNICODE_REPLACEMENT_CHARACTER;
515
527
  exports.UNICODE_SUPPORT = UNICODE_SUPPORT;
516
528
  exports.XMLDecl = XMLDecl;
package/lib/sax.js CHANGED
@@ -34,8 +34,20 @@ XMLReader.prototype = {
34
34
  },
35
35
  };
36
36
 
37
+ /**
38
+ * Detecting everything that might be a reference,
39
+ * including those without ending `;`, since those are allowed in HTML.
40
+ * The entityReplacer takes care of verifying and transforming each occurrence,
41
+ * and reports to the errorHandler on those that are not OK,
42
+ * depending on the context.
43
+ */
44
+ var ENTITY_REG = /&#?\w+;?/g;
45
+
37
46
  function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
38
47
  var isHTML = isHTMLMimeType(domBuilder.mimeType);
48
+ if (source.indexOf(g.UNICODE_REPLACEMENT_CHARACTER) >= 0) {
49
+ return errorHandler.fatalError('Unicode replacement character detected, source encoding issues?');
50
+ }
39
51
 
40
52
  function fixedFromCharCode(code) {
41
53
  // String.fromCharCode does not support
@@ -52,7 +64,17 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
52
64
  }
53
65
 
54
66
  function entityReplacer(a) {
55
- var k = a.slice(1, -1);
67
+ var complete = a[a.length - 1] === ';' ? a : a + ';';
68
+ if (!isHTML && complete !== a) {
69
+ errorHandler.error('EntityRef: expecting ;');
70
+ return a;
71
+ }
72
+ var match = g.Reference.exec(complete);
73
+ if (!match || match[0].length !== complete.length) {
74
+ errorHandler.error('entity not matching Reference production: ' + a);
75
+ return a;
76
+ }
77
+ var k = complete.slice(1, -1);
56
78
  if (Object.hasOwnProperty.call(entityMap, k)) {
57
79
  return entityMap[k];
58
80
  } else if (k.charAt(0) === '#') {
@@ -66,7 +88,7 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
66
88
  function appendText(end) {
67
89
  //has some bugs
68
90
  if (end > start) {
69
- var xt = source.substring(start, end).replace(/&#?\w+;/g, entityReplacer);
91
+ var xt = source.substring(start, end).replace(ENTITY_REG, entityReplacer);
70
92
  locator && position(start);
71
93
  domBuilder.characters(xt, 0, end - start);
72
94
  start = end;
@@ -88,70 +110,83 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
88
110
  var locator = domBuilder.locator;
89
111
 
90
112
  var parseStack = [{ currentNSMap: defaultNSMapCopy }];
91
- var closeMap = {};
113
+ var unclosedTags = [];
92
114
  var start = 0;
93
115
  while (true) {
94
116
  try {
95
117
  var tagStart = source.indexOf('<', start);
96
118
  if (tagStart < 0) {
97
- if (!source.substr(start).match(/^\s*$/)) {
119
+ if (!isHTML && unclosedTags.length > 0) {
120
+ return errorHandler.fatalError('unclosed xml tag(s): ' + unclosedTags.join(', '));
121
+ }
122
+ if (!source.substring(start).match(/^\s*$/)) {
98
123
  var doc = domBuilder.doc;
99
124
  var text = doc.createTextNode(source.substr(start));
125
+ if (doc.documentElement) {
126
+ return errorHandler.error('Extra content at the end of the document');
127
+ }
100
128
  doc.appendChild(text);
101
129
  domBuilder.currentElement = text;
102
130
  }
103
131
  return;
104
132
  }
105
133
  if (tagStart > start) {
134
+ var fromSource = source.substring(start, tagStart);
135
+ if (!isHTML && unclosedTags.length === 0) {
136
+ fromSource = fromSource.replace(new RegExp(g.S_OPT.source, 'g'), '');
137
+ fromSource && errorHandler.error("Unexpected content outside root element: '" + fromSource + "'");
138
+ }
106
139
  appendText(tagStart);
107
140
  }
108
141
  switch (source.charAt(tagStart + 1)) {
109
142
  case '/':
110
- var config = parseStack.pop();
111
- var end = source.indexOf('>', tagStart + 3);
143
+ var end = source.indexOf('>', tagStart + 2);
112
144
  var tagNameRaw = source.substring(tagStart + 2, end > 0 ? end : undefined);
113
- var tagNameMatch = g.QName_group.exec(tagNameRaw);
114
- // for the root level the config does not contain the tagName
115
- var tagName =
116
- tagNameMatch && tagNameMatch[1] ? tagNameMatch[1] : config.tagName || domBuilder.doc.documentElement.tagName;
117
- if (end < 0) {
118
- errorHandler.error('end tag name: ' + tagName + ' is not complete');
119
- end = tagStart + 1 + tagName.length;
120
- } else if (tagNameRaw.match(/</) && !isHTML) {
121
- errorHandler.error('end tag name: ' + tagName + ' maybe not complete');
145
+ if (!tagNameRaw) {
146
+ return errorHandler.fatalError('end tag name missing');
147
+ }
148
+ var tagNameMatch = end > 0 && g.reg('^', g.QName_group, g.S_OPT, '$').exec(tagNameRaw);
149
+ if (!tagNameMatch) {
150
+ return errorHandler.fatalError('end tag name contains invalid characters: "' + tagNameRaw + '"');
151
+ }
152
+ if (!domBuilder.currentElement && !domBuilder.doc.documentElement) {
153
+ // not enough information to provide a helpful error message,
154
+ // but parsing will throw since there is no root element
155
+ return;
156
+ }
157
+ var currentTagName =
158
+ unclosedTags[unclosedTags.length - 1] ||
159
+ domBuilder.currentElement.tagName ||
160
+ domBuilder.doc.documentElement.tagName ||
161
+ '';
162
+ if (currentTagName !== tagNameMatch[1]) {
163
+ var tagNameLower = tagNameMatch[1].toLowerCase();
164
+ if (!isHTML || currentTagName.toLowerCase() !== tagNameLower) {
165
+ return errorHandler.fatalError('Opening and ending tag mismatch: "' + currentTagName + '" != "' + tagNameRaw + '"');
166
+ }
122
167
  }
168
+ var config = parseStack.pop();
169
+ unclosedTags.pop();
123
170
  var localNSMap = config.localNSMap;
124
- var endMatch = config.tagName == tagName;
125
- var endIgnoreCaseMach = endMatch || (config.tagName && config.tagName.toLowerCase() == tagName.toLowerCase());
126
- if (endIgnoreCaseMach) {
127
- domBuilder.endElement(config.uri, config.localName, tagName);
128
- if (localNSMap) {
129
- for (var prefix in localNSMap) {
130
- if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
131
- domBuilder.endPrefixMapping(prefix);
132
- }
171
+ domBuilder.endElement(config.uri, config.localName, currentTagName);
172
+ if (localNSMap) {
173
+ for (var prefix in localNSMap) {
174
+ if (Object.prototype.hasOwnProperty.call(localNSMap, prefix)) {
175
+ domBuilder.endPrefixMapping(prefix);
133
176
  }
134
177
  }
135
- if (!endMatch) {
136
- // No known test case
137
- return errorHandler.fatalError(
138
- 'end tag name: ' + tagName + ' is not match the current start tagName:' + config.tagName
139
- );
140
- }
141
- } else {
142
- parseStack.push(config);
143
178
  }
144
179
 
145
180
  end++;
146
181
  break;
147
- // end elment
182
+ // end element
148
183
  case '?': // <?...?>
149
184
  locator && position(tagStart);
150
185
  end = parseProcessingInstruction(source, tagStart, domBuilder, errorHandler);
151
186
  break;
152
187
  case '!': // <!doctype,<![CDATA,<!--
153
188
  locator && position(tagStart);
154
- end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler);
189
+ end = parseDoctypeCommentOrCData(source, tagStart, domBuilder, errorHandler, isHTML);
155
190
  break;
156
191
  default:
157
192
  locator && position(tagStart);
@@ -161,10 +196,11 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
161
196
  var end = parseElementStartPart(source, tagStart, el, currentNSMap, entityReplacer, errorHandler, isHTML);
162
197
  var len = el.length;
163
198
 
164
- if (!el.closed && fixSelfClosed(source, end, el.tagName, closeMap)) {
165
- el.closed = true;
166
- if (!isHTML) {
167
- errorHandler.warning('unclosed xml attribute');
199
+ if (!el.closed) {
200
+ if (isHTML && conventions.isHTMLVoidElement(el.tagName)) {
201
+ el.closed = true;
202
+ } else {
203
+ unclosedTags.push(el.tagName);
168
204
  }
169
205
  }
170
206
  if (locator && len) {
@@ -202,7 +238,7 @@ function parse(source, defaultNSMapCopy, entityMap, domBuilder, errorHandler) {
202
238
  if (end > start) {
203
239
  start = end;
204
240
  } else {
205
- //TODO: 这里有可能sax回退,有位置错误风险
241
+ //Possible sax fallback here, risk of positional error
206
242
  appendText(Math.max(tagStart, start) + 1);
207
243
  }
208
244
  }
@@ -228,13 +264,16 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
228
264
  if (el.attributeNames.hasOwnProperty(qname)) {
229
265
  return errorHandler.fatalError('Attribute ' + qname + ' redefined');
230
266
  }
267
+ if (!isHTML && value.indexOf('<') >= 0) {
268
+ return errorHandler.fatalError("Unescaped '<' not allowed in attributes values");
269
+ }
231
270
  el.addValue(
232
271
  qname,
233
272
  // @see https://www.w3.org/TR/xml/#AVNormalize
234
273
  // since the xmldom sax parser does not "interpret" DTD the following is not implemented:
235
274
  // - recursive replacement of (DTD) entity references
236
275
  // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
237
- value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
276
+ value.replace(/[\t\n\r]/g, ' ').replace(ENTITY_REG, entityReplacer),
238
277
  startIndex
239
278
  );
240
279
  }
@@ -346,7 +385,9 @@ function parseElementStartPart(source, start, el, currentNSMap, entityReplacer,
346
385
  }
347
386
  break;
348
387
  case S_EQ:
349
- throw new Error('attribute value missed!!');
388
+ if (!isHTML) {
389
+ return errorHandler.fatalError('AttValue: \' or " expected');
390
+ }
350
391
  }
351
392
  return p;
352
393
  /*xml space '\x20' | #x9 | #xD | #xA; */
@@ -506,7 +547,7 @@ function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, do
506
547
  var text = source.substring(elStartEnd + 1, elEndStart);
507
548
 
508
549
  if (isEscapableRaw) {
509
- text = text.replace(/&#?\w+;/g, entityReplacer);
550
+ text = text.replace(ENTITY_REG, entityReplacer);
510
551
  }
511
552
  domBuilder.characters(text, 0, text.length);
512
553
  return elEndStart;
@@ -514,22 +555,6 @@ function parseHtmlSpecialContent(source, elStartEnd, tagName, entityReplacer, do
514
555
  return elStartEnd + 1;
515
556
  }
516
557
 
517
- function fixSelfClosed(source, elStartEnd, tagName, closeMap) {
518
- //if(tagName in closeMap){
519
- var pos = closeMap[tagName];
520
- if (pos == null) {
521
- //console.log(tagName)
522
- pos = source.lastIndexOf('</' + tagName + '>');
523
- if (pos < elStartEnd) {
524
- //忘记闭合
525
- pos = source.lastIndexOf('</' + tagName);
526
- }
527
- closeMap[tagName] = pos;
528
- }
529
- return pos < elStartEnd;
530
- //}
531
- }
532
-
533
558
  function _copy(source, target) {
534
559
  for (var n in source) {
535
560
  if (Object.prototype.hasOwnProperty.call(source, n)) {
@@ -718,11 +743,12 @@ function parseDoctypeInternalSubset(p, errorHandler) {
718
743
  * the start index of the '<!'
719
744
  * @param {DOMHandler} domBuilder
720
745
  * @param {DOMHandler} errorHandler
746
+ * @param {boolean} isHTML
721
747
  * @returns {number | never} The end index of the element.
722
748
  * @throws {ParseError}
723
749
  * In case the element is not well-formed.
724
750
  */
725
- function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler) {
751
+ function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler, isHTML) {
726
752
  var p = parseUtils(source, start);
727
753
 
728
754
  switch (p.char(2)) {
@@ -739,6 +765,9 @@ function parseDoctypeCommentOrCData(source, start, domBuilder, errorHandler) {
739
765
  // should be CDATA
740
766
  var cdata = p.getMatch(g.CDSect);
741
767
  if (cdata) {
768
+ if (!isHTML && !domBuilder.currentElement) {
769
+ return errorHandler.fatalError('CDATA outside of element');
770
+ }
742
771
  domBuilder.startCDATA();
743
772
  domBuilder.characters(cdata, g.CDATA_START.length, cdata.length - g.CDATA_START.length - g.CDATA_END.length);
744
773
  domBuilder.endCDATA();
package/package.json CHANGED
@@ -1,73 +1,73 @@
1
1
  {
2
- "name": "@xmldom/xmldom",
3
- "version": "0.9.0-beta.10",
4
- "description": "A pure JavaScript W3C standard-based (XML DOM Level 2 Core) DOMParser and XMLSerializer module.",
5
- "keywords": [
6
- "w3c",
7
- "dom",
8
- "xml",
9
- "parser",
10
- "javascript",
11
- "DOMParser",
12
- "XMLSerializer",
13
- "ponyfill"
14
- ],
15
- "homepage": "https://github.com/xmldom/xmldom",
16
- "repository": {
17
- "type": "git",
18
- "url": "git://github.com/xmldom/xmldom.git"
19
- },
20
- "main": "lib/index.js",
21
- "types": "index.d.ts",
22
- "files": [
23
- "CHANGELOG.md",
24
- "LICENSE",
25
- "readme.md",
26
- "SECURITY.md",
27
- "index.d.ts",
28
- "lib"
29
- ],
30
- "scripts": {
31
- "lint": "eslint examples lib test",
32
- "format": "prettier --write examples lib test",
33
- "changelog": "auto-changelog --unreleased-only",
34
- "start": "nodemon --watch package.json --watch lib --watch test --exec 'npm --silent run test && npm --silent run lint'",
35
- "test": "jest",
36
- "test:types": "cd examples/typescript-node-es6 && ./pretest.sh 3 && ./pretest.sh 4 && ./pretest.sh 5 && node dist/index.js",
37
- "testrelease": "npm test && eslint lib",
38
- "version": "./changelog-has-version.sh",
39
- "release": "np --no-yarn --test-script testrelease"
40
- },
41
- "engines": {
42
- "node": ">=10.0.0"
43
- },
44
- "dependencies": {},
45
- "devDependencies": {
46
- "@homer0/prettier-plugin-jsdoc": "6.0.5",
47
- "auto-changelog": "2.4.0",
48
- "eslint": "8.44.0",
49
- "eslint-config-prettier": "8.8.0",
50
- "eslint-plugin-anti-trojan-source": "1.1.1",
51
- "eslint-plugin-es5": "1.5.0",
52
- "eslint-plugin-node": "11.1.0",
53
- "eslint-plugin-prettier": "4.2.1",
54
- "get-stream": "6.0.1",
55
- "jest": "27.5.1",
56
- "nodemon": "3.0.1",
57
- "np": "7.7.0",
58
- "prettier": "2.8.8",
59
- "rxjs": "7.8.1",
60
- "xmltest": "1.5.0",
61
- "yauzl": "2.10.0"
62
- },
63
- "bugs": {
64
- "url": "https://github.com/xmldom/xmldom/issues"
65
- },
66
- "license": "MIT",
67
- "auto-changelog": {
68
- "prepend": true,
69
- "remote": "upstream",
70
- "tagPrefix": "",
71
- "template": "./auto-changelog.hbs"
72
- }
2
+ "name": "@xmldom/xmldom",
3
+ "version": "0.9.0-beta.11",
4
+ "description": "A pure JavaScript W3C standard-based (XML DOM Level 2 Core) DOMParser and XMLSerializer module.",
5
+ "keywords": [
6
+ "w3c",
7
+ "dom",
8
+ "xml",
9
+ "parser",
10
+ "javascript",
11
+ "DOMParser",
12
+ "XMLSerializer",
13
+ "ponyfill"
14
+ ],
15
+ "homepage": "https://github.com/xmldom/xmldom",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "git://github.com/xmldom/xmldom.git"
19
+ },
20
+ "main": "lib/index.js",
21
+ "types": "index.d.ts",
22
+ "files": [
23
+ "CHANGELOG.md",
24
+ "LICENSE",
25
+ "readme.md",
26
+ "SECURITY.md",
27
+ "index.d.ts",
28
+ "lib"
29
+ ],
30
+ "scripts": {
31
+ "lint": "eslint examples lib test",
32
+ "format": "prettier --write examples lib test",
33
+ "changelog": "auto-changelog --unreleased-only",
34
+ "start": "nodemon --watch package.json --watch lib --watch test --exec 'npm --silent run test && npm --silent run lint'",
35
+ "test": "jest",
36
+ "test:types": "cd examples/typescript-node-es6 && ./pretest.sh 3 && ./pretest.sh 4 && ./pretest.sh 5 && node dist/index.js",
37
+ "testrelease": "npm test && eslint lib",
38
+ "version": "./changelog-has-version.sh",
39
+ "release": "np --no-yarn --test-script testrelease"
40
+ },
41
+ "engines": {
42
+ "node": ">=10.0.0"
43
+ },
44
+ "dependencies": {},
45
+ "devDependencies": {
46
+ "@homer0/prettier-plugin-jsdoc": "7.0.2",
47
+ "auto-changelog": "2.4.0",
48
+ "eslint": "8.45.0",
49
+ "eslint-config-prettier": "8.8.0",
50
+ "eslint-plugin-anti-trojan-source": "1.1.1",
51
+ "eslint-plugin-es5": "1.5.0",
52
+ "eslint-plugin-node": "11.1.0",
53
+ "eslint-plugin-prettier": "5.0.0",
54
+ "get-stream": "6.0.1",
55
+ "jest": "27.5.1",
56
+ "nodemon": "3.0.1",
57
+ "np": "7.7.0",
58
+ "prettier": "3.0.0",
59
+ "rxjs": "7.8.1",
60
+ "xmltest": "1.5.0",
61
+ "yauzl": "2.10.0"
62
+ },
63
+ "bugs": {
64
+ "url": "https://github.com/xmldom/xmldom/issues"
65
+ },
66
+ "license": "MIT",
67
+ "auto-changelog": {
68
+ "prepend": true,
69
+ "remote": "upstream",
70
+ "tagPrefix": "",
71
+ "template": "./auto-changelog.hbs"
72
+ }
73
73
  }