@xmldom/xmldom 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,27 @@ All notable changes to this project will be documented in this file.
4
4
 
5
5
  This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [0.8.0](https://github.com/xmldom/xmldom/compare/0.7.5...0.8.0)
8
+
9
+ ### Fixed
10
+ - Normalize all line endings according to XML specs [1.0](https://w3.org/TR/xml/#sec-line-ends) and [1.1](https://www.w3.org/TR/xml11/#sec-line-ends) \
11
+ BREAKING CHANGE: Certain combinations of line break characters are normalized to a single `\n` before parsing takes place and will no longer be preserved.
12
+ - [`#303`](https://github.com/xmldom/xmldom/issues/303) / [`#307`](https://github.com/xmldom/xmldom/pull/307)
13
+ - [`#49`](https://github.com/xmldom/xmldom/issues/49), [`#97`](https://github.com/xmldom/xmldom/issues/97), [`#324`](https://github.com/xmldom/xmldom/issues/324) / [`#314`](https://github.com/xmldom/xmldom/pull/314)
14
+ - XMLSerializer: Preserve whitespace character references [`#284`](https://github.com/xmldom/xmldom/issues/284) / [`#310`](https://github.com/xmldom/xmldom/pull/310) \
15
+ BREAKING CHANGE: If you relied on the non-spec-compliant preservation of literal `\t`, `\n` or `\r` in **attribute values**.
16
+ To preserve those you will have to create XML that instead contains the correct numerical (or hexadecimal) equivalent (e.g. `&#x9;`, `&#xA;`, `&#xD;`).
17
+ - Drop deprecated exports `DOMImplementation` and `XMLSerializer` from `lib/dom-parser.js` [#53](https://github.com/xmldom/xmldom/issues/53) / [`#309`](https://github.com/xmldom/xmldom/pull/309)
18
+ BREAKING CHANGE: Use the one provided by the main package export.
19
+ - dom: Remove all links as part of `removeChild` [`#343`](https://github.com/xmldom/xmldom/issues/343) / [`#355`](https://github.com/xmldom/xmldom/pull/355)
20
+
21
+ ### Chore
22
+ - ci: Restore latest tested node version to 16.x [`#325`](https://github.com/xmldom/xmldom/pull/325)
23
+ - ci: Split test and lint steps into jobs [`#111`](https://github.com/xmldom/xmldom/issues/111) / [`#304`](https://github.com/xmldom/xmldom/pull/304)
24
+ - Pinned and updated devDependencies
25
+
26
+ Thank you [@marrus-sh](https://github.com/marrus-sh), [@victorandree](https://github.com/victorandree), [@mdierolf](https://github.com/mdierolf), [@tsabbay](https://github.com/tsabbay), [@fatihpense](https://github.com/fatihpense) for your contributions
27
+
7
28
  ## 0.7.5
8
29
 
9
30
  [Commits](https://github.com/xmldom/xmldom/compare/0.7.4...0.7.5)
package/lib/dom-parser.js CHANGED
@@ -10,6 +10,64 @@ var NAMESPACE = conventions.NAMESPACE;
10
10
  var ParseError = sax.ParseError;
11
11
  var XMLReader = sax.XMLReader;
12
12
 
13
+ /**
14
+ * Normalizes line endings according to https://www.w3.org/TR/xml11/#sec-line-ends:
15
+ *
16
+ * > XML parsed entities are often stored in computer files which,
17
+ * > for editing convenience, are organized into lines.
18
+ * > These lines are typically separated by some combination
19
+ * > of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
20
+ * >
21
+ * > To simplify the tasks of applications, the XML processor must behave
22
+ * > as if it normalized all line breaks in external parsed entities (including the document entity)
23
+ * > on input, before parsing, by translating all of the following to a single #xA character:
24
+ * >
25
+ * > 1. the two-character sequence #xD #xA
26
+ * > 2. the two-character sequence #xD #x85
27
+ * > 3. the single character #x85
28
+ * > 4. the single character #x2028
29
+ * > 5. any #xD character that is not immediately followed by #xA or #x85.
30
+ *
31
+ * @param {string} input
32
+ * @returns {string}
33
+ */
34
+ function normalizeLineEndings(input) {
35
+ return input
36
+ .replace(/\r[\n\u0085]/g, '\n')
37
+ .replace(/[\r\u0085\u2028]/g, '\n')
38
+ }
39
+
40
+ /**
41
+ * @typedef Locator
42
+ * @property {number} [columnNumber]
43
+ * @property {number} [lineNumber]
44
+ */
45
+
46
+ /**
47
+ * @typedef DOMParserOptions
48
+ * @property {DOMHandler} [domBuilder]
49
+ * @property {Function} [errorHandler]
50
+ * @property {(string) => string} [normalizeLineEndings] used to replace line endings before parsing
51
+ * defaults to `normalizeLineEndings`
52
+ * @property {Locator} [locator]
53
+ * @property {Record<string, string>} [xmlns]
54
+ *
55
+ * @see normalizeLineEndings
56
+ */
57
+
58
+ /**
59
+ * The DOMParser interface provides the ability to parse XML or HTML source code
60
+ * from a string into a DOM `Document`.
61
+ *
62
+ * _xmldom is different from the spec in that it allows an `options` parameter,
63
+ * to override the default behavior._
64
+ *
65
+ * @param {DOMParserOptions} [options]
66
+ * @constructor
67
+ *
68
+ * @see https://developer.mozilla.org/en-US/docs/Web/API/DOMParser
69
+ * @see https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-parsing-and-serialization
70
+ */
13
71
  function DOMParser(options){
14
72
  this.options = options ||{locator:{}};
15
73
  }
@@ -33,10 +91,15 @@ DOMParser.prototype.parseFromString = function(source,mimeType){
33
91
  defaultNSMap[''] = NAMESPACE.HTML;
34
92
  }
35
93
  defaultNSMap.xml = defaultNSMap.xml || NAMESPACE.XML;
36
- if(source && typeof source === 'string'){
37
- sax.parse(source,defaultNSMap,entityMap);
38
- }else{
39
- sax.errorHandler.error("invalid doc source");
94
+ var normalize = options.normalizeLineEndings || normalizeLineEndings;
95
+ if (source && typeof source === 'string') {
96
+ sax.parse(
97
+ normalize(source),
98
+ defaultNSMap,
99
+ entityMap
100
+ )
101
+ } else {
102
+ sax.errorHandler.error('invalid doc source')
40
103
  }
41
104
  return domBuilder.doc;
42
105
  }
@@ -255,14 +318,5 @@ function appendElement (hander,node) {
255
318
  }//appendChild and setAttributeNS are preformance key
256
319
 
257
320
  exports.__DOMHandler = DOMHandler;
321
+ exports.normalizeLineEndings = normalizeLineEndings;
258
322
  exports.DOMParser = DOMParser;
259
-
260
- /**
261
- * @deprecated Import/require from main entry point instead
262
- */
263
- exports.DOMImplementation = dom.DOMImplementation;
264
-
265
- /**
266
- * @deprecated Import/require from main entry point instead
267
- */
268
- exports.XMLSerializer = dom.XMLSerializer;
package/lib/dom.js CHANGED
@@ -591,48 +591,67 @@ function _onRemoveAttribute(doc,el,newAttr,remove){
591
591
  }
592
592
  }
593
593
 
594
- function _onUpdateChild(doc,el,newChild){
594
+ /**
595
+ * Updates `el.childNodes`, updating the indexed items and its `length`.
596
+ * Passing `newChild` means it will be appended.
597
+ * Otherwise it's assumed that an item has been removed,
598
+ * and `el.firstChild` and its `.nextSibling` are used
599
+ * to walk the current list of child nodes.
600
+ *
601
+ * @param {Document} doc
602
+ * @param {Node} el
603
+ * @param {Node} [newChild]
604
+ * @private
605
+ */
606
+ function _onUpdateChild (doc, el, newChild) {
595
607
  if(doc && doc._inc){
596
608
  doc._inc++;
597
609
  //update childNodes
598
610
  var cs = el.childNodes;
599
- if(newChild){
611
+ if (newChild) {
600
612
  cs[cs.length++] = newChild;
601
- }else{
602
- //console.log(1)
613
+ } else {
603
614
  var child = el.firstChild;
604
615
  var i = 0;
605
- while(child){
616
+ while (child) {
606
617
  cs[i++] = child;
607
- child =child.nextSibling;
618
+ child = child.nextSibling;
608
619
  }
609
620
  cs.length = i;
621
+ delete cs[cs.length];
610
622
  }
611
623
  }
612
624
  }
613
625
 
614
626
  /**
615
- * attributes;
616
- * children;
617
- *
618
- * writeable properties:
619
- * nodeValue,Attr:value,CharacterData:data
620
- * prefix
627
+ * Removes the connections between `parentNode` and `child`
628
+ * and any existing `child.previousSibling` or `child.nextSibling`.
629
+ *
630
+ * @see https://github.com/xmldom/xmldom/issues/135
631
+ * @see https://github.com/xmldom/xmldom/issues/145
632
+ *
633
+ * @param {Node} parentNode
634
+ * @param {Node} child
635
+ * @returns {Node} the child that was removed.
636
+ * @private
621
637
  */
622
- function _removeChild(parentNode,child){
638
+ function _removeChild (parentNode, child) {
623
639
  var previous = child.previousSibling;
624
640
  var next = child.nextSibling;
625
- if(previous){
641
+ if (previous) {
626
642
  previous.nextSibling = next;
627
- }else{
628
- parentNode.firstChild = next
643
+ } else {
644
+ parentNode.firstChild = next;
629
645
  }
630
- if(next){
646
+ if (next) {
631
647
  next.previousSibling = previous;
632
- }else{
648
+ } else {
633
649
  parentNode.lastChild = previous;
634
650
  }
635
- _onUpdateChild(parentNode.ownerDocument,parentNode);
651
+ child.parentNode = null;
652
+ child.previousSibling = null;
653
+ child.nextSibling = null;
654
+ _onUpdateChild(parentNode.ownerDocument, parentNode);
636
655
  return child;
637
656
  }
638
657
  /**
@@ -678,27 +697,35 @@ function _insertBefore(parentNode,newChild,nextChild){
678
697
  }
679
698
  return newChild;
680
699
  }
681
- function _appendSingleChild(parentNode,newChild){
682
- var cp = newChild.parentNode;
683
- if(cp){
684
- var pre = parentNode.lastChild;
685
- cp.removeChild(newChild);//remove and update
686
- var pre = parentNode.lastChild;
700
+
701
+ /**
702
+ * Appends `newChild` to `parentNode`.
703
+ * If `newChild` is already connected to a `parentNode` it is first removed from it.
704
+ *
705
+ * @see https://github.com/xmldom/xmldom/issues/135
706
+ * @see https://github.com/xmldom/xmldom/issues/145
707
+ * @param {Node} parentNode
708
+ * @param {Node} newChild
709
+ * @returns {Node}
710
+ * @private
711
+ */
712
+ function _appendSingleChild (parentNode, newChild) {
713
+ if (newChild.parentNode) {
714
+ newChild.parentNode.removeChild(newChild);
687
715
  }
688
- var pre = parentNode.lastChild;
689
716
  newChild.parentNode = parentNode;
690
- newChild.previousSibling = pre;
717
+ newChild.previousSibling = parentNode.lastChild;
691
718
  newChild.nextSibling = null;
692
- if(pre){
693
- pre.nextSibling = newChild;
694
- }else{
719
+ if (newChild.previousSibling) {
720
+ newChild.previousSibling.nextSibling = newChild;
721
+ } else {
695
722
  parentNode.firstChild = newChild;
696
723
  }
697
724
  parentNode.lastChild = newChild;
698
- _onUpdateChild(parentNode.ownerDocument,parentNode,newChild);
725
+ _onUpdateChild(parentNode.ownerDocument, parentNode, newChild);
699
726
  return newChild;
700
- //console.log("__aa",parentNode.lastChild.nextSibling == null)
701
727
  }
728
+
702
729
  Document.prototype = {
703
730
  //implementation : null,
704
731
  nodeName : '#document',
@@ -1151,12 +1178,18 @@ function needNamespaceDefine(node, isHTML, visibleNamespaces) {
1151
1178
  }
1152
1179
  /**
1153
1180
  * Well-formed constraint: No < in Attribute Values
1154
- * The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a <.
1155
- * @see https://www.w3.org/TR/xml/#CleanAttrVals
1156
- * @see https://www.w3.org/TR/xml/#NT-AttValue
1181
+ * > The replacement text of any entity referred to directly or indirectly
1182
+ * > in an attribute value must not contain a <.
1183
+ * @see https://www.w3.org/TR/xml11/#CleanAttrVals
1184
+ * @see https://www.w3.org/TR/xml11/#NT-AttValue
1185
+ *
1186
+ * Literal whitespace characters other than space that appear in attribute values
1187
+ * are serialized as their entity references, so they will be preserved.
1188
+ * (In contrast to whitespace literals in the input which are normalized to spaces)
1189
+ * @see https://www.w3.org/TR/xml11/#AVNormalize
1157
1190
  */
1158
1191
  function addSerializedAttribute(buf, qualifiedName, value) {
1159
- buf.push(' ', qualifiedName, '="', value.replace(/[<&"]/g,_xmlEncoder), '"')
1192
+ buf.push(' ', qualifiedName, '="', value.replace(/[<&"\t\n\r]/g, _xmlEncoder), '"')
1160
1193
  }
1161
1194
 
1162
1195
  function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){
package/lib/sax.js CHANGED
@@ -233,7 +233,15 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
233
233
  if (el.attributeNames.hasOwnProperty(qname)) {
234
234
  errorHandler.fatalError('Attribute ' + qname + ' redefined')
235
235
  }
236
- el.addValue(qname, value, startIndex)
236
+ el.addValue(
237
+ qname,
238
+ // @see https://www.w3.org/TR/xml/#AVNormalize
239
+ // since the xmldom sax parser does not "interpret" DTD the following is not implemented:
240
+ // - recursive replacement of (DTD) entity references
241
+ // - trimming and collapsing multiple spaces into a single one for attributes that are not of type CDATA
242
+ value.replace(/[\t\n\r]/g, ' ').replace(/&#?\w+;/g, entityReplacer),
243
+ startIndex
244
+ )
237
245
  }
238
246
  var attrName;
239
247
  var value;
@@ -264,7 +272,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
264
272
  start = p+1;
265
273
  p = source.indexOf(c,start)
266
274
  if(p>0){
267
- value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
275
+ value = source.slice(start, p);
268
276
  addAttribute(attrName, value, start-1);
269
277
  s = S_ATTR_END;
270
278
  }else{
@@ -272,10 +280,8 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
272
280
  throw new Error('attribute value no end \''+c+'\' match');
273
281
  }
274
282
  }else if(s == S_ATTR_NOQUOT_VALUE){
275
- value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
276
- //console.log(attrName,value,start,p)
283
+ value = source.slice(start, p);
277
284
  addAttribute(attrName, value, start);
278
- //console.dir(el)
279
285
  errorHandler.warning('attribute "'+attrName+'" missed start quot('+c+')!!');
280
286
  start = p+1;
281
287
  s = S_ATTR_END
@@ -329,7 +335,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
329
335
  }
330
336
  if(s == S_ATTR_NOQUOT_VALUE){
331
337
  errorHandler.warning('attribute "'+value+'" missed quot(")!');
332
- addAttribute(attrName, value.replace(/&#?\w+;/g,entityReplacer), start)
338
+ addAttribute(attrName, value, start)
333
339
  }else{
334
340
  if(!NAMESPACE.isHTML(currentNSMap['']) || !value.match(/^(?:disabled|checked|selected)$/i)){
335
341
  errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
@@ -357,7 +363,7 @@ function parseElementStartPart(source,start,el,currentNSMap,entityReplacer,error
357
363
  s = S_ATTR_SPACE;
358
364
  break;
359
365
  case S_ATTR_NOQUOT_VALUE:
360
- var value = source.slice(start,p).replace(/&#?\w+;/g,entityReplacer);
366
+ var value = source.slice(start, p);
361
367
  errorHandler.warning('attribute "'+value+'" missed quot(")!!');
362
368
  addAttribute(attrName, value, start)
363
369
  case S_ATTR_END:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xmldom/xmldom",
3
- "version": "0.7.5",
3
+ "version": "0.8.0",
4
4
  "description": "A pure JavaScript W3C standard-based (XML DOM Level 2 Core) DOMParser and XMLSerializer module.",
5
5
  "keywords": [
6
6
  "w3c",
@@ -38,17 +38,17 @@
38
38
  },
39
39
  "dependencies": {},
40
40
  "devDependencies": {
41
- "@stryker-mutator/core": "^5.2.2",
42
- "eslint": "^7.32.0",
43
- "eslint-config-prettier": "^8.3.0",
44
- "eslint-plugin-es5": "^1.5.0",
45
- "eslint-plugin-prettier": "^3.4.1",
46
- "get-stream": "^6.0.1",
47
- "jest": "^27.0.6",
48
- "nodemon": "^2.0.12",
49
- "prettier": "^2.3.2",
50
- "xmltest": "^1.5.0",
51
- "yauzl": "^2.10.0"
41
+ "@stryker-mutator/core": "5.5.1",
42
+ "eslint": "8.5.0",
43
+ "eslint-config-prettier": "8.3.0",
44
+ "eslint-plugin-es5": "1.5.0",
45
+ "eslint-plugin-prettier": "4.0.0",
46
+ "get-stream": "6.0.1",
47
+ "jest": "27.4.5",
48
+ "nodemon": "2.0.15",
49
+ "prettier": "2.5.1",
50
+ "xmltest": "1.5.0",
51
+ "yauzl": "2.10.0"
52
52
  },
53
53
  "bugs": {
54
54
  "url": "https://github.com/xmldom/xmldom/issues"