node-html-parser 7.0.1 → 7.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [7.0.2](https://github.com/taoqf/node-fast-html-parser/compare/v7.0.1...v7.0.2) (2026-01-07)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * [#227](https://github.com/taoqf/node-fast-html-parser/issues/227) ([51528c4](https://github.com/taoqf/node-fast-html-parser/commit/51528c41ef2648d6c4dc1aecd14ee9d2b0083c4f))
11
+ * [#294](https://github.com/taoqf/node-fast-html-parser/issues/294) Closing tag is missing but valid HTML is still not parseable ([950865f](https://github.com/taoqf/node-fast-html-parser/commit/950865fab5f4df7853b36712869b71c90f4d3a1b))
12
+ * add missing dev dependency: yarn ([6d73ea3](https://github.com/taoqf/node-fast-html-parser/commit/6d73ea37c48f4170c35907869ba410c5122a9a1f))
13
+ * test valid.js ([a81fc46](https://github.com/taoqf/node-fast-html-parser/commit/a81fc46fab2507615b0362150d62568a6f52ee4e))
14
+
5
15
  ### [7.0.1](https://github.com/taoqf/node-fast-html-parser/compare/v7.0.0...v7.0.1) (2024-12-26)
6
16
 
7
17
 
package/README.md CHANGED
@@ -43,6 +43,7 @@ import { parse } from 'node-html-parser';
43
43
 
44
44
  const root = parse('<ul id="list"><li>Hello World</li></ul>');
45
45
 
46
+ // parse() adds a wrapper node, so the input data's first node is the root's first child node
46
47
  console.log(root.firstChild.structure);
47
48
  // ul#list
48
49
  // li
@@ -74,7 +75,7 @@ var root = HTMLParser.parse('<ul id="list"><li>Hello World</li></ul>');
74
75
 
75
76
  ### parse(data[, options])
76
77
 
77
- Parse the data provided, and return the root of the generated DOM.
78
+ Parse the data provided, wrap the result in a new wrapper node, and return that wrapper node as the root of the generated DOM.
78
79
 
79
80
  - **data**, data to parse
80
81
  - **options**, parse options
@@ -349,11 +350,11 @@ Get all child elements, so all child nodes of type HTMLELement.
349
350
 
350
351
  ### firstChild
351
352
 
352
- Get first child node. `undefined` if the node has no children.
353
+ Get first child node. `undefined` if the node has no children. On the root returned by `parse()` (the wrapper node it adds), this is the first node of the input data.
353
354
 
354
355
  ### lastChild
355
356
 
356
- Get last child node. `undefined` if the node has no children.
357
+ Get last child node. `undefined` if the node has no children. On the root returned by `parse()` (the wrapper node it adds), this is the last node of the input data.
357
358
 
358
359
  ### firstElementChild
359
360
 
package/dist/main.js CHANGED
@@ -503,7 +503,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
503
503
  _this.nodeType = type_3.default.ELEMENT_NODE;
504
504
  _this.rawTagName = tagName;
505
505
  _this.rawAttrs = rawAttrs || '';
506
- _this.id = keyAttrs.id || '';
506
+ _this._id = keyAttrs.id || '';
507
507
  _this.childNodes = [];
508
508
  _this._parseOptions = _parseOptions;
509
509
  _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
@@ -590,6 +590,16 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
590
590
  enumerable: false,
591
591
  configurable: true
592
592
  });
593
+ Object.defineProperty(HTMLElement.prototype, "id", {
594
+ get: function () {
595
+ return this._id;
596
+ },
597
+ set: function (newid) {
598
+ this.setAttribute('id', newid);
599
+ },
600
+ enumerable: false,
601
+ configurable: true
602
+ });
593
603
  Object.defineProperty(HTMLElement.prototype, "rawText", {
594
604
  /**
595
605
  * Get escaped (as-is) text value of current node and its children.
@@ -786,7 +796,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
786
796
  res.push(' '.repeat(indention) + str);
787
797
  }
788
798
  function dfs(node) {
789
- var idStr = node.id ? "#".concat(node.id) : '';
799
+ var idStr = node._id ? "#".concat(node._id) : '';
790
800
  var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
791
801
  write("".concat(node.rawTagName).concat(idStr).concat(classStr));
792
802
  indention++;
@@ -923,7 +933,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
923
933
  continue;
924
934
  }
925
935
  if (child.nodeType === type_3.default.ELEMENT_NODE) {
926
- if (child.id === id) {
936
+ if (child._id === id) {
927
937
  return child;
928
938
  }
929
939
  // if children exist, push the current status to the stack and keep searching for elements in the level below
@@ -1073,9 +1083,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1073
1083
  return "".concat(name, "=").concat(val);
1074
1084
  })
1075
1085
  .join(' ');
1076
- // Update this.id
1086
+ // Update this._id
1077
1087
  if (key === 'id') {
1078
- this.id = '';
1088
+ this._id = '';
1079
1089
  }
1080
1090
  return this;
1081
1091
  };
@@ -1121,9 +1131,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1121
1131
  return "".concat(name, "=").concat(val);
1122
1132
  })
1123
1133
  .join(' ');
1124
- // Update this.id
1134
+ // Update this._id
1125
1135
  if (key === 'id') {
1126
- this.id = value;
1136
+ this._id = value;
1127
1137
  }
1128
1138
  return this;
1129
1139
  };
@@ -1150,6 +1160,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1150
1160
  return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
1151
1161
  })
1152
1162
  .join(' ');
1163
+ // Update this._id
1164
+ if ('id' in attributes) {
1165
+ this._id = attributes['id'];
1166
+ }
1153
1167
  return this;
1154
1168
  };
1155
1169
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
@@ -1421,6 +1435,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1421
1435
  th: { tr: true, table: true, TR: true, TABLE: true },
1422
1436
  TH: { tr: true, table: true, TR: true, TABLE: true },
1423
1437
  };
1438
+ var kElementsClosedByClosingExcept = {
1439
+ p: { a: true, audio: true, del: true, ins: true, map: true, noscript: true, video: true },
1440
+ };
1424
1441
  var frameflag = 'documentfragmentcontainer';
1425
1442
  /**
1426
1443
  * Parses HTML and returns a root element
@@ -1566,6 +1583,25 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1566
1583
  continue;
1567
1584
  }
1568
1585
  }
1586
+ var openTag = currentParent.rawTagName ?
1587
+ currentParent.rawTagName.toLowerCase() :
1588
+ '';
1589
+ if (kElementsClosedByClosingExcept[openTag]) {
1590
+ var closingTag = tagName.toLowerCase();
1591
+ if (stack.length > 1) {
1592
+ var possibleContainer = stack[stack.length - 2];
1593
+ if (possibleContainer &&
1594
+ possibleContainer.rawTagName &&
1595
+ possibleContainer.rawTagName.toLowerCase() === closingTag &&
1596
+ !kElementsClosedByClosingExcept[openTag][closingTag]) {
1597
+ // Update range end for closed tag
1598
+ currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
1599
+ stack.pop();
1600
+ currentParent = (0, back_1.default)(stack);
1601
+ continue;
1602
+ }
1603
+ }
1604
+ }
1569
1605
  // Use aggressive strategy to handle unmatching markups.
1570
1606
  break;
1571
1607
  }
@@ -43,8 +43,8 @@ export default class HTMLElement extends Node {
43
43
  private _attrs;
44
44
  private _rawAttrs;
45
45
  private _parseOptions;
46
+ private _id;
46
47
  rawTagName: string;
47
- id: string;
48
48
  classList: DOMTokenList;
49
49
  /**
50
50
  * Node Type declaration.
@@ -79,6 +79,8 @@ export default class HTMLElement extends Node {
79
79
  set tagName(newname: string);
80
80
  get localName(): string;
81
81
  get isVoidElement(): boolean;
82
+ get id(): string;
83
+ set id(newid: string);
82
84
  /**
83
85
  * Get escaped (as-is) text value of current node and its children.
84
86
  * @return {string} text content
@@ -126,7 +126,7 @@ class HTMLElement extends node_1.default {
126
126
  this.nodeType = type_1.default.ELEMENT_NODE;
127
127
  this.rawTagName = tagName;
128
128
  this.rawAttrs = rawAttrs || '';
129
- this.id = keyAttrs.id || '';
129
+ this._id = keyAttrs.id || '';
130
130
  this.childNodes = [];
131
131
  this._parseOptions = _parseOptions;
132
132
  this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], (classList) => this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
@@ -185,6 +185,12 @@ class HTMLElement extends node_1.default {
185
185
  get isVoidElement() {
186
186
  return this.voidTag.isVoidElement(this.localName);
187
187
  }
188
+ get id() {
189
+ return this._id;
190
+ }
191
+ set id(newid) {
192
+ this.setAttribute('id', newid);
193
+ }
188
194
  /**
189
195
  * Get escaped (as-is) text value of current node and its children.
190
196
  * @return {string} text content
@@ -350,7 +356,7 @@ class HTMLElement extends node_1.default {
350
356
  res.push(' '.repeat(indention) + str);
351
357
  }
352
358
  function dfs(node) {
353
- const idStr = node.id ? `#${node.id}` : '';
359
+ const idStr = node._id ? `#${node._id}` : '';
354
360
  const classStr = node.classList.length ? `.${node.classList.value.join('.')}` : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
355
361
  write(`${node.rawTagName}${idStr}${classStr}`);
356
362
  indention++;
@@ -483,7 +489,7 @@ class HTMLElement extends node_1.default {
483
489
  continue;
484
490
  }
485
491
  if (child.nodeType === type_1.default.ELEMENT_NODE) {
486
- if (child.id === id) {
492
+ if (child._id === id) {
487
493
  return child;
488
494
  }
489
495
  // if children exist, push the current status to the stack and keep searching for elements in the level below
@@ -623,9 +629,9 @@ class HTMLElement extends node_1.default {
623
629
  return `${name}=${val}`;
624
630
  })
625
631
  .join(' ');
626
- // Update this.id
632
+ // Update this._id
627
633
  if (key === 'id') {
628
- this.id = '';
634
+ this._id = '';
629
635
  }
630
636
  return this;
631
637
  }
@@ -670,9 +676,9 @@ class HTMLElement extends node_1.default {
670
676
  return `${name}=${val}`;
671
677
  })
672
678
  .join(' ');
673
- // Update this.id
679
+ // Update this._id
674
680
  if (key === 'id') {
675
- this.id = value;
681
+ this._id = value;
676
682
  }
677
683
  return this;
678
684
  }
@@ -698,6 +704,10 @@ class HTMLElement extends node_1.default {
698
704
  return `${name}=${this.quoteAttribute(String(val))}`;
699
705
  })
700
706
  .join(' ');
707
+ // Update this._id
708
+ if ('id' in attributes) {
709
+ this._id = attributes['id'];
710
+ }
701
711
  return this;
702
712
  }
703
713
  insertAdjacentHTML(where, html) {
@@ -905,6 +915,9 @@ const kElementsClosedByClosing = {
905
915
  th: { tr: true, table: true, TR: true, TABLE: true },
906
916
  TH: { tr: true, table: true, TR: true, TABLE: true },
907
917
  };
918
+ const kElementsClosedByClosingExcept = {
919
+ p: { a: true, audio: true, del: true, ins: true, map: true, noscript: true, video: true },
920
+ };
908
921
  const frameflag = 'documentfragmentcontainer';
909
922
  /**
910
923
  * Parses HTML and returns a root element
@@ -1049,6 +1062,25 @@ function base_parse(data, options = {}) {
1049
1062
  continue;
1050
1063
  }
1051
1064
  }
1065
+ const openTag = currentParent.rawTagName ?
1066
+ currentParent.rawTagName.toLowerCase() :
1067
+ '';
1068
+ if (kElementsClosedByClosingExcept[openTag]) {
1069
+ const closingTag = tagName.toLowerCase();
1070
+ if (stack.length > 1) {
1071
+ const possibleContainer = stack[stack.length - 2];
1072
+ if (possibleContainer &&
1073
+ possibleContainer.rawTagName &&
1074
+ possibleContainer.rawTagName.toLowerCase() === closingTag &&
1075
+ !kElementsClosedByClosingExcept[openTag][closingTag]) {
1076
+ // Update range end for closed tag
1077
+ currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
1078
+ stack.pop();
1079
+ currentParent = (0, back_1.default)(stack);
1080
+ continue;
1081
+ }
1082
+ }
1083
+ }
1052
1084
  // Use aggressive strategy to handle unmatching markups.
1053
1085
  break;
1054
1086
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "7.0.1",
3
+ "version": "7.0.2",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -87,7 +87,8 @@
87
87
  "standard-version": "^9.5.0",
88
88
  "travis-cov": "latest",
89
89
  "ts-node": "^10.9.1",
90
- "typescript": "latest"
90
+ "typescript": "latest",
91
+ "yarn": "^1.22.22"
91
92
  },
92
93
  "config": {
93
94
  "blanket": {