node-html-parser 6.1.15-0 → 7.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,29 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [7.0.1](https://github.com/taoqf/node-fast-html-parser/compare/v7.0.0...v7.0.1) (2024-12-26)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * upgrade node version ([efd77ff](https://github.com/taoqf/node-fast-html-parser/commit/efd77ff93593922512b12216984ba778b2f46593))
11
+
12
+ ## [7.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.14...v7.0.0) (2024-12-26)
13
+
14
+
15
+ ### ⚠ BREAKING CHANGES
16
+
17
+ * fix #277, for that change estarget to es6
18
+
19
+ ### Features
20
+
21
+ * fix [#277](https://github.com/taoqf/node-fast-html-parser/issues/277), for that change estarget to es6 ([432a3e7](https://github.com/taoqf/node-fast-html-parser/commit/432a3e71ba219e76188bcc2e89e525e40911d164))
22
+
23
+
24
+ ### Bug Fixes
25
+
26
+ * add tests for [#227](https://github.com/taoqf/node-fast-html-parser/issues/227) ([5856ee2](https://github.com/taoqf/node-fast-html-parser/commit/5856ee2ef2a0dfef43d75f6d4d13c37c213f25cf))
27
+
5
28
  ### [6.1.14](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.13...v6.1.14) (2024-05-14)
6
29
 
7
30
 
package/README.md CHANGED
@@ -6,7 +6,7 @@ DOM tree, with element query support.
6
6
  Per the design, it intends to parse massive HTML files in lowest price, thus the
7
7
  performance is the top priority. For this reason, some malformatted HTML may not
8
8
  be able to parse correctly, but most usual errors are covered (eg. HTML4 style
9
- no closing `<li>`, `<td>` etc).
9
+ no closing `<td>` etc).
10
10
 
11
11
  ## Install
12
12
 
@@ -186,7 +186,6 @@ Node --|> TextNode
186
186
  Node ..> ClassList
187
187
  ```
188
188
 
189
-
190
189
  ## HTMLElement Methods
191
190
 
192
191
  ### trimRight()
@@ -316,7 +315,6 @@ Clone a node.
316
315
 
317
316
  Get element by it's ID.
318
317
 
319
-
320
318
  ## HTMLElement Properties
321
319
 
322
320
  ### text
package/dist/index.js CHANGED
@@ -4,21 +4,20 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
6
  exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.CommentNode = exports.HTMLElement = exports.parse = void 0;
7
- var comment_1 = __importDefault(require("./nodes/comment"));
7
+ const comment_1 = __importDefault(require("./nodes/comment"));
8
8
  exports.CommentNode = comment_1.default;
9
- var html_1 = __importDefault(require("./nodes/html"));
9
+ const html_1 = __importDefault(require("./nodes/html"));
10
10
  exports.HTMLElement = html_1.default;
11
- var node_1 = __importDefault(require("./nodes/node"));
11
+ const node_1 = __importDefault(require("./nodes/node"));
12
12
  exports.Node = node_1.default;
13
- var text_1 = __importDefault(require("./nodes/text"));
13
+ const text_1 = __importDefault(require("./nodes/text"));
14
14
  exports.TextNode = text_1.default;
15
- var type_1 = __importDefault(require("./nodes/type"));
15
+ const type_1 = __importDefault(require("./nodes/type"));
16
16
  exports.NodeType = type_1.default;
17
- var parse_1 = __importDefault(require("./parse"));
18
- var valid_1 = __importDefault(require("./valid"));
17
+ const parse_1 = __importDefault(require("./parse"));
18
+ const valid_1 = __importDefault(require("./valid"));
19
19
  exports.valid = valid_1.default;
20
- function parse(data, options) {
21
- if (options === void 0) { options = {}; }
20
+ function parse(data, options = {}) {
22
21
  return (0, parse_1.default)(data, options);
23
22
  }
24
23
  exports.default = parse;
package/dist/main.js CHANGED
@@ -535,7 +535,11 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
535
535
  if (attr == null) {
536
536
  return 'null';
537
537
  }
538
- return JSON.stringify(attr.replace(/"/g, '&quot;')).replace(/\\t/g, '\t').replace(/\\n/g, '\n').replace(/\\r/g, '\r').replace(/\\/g, '');
538
+ return JSON.stringify(attr.replace(/"/g, '&quot;'))
539
+ .replace(/\\t/g, '\t')
540
+ .replace(/\\n/g, '\n')
541
+ .replace(/\\r/g, '\r')
542
+ .replace(/\\/g, '');
539
543
  };
540
544
  /**
541
545
  * Remove Child element from childNodes array
@@ -738,8 +742,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
738
742
  return child === _this;
739
743
  });
740
744
  resetParent([this], null);
741
- resetParent(content, parent);
742
- parent.childNodes = __spreadArray(__spreadArray(__spreadArray([], parent.childNodes.slice(0, idx), true), content, true), parent.childNodes.slice(idx + 1), true);
745
+ parent.childNodes = __spreadArray(__spreadArray(__spreadArray([], parent.childNodes.slice(0, idx), true), resetParent(content, parent), true), parent.childNodes.slice(idx + 1), true);
743
746
  return this;
744
747
  };
745
748
  Object.defineProperty(HTMLElement.prototype, "outerHTML", {
@@ -826,10 +829,12 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
826
829
  });
827
830
  this.childNodes.length = o;
828
831
  // remove whitespace between attributes
829
- var attrs = Object.keys(this.rawAttributes).map(function (key) {
832
+ var attrs = Object.keys(this.rawAttributes)
833
+ .map(function (key) {
830
834
  var val = _this.rawAttributes[key];
831
835
  return "".concat(key, "=").concat(JSON.stringify(val));
832
- }).join(' ');
836
+ })
837
+ .join(' ');
833
838
  this.rawAttrs = attrs;
834
839
  delete this._rawAttrs;
835
840
  return this;
@@ -921,7 +926,6 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
921
926
  if (child.id === id) {
922
927
  return child;
923
928
  }
924
- ;
925
929
  // if children are existing push the current status to the stack and keep searching for elements in the level below
926
930
  if (child.childNodes.length > 0) {
927
931
  stack.push(index);
@@ -1036,7 +1040,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1036
1040
  }
1037
1041
  var attrs = {};
1038
1042
  if (this.rawAttrs) {
1039
- var re = /([a-zA-Z()[\]#@$.?:][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1043
+ var re = /([a-zA-Z()[\]#@$.?:][a-zA-Z0-9-._:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1040
1044
  var match = void 0;
1041
1045
  while ((match = re.exec(this.rawAttrs))) {
1042
1046
  var key = match[1];
@@ -1372,8 +1376,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1372
1376
  return HTMLElement;
1373
1377
  }(node_2.default));
1374
1378
  exports.default = HTMLElement;
1379
+ // #xB7 | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x203F-#x2040] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1375
1380
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
1376
- var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
1381
+ var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z@\xB7\xC0-\xD6\xD8-\xF6\u00F8-\u03A1\u03A3-\u03D9\u03DB-\u03EF\u03F7-\u03FF\u0400-\u04FF\u0500-\u052F\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1E9B\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA78E\uA790-\uA7AD\uA7B0-\uA7B7\uA7F7-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB5F\uAB64-\uAB65\uFB00-\uFB06\uFB13-\uFB17\uFF21-\uFF3A\uFF41-\uFF5A\x37F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/gu;
1382
+ // const kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
1377
1383
  var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
1378
1384
  var kElementsClosedByOpening = {
1379
1385
  li: { li: true, LI: true },
@@ -1574,11 +1580,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1574
1580
  * Parse a chuck of HTML source.
1575
1581
  */
1576
1582
  function parse(data, options) {
1577
- var _a, _b;
1578
1583
  if (options === void 0) { options = {}; }
1579
1584
  var stack = base_parse(data, options);
1580
1585
  var root = stack[0];
1581
- while (stack.length > 1) {
1586
+ var _loop_1 = function () {
1582
1587
  // Handle each error elements.
1583
1588
  var last = stack.pop();
1584
1589
  var oneBefore = (0, back_1.default)(stack);
@@ -1588,7 +1593,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1588
1593
  // this is wrong, becouse this will put the H3 outside the current right position which should be inside the current Html Element, see issue 152 for more info
1589
1594
  if (options.parseNoneClosedTags !== true) {
1590
1595
  oneBefore.removeChild(last);
1591
- (_a = oneBefore.parentNode.childNodes).push.apply(_a, last.childNodes);
1596
+ last.childNodes.forEach(function (child) {
1597
+ oneBefore.parentNode.appendChild(child);
1598
+ });
1592
1599
  stack.pop();
1593
1600
  }
1594
1601
  }
@@ -1598,20 +1605,24 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1598
1605
  // eslint-disable-next-line no-lonely-if
1599
1606
  if (options.parseNoneClosedTags !== true) {
1600
1607
  oneBefore.removeChild(last);
1601
- (_b = oneBefore.childNodes).push.apply(_b, last.childNodes);
1608
+ last.childNodes.forEach(function (child) {
1609
+ oneBefore.appendChild(child);
1610
+ });
1602
1611
  }
1603
1612
  }
1604
1613
  }
1605
1614
  else {
1606
1615
  // If it's final element just skip.
1607
1616
  }
1617
+ };
1618
+ while (stack.length > 1) {
1619
+ _loop_1();
1608
1620
  }
1609
1621
  // response.childNodes.forEach((node) => {
1610
1622
  // if (node instanceof HTMLElement) {
1611
1623
  // node.parentNode = null;
1612
1624
  // }
1613
1625
  // });
1614
- resetParent(root.childNodes, root, true);
1615
1626
  return root;
1616
1627
  }
1617
1628
  exports.parse = parse;
@@ -1628,13 +1639,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1628
1639
  return val;
1629
1640
  });
1630
1641
  }
1631
- function resetParent(nodes, parent, recursive) {
1632
- if (recursive === void 0) { recursive = false; }
1633
- nodes.forEach(function (node) {
1642
+ function resetParent(nodes, parent) {
1643
+ return nodes.map(function (node) {
1634
1644
  node.parentNode = parent;
1635
- if (recursive && node instanceof HTMLElement) {
1636
- resetParent(node.childNodes, node, true);
1637
- }
1645
+ return node;
1638
1646
  });
1639
1647
  }
1640
1648
  });
package/dist/matcher.js CHANGED
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- var type_1 = __importDefault(require("./nodes/type"));
6
+ const type_1 = __importDefault(require("./nodes/type"));
7
7
  function isTag(node) {
8
8
  return node && node.nodeType === type_1.default.ELEMENT_NODE;
9
9
  }
@@ -23,10 +23,10 @@ function getText(node) {
23
23
  return node.text;
24
24
  }
25
25
  function removeSubsets(nodes) {
26
- var idx = nodes.length;
27
- var node;
28
- var ancestor;
29
- var replace;
26
+ let idx = nodes.length;
27
+ let node;
28
+ let ancestor;
29
+ let replace;
30
30
  // Check if each node (or one of its ancestors) is already contained in the
31
31
  // array.
32
32
  while (--idx > -1) {
@@ -50,26 +50,26 @@ function removeSubsets(nodes) {
50
50
  return nodes;
51
51
  }
52
52
  function existsOne(test, elems) {
53
- return elems.some(function (elem) {
53
+ return elems.some((elem) => {
54
54
  return isTag(elem) ? test(elem) || existsOne(test, getChildren(elem)) : false;
55
55
  });
56
56
  }
57
57
  function getSiblings(node) {
58
- var parent = getParent(node);
58
+ const parent = getParent(node);
59
59
  return parent ? getChildren(parent) : [];
60
60
  }
61
61
  function hasAttrib(elem, name) {
62
62
  return getAttributeValue(elem, name) !== undefined;
63
63
  }
64
64
  function findOne(test, elems) {
65
- var elem = null;
66
- for (var i = 0, l = elems === null || elems === void 0 ? void 0 : elems.length; i < l && !elem; i++) {
67
- var el = elems[i];
65
+ let elem = null;
66
+ for (let i = 0, l = elems === null || elems === void 0 ? void 0 : elems.length; i < l && !elem; i++) {
67
+ const el = elems[i];
68
68
  if (test(el)) {
69
69
  elem = el;
70
70
  }
71
71
  else {
72
- var childs = getChildren(el);
72
+ const childs = getChildren(el);
73
73
  if (childs && childs.length > 0) {
74
74
  elem = findOne(test, childs);
75
75
  }
@@ -78,29 +78,29 @@ function findOne(test, elems) {
78
78
  return elem;
79
79
  }
80
80
  function findAll(test, nodes) {
81
- var result = [];
82
- for (var i = 0, j = nodes.length; i < j; i++) {
81
+ let result = [];
82
+ for (let i = 0, j = nodes.length; i < j; i++) {
83
83
  if (!isTag(nodes[i]))
84
84
  continue;
85
85
  if (test(nodes[i]))
86
86
  result.push(nodes[i]);
87
- var childs = getChildren(nodes[i]);
87
+ const childs = getChildren(nodes[i]);
88
88
  if (childs)
89
89
  result = result.concat(findAll(test, childs));
90
90
  }
91
91
  return result;
92
92
  }
93
93
  exports.default = {
94
- isTag: isTag,
95
- getAttributeValue: getAttributeValue,
96
- getName: getName,
97
- getChildren: getChildren,
98
- getParent: getParent,
99
- getText: getText,
100
- removeSubsets: removeSubsets,
101
- existsOne: existsOne,
102
- getSiblings: getSiblings,
103
- hasAttrib: hasAttrib,
104
- findOne: findOne,
105
- findAll: findAll
94
+ isTag,
95
+ getAttributeValue,
96
+ getName,
97
+ getChildren,
98
+ getParent,
99
+ getText,
100
+ removeSubsets,
101
+ existsOne,
102
+ getSiblings,
103
+ hasAttrib,
104
+ findOne,
105
+ findAll
106
106
  };
@@ -1,57 +1,33 @@
1
1
  "use strict";
2
- var __extends = (this && this.__extends) || (function () {
3
- var extendStatics = function (d, b) {
4
- extendStatics = Object.setPrototypeOf ||
5
- ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
6
- function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
7
- return extendStatics(d, b);
8
- };
9
- return function (d, b) {
10
- if (typeof b !== "function" && b !== null)
11
- throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
12
- extendStatics(d, b);
13
- function __() { this.constructor = d; }
14
- d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
15
- };
16
- })();
17
2
  var __importDefault = (this && this.__importDefault) || function (mod) {
18
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
19
4
  };
20
5
  Object.defineProperty(exports, "__esModule", { value: true });
21
- var node_1 = __importDefault(require("./node"));
22
- var type_1 = __importDefault(require("./type"));
23
- var CommentNode = /** @class */ (function (_super) {
24
- __extends(CommentNode, _super);
25
- function CommentNode(rawText, parentNode, range, rawTagName) {
26
- if (parentNode === void 0) { parentNode = null; }
27
- if (rawTagName === void 0) { rawTagName = '!--'; }
28
- var _this = _super.call(this, parentNode, range) || this;
29
- _this.rawText = rawText;
30
- _this.rawTagName = rawTagName;
6
+ const node_1 = __importDefault(require("./node"));
7
+ const type_1 = __importDefault(require("./type"));
8
+ class CommentNode extends node_1.default {
9
+ clone() {
10
+ return new CommentNode(this.rawText, null, undefined, this.rawTagName);
11
+ }
12
+ constructor(rawText, parentNode = null, range, rawTagName = '!--') {
13
+ super(parentNode, range);
14
+ this.rawText = rawText;
15
+ this.rawTagName = rawTagName;
31
16
  /**
32
17
  * Node Type declaration.
33
18
  * @type {Number}
34
19
  */
35
- _this.nodeType = type_1.default.COMMENT_NODE;
36
- return _this;
20
+ this.nodeType = type_1.default.COMMENT_NODE;
37
21
  }
38
- CommentNode.prototype.clone = function () {
39
- return new CommentNode(this.rawText, null, undefined, this.rawTagName);
40
- };
41
- Object.defineProperty(CommentNode.prototype, "text", {
42
- /**
43
- * Get unescaped text value of current node and its children.
44
- * @return {string} text content
45
- */
46
- get: function () {
47
- return this.rawText;
48
- },
49
- enumerable: false,
50
- configurable: true
51
- });
52
- CommentNode.prototype.toString = function () {
53
- return "<!--".concat(this.rawText, "-->");
54
- };
55
- return CommentNode;
56
- }(node_1.default));
22
+ /**
23
+ * Get unescaped text value of current node and its children.
24
+ * @return {string} text content
25
+ */
26
+ get text() {
27
+ return this.rawText;
28
+ }
29
+ toString() {
30
+ return `<!--${this.rawText}-->`;
31
+ }
32
+ }
57
33
  exports.default = CommentNode;