node-html-parser 6.1.0 → 6.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,20 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ### [6.1.2](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.1...v6.1.2) (2022-11-14)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * [#224](https://github.com/taoqf/node-fast-html-parser/issues/224) ([fc367fa](https://github.com/taoqf/node-fast-html-parser/commit/fc367fa294d72794a0dea49edbd986d527a6314b))
11
+
12
+ ### [6.1.1](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.0...v6.1.1) (2022-09-24)
13
+
14
+
15
+ ### Bug Fixes
16
+
17
+ * parse comments ([82b68ff](https://github.com/taoqf/node-fast-html-parser/commit/82b68ff9eb944e0c55ca2e0ea13fb714e2004803))
18
+
5
19
  ## [6.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v6.0.0...v6.1.0) (2022-09-19)
6
20
 
7
21
 
package/dist/index.js CHANGED
@@ -18,10 +18,7 @@ var parse_1 = __importDefault(require("./parse"));
18
18
  var valid_1 = __importDefault(require("./valid"));
19
19
  exports.valid = valid_1.default;
20
20
  function parse(data, options) {
21
- if (options === void 0) { options = {
22
- lowerCaseTagName: false,
23
- comment: false
24
- }; }
21
+ if (options === void 0) { options = {}; }
25
22
  return (0, parse_1.default)(data, options);
26
23
  }
27
24
  exports.default = parse;
package/dist/main.js CHANGED
@@ -171,7 +171,7 @@ define("matcher", ["require", "exports", "nodes/type"], function (require, expor
171
171
  }
172
172
  function findOne(test, elems) {
173
173
  var elem = null;
174
- for (var i = 0, l = elems.length; i < l && !elem; i++) {
174
+ for (var i = 0, l = elems === null || elems === void 0 ? void 0 : elems.length; i < l && !elem; i++) {
175
175
  var el = elems[i];
176
176
  if (test(el)) {
177
177
  elem = el;
@@ -487,9 +487,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
487
487
  *
488
488
  * @memberof HTMLElement
489
489
  */
490
- function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag) {
490
+ function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag, _parseOptions) {
491
491
  if (rawAttrs === void 0) { rawAttrs = ''; }
492
492
  if (voidTag === void 0) { voidTag = new void_tag_1.default(); }
493
+ if (_parseOptions === void 0) { _parseOptions = {}; }
493
494
  var _this = _super.call(this, parentNode, range) || this;
494
495
  _this.rawAttrs = rawAttrs;
495
496
  _this.voidTag = voidTag;
@@ -501,6 +502,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
501
502
  _this.rawAttrs = rawAttrs || '';
502
503
  _this.id = keyAttrs.id || '';
503
504
  _this.childNodes = [];
505
+ _this._parseOptions = _parseOptions;
504
506
  _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
505
507
  );
506
508
  if (keyAttrs.id) {
@@ -682,8 +684,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
682
684
  .join('');
683
685
  },
684
686
  set: function (content) {
685
- //const r = parse(content, global.options); // TODO global.options ?
686
- var r = parse(content);
687
+ var r = parse(content, this._parseOptions);
687
688
  var nodes = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
688
689
  resetParent(nodes, this);
689
690
  resetParent(this.childNodes, null);
@@ -698,8 +699,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
698
699
  content = [content];
699
700
  }
700
701
  else if (typeof content == 'string') {
702
+ options = __assign(__assign({}, this._parseOptions), options);
701
703
  var r = parse(content, options);
702
- content = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
704
+ content = r.childNodes.length ? r.childNodes : [new text_1.default(r.innerHTML, this)];
703
705
  }
704
706
  resetParent(this.childNodes, null);
705
707
  resetParent(content, this);
@@ -719,8 +721,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
719
721
  return [node];
720
722
  }
721
723
  else if (typeof node == 'string') {
722
- // const r = parse(content, global.options); // TODO global.options ?
723
- var r = parse(node);
724
+ var r = parse(node, _this._parseOptions);
724
725
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
725
726
  }
726
727
  return [];
@@ -1161,7 +1162,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1161
1162
  if (arguments.length < 2) {
1162
1163
  throw new Error('2 arguments required');
1163
1164
  }
1164
- var p = parse(html);
1165
+ var p = parse(html, this._parseOptions);
1165
1166
  if (where === 'afterend') {
1166
1167
  var idx = this.parentNode.childNodes.findIndex(function (child) {
1167
1168
  return child === _this;
@@ -1282,7 +1283,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1282
1283
  * Clone this Node
1283
1284
  */
1284
1285
  HTMLElement.prototype.clone = function () {
1285
- return parse(this.toString()).firstChild;
1286
+ return parse(this.toString(), this._parseOptions).firstChild;
1286
1287
  };
1287
1288
  return HTMLElement;
1288
1289
  }(node_2.default));
@@ -1369,7 +1370,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1369
1370
  */
1370
1371
  function base_parse(data, options) {
1371
1372
  var _a, _b;
1372
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1373
+ if (options === void 0) { options = {}; }
1373
1374
  var voidTag = new void_tag_1.default((_a = options === null || options === void 0 ? void 0 : options.voidTag) === null || _a === void 0 ? void 0 : _a.closingSlash, (_b = options === null || options === void 0 ? void 0 : options.voidTag) === null || _b === void 0 ? void 0 : _b.tags);
1374
1375
  var elements = options.blockTextElements || {
1375
1376
  script: true,
@@ -1387,7 +1388,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1387
1388
  return kBlockTextElements.some(function (it) { return it.test(tag); });
1388
1389
  }
1389
1390
  var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1390
- var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag);
1391
+ var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag, options);
1391
1392
  var currentParent = root;
1392
1393
  var stack = [root];
1393
1394
  var lastTextPos = -1;
@@ -1458,7 +1459,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1458
1459
  var tagStartPos_1 = tagEndPos_1 - matchLength;
1459
1460
  currentParent = currentParent.appendChild(
1460
1461
  // Initialize range (end position updated later for closed tags)
1461
- new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag));
1462
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag, options));
1462
1463
  stack.push(currentParent);
1463
1464
  if (is_block_text_element(tagName)) {
1464
1465
  // Find closing tag
@@ -1519,7 +1520,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1519
1520
  * Parse a chunk of HTML source.
1520
1521
  */
1521
1522
  function parse(data, options) {
1522
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1523
+ if (options === void 0) { options = {}; }
1523
1524
  var stack = base_parse(data, options);
1524
1525
  var root = stack[0];
1525
1526
  var _loop_1 = function () {
@@ -1624,7 +1625,7 @@ define("valid", ["require", "exports", "nodes/html"], function (require, exports
1624
1625
  * Parse a chunk of HTML source.
1625
1626
  */
1626
1627
  function valid(data, options) {
1627
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1628
+ if (options === void 0) { options = {}; }
1628
1629
  var stack = (0, html_2.base_parse)(data, options);
1629
1630
  return Boolean(stack.length === 1);
1630
1631
  }
@@ -1648,10 +1649,7 @@ define("index", ["require", "exports", "nodes/comment", "nodes/html", "nodes/nod
1648
1649
  exports.NodeType = type_5.default;
1649
1650
  exports.valid = valid_1.default;
1650
1651
  function parse(data, options) {
1651
- if (options === void 0) { options = {
1652
- lowerCaseTagName: false,
1653
- comment: false
1654
- }; }
1652
+ if (options === void 0) { options = {}; }
1655
1653
  return (0, parse_1.default)(data, options);
1656
1654
  }
1657
1655
  exports.default = parse;
package/dist/matcher.js CHANGED
@@ -63,7 +63,7 @@ function hasAttrib(elem, name) {
63
63
  }
64
64
  function findOne(test, elems) {
65
65
  var elem = null;
66
- for (var i = 0, l = elems.length; i < l && !elem; i++) {
66
+ for (var i = 0, l = elems === null || elems === void 0 ? void 0 : elems.length; i < l && !elem; i++) {
67
67
  var el = elems[i];
68
68
  if (test(el)) {
69
69
  elem = el;
@@ -41,6 +41,7 @@ export default class HTMLElement extends Node {
41
41
  private voidTag;
42
42
  private _attrs;
43
43
  private _rawAttrs;
44
+ private _parseOptions;
44
45
  rawTagName: string;
45
46
  id: string;
46
47
  classList: DOMTokenList;
@@ -61,7 +62,7 @@ export default class HTMLElement extends Node {
61
62
  *
62
63
  * @memberof HTMLElement
63
64
  */
64
- constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range: [number, number], voidTag?: VoidTag);
65
+ constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range: [number, number], voidTag?: VoidTag, _parseOptions?: Partial<Options>);
65
66
  /**
66
67
  * Remove Child element from childNodes array
67
68
  * @param {HTMLElement} node node to remove
@@ -142,7 +143,7 @@ export default class HTMLElement extends Node {
142
143
  * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
143
144
  * @param selector a DOMString containing a selector list
144
145
  */
145
- closest(selector: string): Node;
146
+ closest(selector: string): HTMLElement;
146
147
  /**
147
148
  * Append a child node to childNodes
148
149
  * @param {Node} node node to append
@@ -201,8 +202,8 @@ export default class HTMLElement extends Node {
201
202
  clone(): Node;
202
203
  }
203
204
  export interface Options {
204
- lowerCaseTagName: boolean;
205
- comment: boolean;
205
+ lowerCaseTagName?: boolean;
206
+ comment?: boolean;
206
207
  /**
207
208
  * @see PR #215 for explanation
208
209
  */
@@ -154,9 +154,10 @@ var HTMLElement = /** @class */ (function (_super) {
154
154
  *
155
155
  * @memberof HTMLElement
156
156
  */
157
- function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag) {
157
+ function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag, _parseOptions) {
158
158
  if (rawAttrs === void 0) { rawAttrs = ''; }
159
159
  if (voidTag === void 0) { voidTag = new void_tag_1.default(); }
160
+ if (_parseOptions === void 0) { _parseOptions = {}; }
160
161
  var _this = _super.call(this, parentNode, range) || this;
161
162
  _this.rawAttrs = rawAttrs;
162
163
  _this.voidTag = voidTag;
@@ -168,6 +169,7 @@ var HTMLElement = /** @class */ (function (_super) {
168
169
  _this.rawAttrs = rawAttrs || '';
169
170
  _this.id = keyAttrs.id || '';
170
171
  _this.childNodes = [];
172
+ _this._parseOptions = _parseOptions;
171
173
  _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
172
174
  );
173
175
  if (keyAttrs.id) {
@@ -349,8 +351,7 @@ var HTMLElement = /** @class */ (function (_super) {
349
351
  .join('');
350
352
  },
351
353
  set: function (content) {
352
- //const r = parse(content, global.options); // TODO global.options ?
353
- var r = parse(content);
354
+ var r = parse(content, this._parseOptions);
354
355
  var nodes = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
355
356
  resetParent(nodes, this);
356
357
  resetParent(this.childNodes, null);
@@ -365,8 +366,9 @@ var HTMLElement = /** @class */ (function (_super) {
365
366
  content = [content];
366
367
  }
367
368
  else if (typeof content == 'string') {
369
+ options = __assign(__assign({}, this._parseOptions), options);
368
370
  var r = parse(content, options);
369
- content = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
371
+ content = r.childNodes.length ? r.childNodes : [new text_1.default(r.innerHTML, this)];
370
372
  }
371
373
  resetParent(this.childNodes, null);
372
374
  resetParent(content, this);
@@ -386,8 +388,7 @@ var HTMLElement = /** @class */ (function (_super) {
386
388
  return [node];
387
389
  }
388
390
  else if (typeof node == 'string') {
389
- // const r = parse(content, global.options); // TODO global.options ?
390
- var r = parse(node);
391
+ var r = parse(node, _this._parseOptions);
391
392
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
392
393
  }
393
394
  return [];
@@ -828,7 +829,7 @@ var HTMLElement = /** @class */ (function (_super) {
828
829
  if (arguments.length < 2) {
829
830
  throw new Error('2 arguments required');
830
831
  }
831
- var p = parse(html);
832
+ var p = parse(html, this._parseOptions);
832
833
  if (where === 'afterend') {
833
834
  var idx = this.parentNode.childNodes.findIndex(function (child) {
834
835
  return child === _this;
@@ -949,7 +950,7 @@ var HTMLElement = /** @class */ (function (_super) {
949
950
  * Clone this Node
950
951
  */
951
952
  HTMLElement.prototype.clone = function () {
952
- return parse(this.toString()).firstChild;
953
+ return parse(this.toString(), this._parseOptions).firstChild;
953
954
  };
954
955
  return HTMLElement;
955
956
  }(node_1.default));
@@ -1036,7 +1037,7 @@ var frameflag = 'documentfragmentcontainer';
1036
1037
  */
1037
1038
  function base_parse(data, options) {
1038
1039
  var _a, _b;
1039
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1040
+ if (options === void 0) { options = {}; }
1040
1041
  var voidTag = new void_tag_1.default((_a = options === null || options === void 0 ? void 0 : options.voidTag) === null || _a === void 0 ? void 0 : _a.closingSlash, (_b = options === null || options === void 0 ? void 0 : options.voidTag) === null || _b === void 0 ? void 0 : _b.tags);
1041
1042
  var elements = options.blockTextElements || {
1042
1043
  script: true,
@@ -1054,7 +1055,7 @@ function base_parse(data, options) {
1054
1055
  return kBlockTextElements.some(function (it) { return it.test(tag); });
1055
1056
  }
1056
1057
  var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1057
- var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag);
1058
+ var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag, options);
1058
1059
  var currentParent = root;
1059
1060
  var stack = [root];
1060
1061
  var lastTextPos = -1;
@@ -1125,7 +1126,7 @@ function base_parse(data, options) {
1125
1126
  var tagStartPos_1 = tagEndPos_1 - matchLength;
1126
1127
  currentParent = currentParent.appendChild(
1127
1128
  // Initialize range (end position updated later for closed tags)
1128
- new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag));
1129
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag, options));
1129
1130
  stack.push(currentParent);
1130
1131
  if (is_block_text_element(tagName)) {
1131
1132
  // Find closing tag
@@ -1186,7 +1187,7 @@ exports.base_parse = base_parse;
1186
1187
  * Parse a chunk of HTML source.
1187
1188
  */
1188
1189
  function parse(data, options) {
1189
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1190
+ if (options === void 0) { options = {}; }
1190
1191
  var stack = base_parse(data, options);
1191
1192
  var root = stack[0];
1192
1193
  var _loop_1 = function () {
package/dist/valid.js CHANGED
@@ -6,7 +6,7 @@ var html_1 = require("./nodes/html");
6
6
  * Parse a chunk of HTML source.
7
7
  */
8
8
  function valid(data, options) {
9
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
9
+ if (options === void 0) { options = {}; }
10
10
  var stack = (0, html_1.base_parse)(data, options);
11
11
  return Boolean(stack.length === 1);
12
12
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "6.1.0",
3
+ "version": "6.1.2",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -8,6 +8,7 @@
8
8
  "compile": "tsc",
9
9
  "build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
10
10
  "compile:cjs": "tsc -m commonjs",
11
+ "watch": "npx tsc -m commonjs --watch --preserveWatchOutput",
11
12
  "compile:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
12
13
  "lint": "eslint ./src/*.ts ./src/**/*.ts",
13
14
  "---------------": "",
@@ -23,7 +24,8 @@
23
24
  "test:target": "mocha --recursive \"./test/tests\"",
24
25
  "test:ci": "cross-env TEST_TARGET=dist yarn run test:target",
25
26
  "posttest": "yarn run benchmark",
26
- "prepare": "cd test && yarn install"
27
+ "prepare": "cd test && yarn install",
28
+ "release": "standard-version && git push --follow-tags origin main"
27
29
  },
28
30
  "keywords": [
29
31
  "html",
@@ -47,7 +49,7 @@
47
49
  "registry": "https://registry.npmjs.org"
48
50
  },
49
51
  "dependencies": {
50
- "css-select": "^4.2.1",
52
+ "css-select": "^5.1.0",
51
53
  "he": "1.2.0"
52
54
  },
53
55
  "devDependencies": {
@@ -58,31 +60,31 @@
58
60
  "@typescript-eslint/eslint-plugin-tslint": "latest",
59
61
  "@typescript-eslint/parser": "latest",
60
62
  "blanket": "latest",
61
- "cheerio": "^1.0.0-rc.5",
63
+ "cheerio": "^1.0.0-rc.12",
62
64
  "cross-env": "^7.0.3",
63
- "eslint": "^7.32.0",
65
+ "eslint": "^8.23.1",
64
66
  "eslint-config-prettier": "latest",
65
67
  "eslint-plugin-import": "latest",
66
68
  "high5": "^1.0.0",
67
- "html-dom-parser": "^1.0.4",
69
+ "html-dom-parser": "^3.1.2",
68
70
  "html-parser": "^0.11.0",
69
71
  "html5parser": "^2.0.2",
70
- "htmljs-parser": "^2.11.1",
72
+ "htmljs-parser": "^5.1.4",
71
73
  "htmlparser": "^1.7.7",
72
74
  "htmlparser-benchmark": "^1.1.3",
73
- "htmlparser2": "^6.0.0",
75
+ "htmlparser2": "^8.0.1",
74
76
  "mocha": "latest",
75
77
  "mocha-each": "^2.0.1",
76
78
  "neutron-html5parser": "^0.2.0",
77
79
  "np": "latest",
78
- "parse5": "^6.0.1",
80
+ "parse5": "^7.1.1",
79
81
  "rimraf": "^3.0.2",
80
82
  "saxes": "^6.0.0",
81
83
  "should": "latest",
82
84
  "spec": "latest",
83
- "standard-version": "^9.3.1",
85
+ "standard-version": "^9.5.0",
84
86
  "travis-cov": "latest",
85
- "ts-node": "^10.2.1",
87
+ "ts-node": "^10.9.1",
86
88
  "typescript": "latest"
87
89
  },
88
90
  "config": {