node-html-parser 5.3.2 → 5.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -19,18 +19,20 @@ npm install --save node-html-parser
19
19
 
20
20
  ## Performance
21
21
 
22
+ -- 2022-08-10
23
+
22
24
  ```shell
23
- cheerio :12.0726 ms/file ± 7.31605
24
- parse5 :8.18615 ms/file ± 6.15337
25
- node-html-parser (last release):2.16533 ms/file ± 1.56924
26
- htmlparser :17.0658 ms/file ± 120.901
27
- htmlparser2 :2.62695 ms/file ± 4.17579
28
- node-html-parser:2.14907 ms/file ± 1.66632
29
- html-parser :24.6505 ms/file ± 18.9996
30
- htmljs-parser :5.81797 ms/file ± 6.55537
31
- html-dom-parser :2.52265 ms/file ± 3.54858
32
- html5parser :2.01144 ms/file ± 2.53570
33
- high5 :3.91342 ms/file ± 2.65563
25
+ html-parser :24.1595 ms/file ± 18.7667
26
+ htmljs-parser :4.72064 ms/file ± 5.67689
27
+ html-dom-parser :2.18055 ms/file ± 2.96136
28
+ html5parser :1.69639 ms/file ± 2.17111
29
+ cheerio :12.2122 ms/file ± 8.10916
30
+ parse5 :6.50626 ms/file ± 4.02352
31
+ htmlparser2 :2.38179 ms/file ± 3.42389
32
+ htmlparser :17.4820 ms/file ± 128.041
33
+ high5 :3.95188 ms/file ± 2.52313
34
+ node-html-parser:2.04288 ms/file ± 1.25203
35
+ node-html-parser (last release):2.00527 ms/file ± 1.21317
34
36
  ```
35
37
 
36
38
  Tested with [htmlparser-benchmark](https://github.com/AndreasMadsen/htmlparser-benchmark).
@@ -82,6 +84,10 @@ Parse the data provided, and return the root of the generated DOM.
82
84
  {
83
85
  lowerCaseTagName: false, // convert tag name to lower case (hurts performance heavily)
84
86
  comment: false, // retrieve comments (hurts performance slightly)
87
+ voidTag:{
88
+ tags: ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'], // optional and case insensitive, default value is ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']
89
+ closingSlash: true // optional, default false. void tag serialisation, add a final slash <br/>
90
+ },
85
91
  blockTextElements: {
86
92
  script: true, // keep text content when parsing
87
93
  noscript: true, // keep text content when parsing
package/dist/main.js CHANGED
@@ -213,6 +213,37 @@ define("matcher", ["require", "exports", "nodes/type"], function (require, expor
213
213
  findAll: findAll
214
214
  };
215
215
  });
216
+ define("void-tag", ["require", "exports"], function (require, exports) {
217
+ "use strict";
218
+ Object.defineProperty(exports, "__esModule", { value: true });
219
+ var VoidTag = /** @class */ (function () {
220
+ function VoidTag(addClosingSlash, tags) {
221
+ if (addClosingSlash === void 0) { addClosingSlash = false; }
222
+ this.addClosingSlash = addClosingSlash;
223
+ if (Array.isArray(tags)) {
224
+ this.voidTags = tags.reduce(function (set, tag) {
225
+ return set.add(tag.toLowerCase());
226
+ }, new Set());
227
+ }
228
+ else {
229
+ this.voidTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'].reduce(function (set, tag) {
230
+ return set.add(tag);
231
+ }, new Set());
232
+ }
233
+ }
234
+ VoidTag.prototype.formatNode = function (tag, attrs, innerHTML) {
235
+ var addClosingSlash = this.addClosingSlash;
236
+ var closingSpace = (addClosingSlash && attrs && !attrs.endsWith(' ')) ? ' ' : '';
237
+ var closingSlash = addClosingSlash ? "".concat(closingSpace, "/") : '';
238
+ return this.isVoidElement(tag.toLowerCase()) ? "<".concat(tag).concat(attrs).concat(closingSlash, ">") : "<".concat(tag).concat(attrs, ">").concat(innerHTML, "</").concat(tag, ">");
239
+ };
240
+ VoidTag.prototype.isVoidElement = function (tag) {
241
+ return this.voidTags.has(tag);
242
+ };
243
+ return VoidTag;
244
+ }());
245
+ exports.default = VoidTag;
246
+ });
216
247
  define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], function (require, exports, he_2, node_1, type_2) {
217
248
  "use strict";
218
249
  Object.defineProperty(exports, "__esModule", { value: true });
@@ -338,18 +369,18 @@ define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], f
338
369
  return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
339
370
  }
340
371
  });
341
- define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher", "nodes/comment", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, css_select_1, he_3, back_1, matcher_1, comment_1, node_2, text_1, type_3) {
372
+ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher", "void-tag", "nodes/comment", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, css_select_1, he_3, back_1, matcher_1, void_tag_1, comment_1, node_2, text_1, type_3) {
342
373
  "use strict";
343
374
  Object.defineProperty(exports, "__esModule", { value: true });
344
375
  exports.parse = exports.base_parse = void 0;
345
376
  he_3 = __importDefault(he_3);
346
377
  back_1 = __importDefault(back_1);
347
378
  matcher_1 = __importDefault(matcher_1);
379
+ void_tag_1 = __importDefault(void_tag_1);
348
380
  comment_1 = __importDefault(comment_1);
349
381
  node_2 = __importDefault(node_2);
350
382
  text_1 = __importDefault(text_1);
351
383
  type_3 = __importDefault(type_3);
352
- var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
353
384
  function decode(val) {
354
385
  // clone string
355
386
  return JSON.parse(JSON.stringify(he_3.default.decode(val)));
@@ -456,10 +487,12 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
456
487
  *
457
488
  * @memberof HTMLElement
458
489
  */
459
- function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range) {
490
+ function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag) {
460
491
  if (rawAttrs === void 0) { rawAttrs = ''; }
492
+ if (voidTag === void 0) { voidTag = new void_tag_1.default(); }
461
493
  var _this = _super.call(this, parentNode, range) || this;
462
494
  _this.rawAttrs = rawAttrs;
495
+ _this.voidTag = voidTag;
463
496
  /**
464
497
  * Node Type declaration.
465
498
  */
@@ -543,7 +576,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
543
576
  });
544
577
  Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
545
578
  get: function () {
546
- return voidTags.has(this.localName);
579
+ return this.voidTag.isVoidElement(this.localName);
547
580
  },
548
581
  enumerable: false,
549
582
  configurable: true
@@ -636,7 +669,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
636
669
  var tag = this.rawTagName;
637
670
  if (tag) {
638
671
  var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
639
- return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
672
+ return this.voidTag.formatNode(tag, attrs, this.innerHTML);
640
673
  }
641
674
  return this.innerHTML;
642
675
  };
@@ -1009,7 +1042,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1009
1042
  }
1010
1043
  var attrs = {};
1011
1044
  if (this.rawAttrs) {
1012
- var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1045
+ var re = /([a-zA-Z()[\]#][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1013
1046
  var match = void 0;
1014
1047
  while ((match = re.exec(this.rawAttrs))) {
1015
1048
  var key = match[1];
@@ -1333,7 +1366,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1333
1366
  * @return {HTMLElement} root element
1334
1367
  */
1335
1368
  function base_parse(data, options) {
1369
+ var _a, _b;
1336
1370
  if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1371
+ var voidTag = new void_tag_1.default((_a = options === null || options === void 0 ? void 0 : options.voidTag) === null || _a === void 0 ? void 0 : _a.closingSlash, (_b = options === null || options === void 0 ? void 0 : options.voidTag) === null || _b === void 0 ? void 0 : _b.tags);
1337
1372
  var elements = options.blockTextElements || {
1338
1373
  script: true,
1339
1374
  noscript: true,
@@ -1350,7 +1385,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1350
1385
  return kBlockTextElements.some(function (it) { return it.test(tag); });
1351
1386
  }
1352
1387
  var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1353
- var root = new HTMLElement(null, {}, '', null, [0, data.length]);
1388
+ var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag);
1354
1389
  var currentParent = root;
1355
1390
  var stack = [root];
1356
1391
  var lastTextPos = -1;
@@ -1421,7 +1456,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1421
1456
  var tagStartPos_1 = tagEndPos_1 - matchLength;
1422
1457
  currentParent = currentParent.appendChild(
1423
1458
  // Initialize range (end position updated later for closed tags)
1424
- new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
1459
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag));
1425
1460
  stack.push(currentParent);
1426
1461
  if (is_block_text_element(tagName)) {
1427
1462
  // Find closing tag
@@ -1,3 +1,4 @@
1
+ import VoidTag from '../void-tag';
1
2
  import Node from './node';
2
3
  import NodeType from './type';
3
4
  export interface KeyAttributes {
@@ -37,6 +38,7 @@ declare class DOMTokenList {
37
38
  */
38
39
  export default class HTMLElement extends Node {
39
40
  rawAttrs: string;
41
+ private voidTag;
40
42
  private _attrs;
41
43
  private _rawAttrs;
42
44
  rawTagName: string;
@@ -59,7 +61,7 @@ export default class HTMLElement extends Node {
59
61
  *
60
62
  * @memberof HTMLElement
61
63
  */
62
- constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range?: [number, number]);
64
+ constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range: [number, number], voidTag?: VoidTag);
63
65
  /**
64
66
  * Remove Child element from childNodes array
65
67
  * @param {HTMLElement} node node to remove
@@ -95,7 +97,7 @@ export default class HTMLElement extends Node {
95
97
  toString(): string;
96
98
  get innerHTML(): string;
97
99
  set innerHTML(content: string);
98
- set_content(content: string | Node | Node[], options?: Options): this;
100
+ set_content(content: string | Node | Node[], options?: Partial<Options>): this;
99
101
  replaceWith(...nodes: (string | Node)[]): void;
100
102
  get outerHTML(): string;
101
103
  /**
@@ -205,6 +207,16 @@ export interface Options {
205
207
  blockTextElements: {
206
208
  [tag: string]: boolean;
207
209
  };
210
+ voidTag?: {
211
+ /**
212
+ * optional and case insensitive, default value is ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']
213
+ */
214
+ tags?: string[];
215
+ /**
216
+ * optional, default false. void tag serialisation, add a final slash <br/>
217
+ */
218
+ closingSlash?: boolean;
219
+ };
208
220
  }
209
221
  /**
210
222
  * Parses HTML and returns a root element
@@ -43,11 +43,11 @@ var css_select_1 = require("css-select");
43
43
  var he_1 = __importDefault(require("he"));
44
44
  var back_1 = __importDefault(require("../back"));
45
45
  var matcher_1 = __importDefault(require("../matcher"));
46
+ var void_tag_1 = __importDefault(require("../void-tag"));
46
47
  var comment_1 = __importDefault(require("./comment"));
47
48
  var node_1 = __importDefault(require("./node"));
48
49
  var text_1 = __importDefault(require("./text"));
49
50
  var type_1 = __importDefault(require("./type"));
50
- var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
51
51
  function decode(val) {
52
52
  // clone string
53
53
  return JSON.parse(JSON.stringify(he_1.default.decode(val)));
@@ -154,10 +154,12 @@ var HTMLElement = /** @class */ (function (_super) {
154
154
  *
155
155
  * @memberof HTMLElement
156
156
  */
157
- function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range) {
157
+ function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag) {
158
158
  if (rawAttrs === void 0) { rawAttrs = ''; }
159
+ if (voidTag === void 0) { voidTag = new void_tag_1.default(); }
159
160
  var _this = _super.call(this, parentNode, range) || this;
160
161
  _this.rawAttrs = rawAttrs;
162
+ _this.voidTag = voidTag;
161
163
  /**
162
164
  * Node Type declaration.
163
165
  */
@@ -241,7 +243,7 @@ var HTMLElement = /** @class */ (function (_super) {
241
243
  });
242
244
  Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
243
245
  get: function () {
244
- return voidTags.has(this.localName);
246
+ return this.voidTag.isVoidElement(this.localName);
245
247
  },
246
248
  enumerable: false,
247
249
  configurable: true
@@ -334,7 +336,7 @@ var HTMLElement = /** @class */ (function (_super) {
334
336
  var tag = this.rawTagName;
335
337
  if (tag) {
336
338
  var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
337
- return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
339
+ return this.voidTag.formatNode(tag, attrs, this.innerHTML);
338
340
  }
339
341
  return this.innerHTML;
340
342
  };
@@ -707,7 +709,7 @@ var HTMLElement = /** @class */ (function (_super) {
707
709
  }
708
710
  var attrs = {};
709
711
  if (this.rawAttrs) {
710
- var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
712
+ var re = /([a-zA-Z()[\]#][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
711
713
  var match = void 0;
712
714
  while ((match = re.exec(this.rawAttrs))) {
713
715
  var key = match[1];
@@ -1031,7 +1033,9 @@ var frameflag = 'documentfragmentcontainer';
1031
1033
  * @return {HTMLElement} root element
1032
1034
  */
1033
1035
  function base_parse(data, options) {
1036
+ var _a, _b;
1034
1037
  if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1038
+ var voidTag = new void_tag_1.default((_a = options === null || options === void 0 ? void 0 : options.voidTag) === null || _a === void 0 ? void 0 : _a.closingSlash, (_b = options === null || options === void 0 ? void 0 : options.voidTag) === null || _b === void 0 ? void 0 : _b.tags);
1035
1039
  var elements = options.blockTextElements || {
1036
1040
  script: true,
1037
1041
  noscript: true,
@@ -1048,7 +1052,7 @@ function base_parse(data, options) {
1048
1052
  return kBlockTextElements.some(function (it) { return it.test(tag); });
1049
1053
  }
1050
1054
  var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1051
- var root = new HTMLElement(null, {}, '', null, [0, data.length]);
1055
+ var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag);
1052
1056
  var currentParent = root;
1053
1057
  var stack = [root];
1054
1058
  var lastTextPos = -1;
@@ -1119,7 +1123,7 @@ function base_parse(data, options) {
1119
1123
  var tagStartPos_1 = tagEndPos_1 - matchLength;
1120
1124
  currentParent = currentParent.appendChild(
1121
1125
  // Initialize range (end position updated later for closed tags)
1122
- new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
1126
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag));
1123
1127
  stack.push(currentParent);
1124
1128
  if (is_block_text_element(tagName)) {
1125
1129
  // Find closing tag
@@ -0,0 +1,7 @@
1
+ export default class VoidTag {
2
+ addClosingSlash: boolean;
3
+ private voidTags;
4
+ constructor(addClosingSlash?: boolean, tags?: string[]);
5
+ formatNode(tag: string, attrs: string, innerHTML: string): string;
6
+ isVoidElement(tag: string): boolean;
7
+ }
@@ -0,0 +1,29 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ var VoidTag = /** @class */ (function () {
4
+ function VoidTag(addClosingSlash, tags) {
5
+ if (addClosingSlash === void 0) { addClosingSlash = false; }
6
+ this.addClosingSlash = addClosingSlash;
7
+ if (Array.isArray(tags)) {
8
+ this.voidTags = tags.reduce(function (set, tag) {
9
+ return set.add(tag.toLowerCase());
10
+ }, new Set());
11
+ }
12
+ else {
13
+ this.voidTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'].reduce(function (set, tag) {
14
+ return set.add(tag);
15
+ }, new Set());
16
+ }
17
+ }
18
+ VoidTag.prototype.formatNode = function (tag, attrs, innerHTML) {
19
+ var addClosingSlash = this.addClosingSlash;
20
+ var closingSpace = (addClosingSlash && attrs && !attrs.endsWith(' ')) ? ' ' : '';
21
+ var closingSlash = addClosingSlash ? "".concat(closingSpace, "/") : '';
22
+ return this.isVoidElement(tag.toLowerCase()) ? "<".concat(tag).concat(attrs).concat(closingSlash, ">") : "<".concat(tag).concat(attrs, ">").concat(innerHTML, "</").concat(tag, ">");
23
+ };
24
+ VoidTag.prototype.isVoidElement = function (tag) {
25
+ return this.voidTags.has(tag);
26
+ };
27
+ return VoidTag;
28
+ }());
29
+ exports.default = VoidTag;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "5.3.2",
3
+ "version": "5.4.1",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -31,8 +31,8 @@
31
31
  "prepare": "cd test && yarn install"
32
32
  },
33
33
  "keywords": [
34
- "parser",
35
34
  "html",
35
+ "parser",
36
36
  "nodejs",
37
37
  "typescript"
38
38
  ],