node-html-parser 5.1.0 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,23 +4,9 @@ All notable changes to this project will be documented in this file. See [standa
4
4
 
5
5
  ## [5.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.1.0) (2021-10-28)
6
6
 
7
-
8
- ### ⚠ BREAKING CHANGES
9
-
10
- * Add esm named export support (closes #160 closes #139)
11
-
12
7
  ### Features
13
8
 
14
- * Add esm named export support (closes [#160](https://github.com/taoqf/node-fast-html-parser/issues/160) closes [#139](https://github.com/taoqf/node-fast-html-parser/issues/139)) ([0d4b922](https://github.com/taoqf/node-fast-html-parser/commit/0d4b922eefd6210fe802991e464b21b0c69d5f63))
15
- * Added HTMLElement#getElementsByTagName ([d462e44](https://github.com/taoqf/node-fast-html-parser/commit/d462e449e7ebb00a5a43fb574133681ad5a62475))
16
- * Expose `HTMLElement#rawAttrs` (make public) ([34f1595](https://github.com/taoqf/node-fast-html-parser/commit/34f1595756c0974b6ae7ef5755a615f09e421f32))
17
- * Improved parsing performance + matching (closes [#164](https://github.com/taoqf/node-fast-html-parser/issues/164)) ([3c5b8e2](https://github.com/taoqf/node-fast-html-parser/commit/3c5b8e2a9104b01a8ca899a7970507463e42adaf))
18
-
19
-
20
- ### Bug Fixes
21
-
22
- * Add null to return type for HTMLElement#querySelector (closes [#157](https://github.com/taoqf/node-fast-html-parser/issues/157)) ([2b65583](https://github.com/taoqf/node-fast-html-parser/commit/2b655839bd3868c41fb19cae5786ca097565bc7f))
23
- * blockTextElements incorrectly matching partial tag (detail) (fixes [#156](https://github.com/taoqf/node-fast-html-parser/issues/156) fixes [#124](https://github.com/taoqf/node-fast-html-parser/issues/124)) ([6823349](https://github.com/taoqf/node-fast-html-parser/commit/6823349fdf1809c7484c70d948aa24930ef4983f))
9
+ * Exposed `HTMLElement#rawAttrs` (made public) ([34f1595](https://github.com/taoqf/node-fast-html-parser/commit/34f1595756c0974b6ae7ef5755a615f09e421f32))
24
10
 
25
11
  ## [5.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.0.0) (2021-10-10)
26
12
 
package/README.md CHANGED
@@ -19,15 +19,18 @@ npm install --save node-html-parser
19
19
 
20
20
  ## Performance
21
21
 
22
- Faster than htmlparser2!
23
-
24
22
  ```shell
25
- htmlparser :26.7111 ms/file ± 170.066
26
- cheerio :24.2480 ms/file ± 17.1711
27
- parse5 :13.7239 ms/file ± 8.68561
28
- high5 :7.75466 ms/file ± 5.33549
29
- htmlparser2 :5.27376 ms/file ± 8.68456
30
- node-html-parser:2.85768 ms/file ± 2.87784
23
+ cheerio :12.0726 ms/file ± 7.31605
24
+ parse5 :8.18615 ms/file ± 6.15337
25
+ node-html-parser (last release):2.16533 ms/file ± 1.56924
26
+ htmlparser :17.0658 ms/file ± 120.901
27
+ htmlparser2 :2.62695 ms/file ± 4.17579
28
+ node-html-parser:2.14907 ms/file ± 1.66632
29
+ html-parser :24.6505 ms/file ± 18.9996
30
+ htmljs-parser :5.81797 ms/file ± 6.55537
31
+ html-dom-parser :2.52265 ms/file ± 3.54858
32
+ html5parser :2.01144 ms/file ± 2.53570
33
+ high5 :3.91342 ms/file ± 2.65563
31
34
  ```
32
35
 
33
36
  Tested with [htmlparser-benchmark](https://github.com/AndreasMadsen/htmlparser-benchmark).
@@ -70,15 +73,15 @@ var root = HTMLParser.parse('<ul id="list"><li>Hello World</li></ul>');
70
73
 
71
74
  ### parse(data[, options])
72
75
 
73
- Parse given data, and return root of the generated DOM.
76
+ Parse the data provided, and return the root of the generated DOM.
74
77
 
75
78
  - **data**, data to parse
76
79
  - **options**, parse options
77
80
 
78
81
  ```js
79
82
  {
80
- lowerCaseTagName: false, // convert tag name to lower case (hurt performance heavily)
81
- comment: false, // retrieve comments (hurt performance slightly)
83
+ lowerCaseTagName: false, // convert tag name to lower case (hurts performance heavily)
84
+ comment: false, // retrieve comments (hurts performance slightly)
82
85
  blockTextElements: {
83
86
  script: true, // keep text content when parsing
84
87
  noscript: true, // keep text content when parsing
@@ -90,7 +93,7 @@ Parse given data, and return root of the generated DOM.
90
93
 
91
94
  ### valid(data[, options])
92
95
 
93
- Parse given data, return true if the givent data is valid, and return false if not.
96
+ Parse the data provided and return true if it is valid, or false if not.
94
97
 
95
98
  ## HTMLElement Methods
96
99
 
@@ -106,7 +109,7 @@ Remove whitespaces in this sub tree.
106
109
 
107
110
  Query CSS selector to find matching nodes.
108
111
 
109
- Note: Full css3 selector supported since v3.0.0.
112
+ Note: The full range of CSS3 selectors has been supported since v3.0.0.
110
113
 
111
114
  ### HTMLElement#querySelector(selector)
112
115
 
@@ -116,7 +119,7 @@ Query CSS Selector to find matching node.
116
119
 
117
120
  Get all elements with the specified tagName.
118
121
 
119
- Note: * for all elements.
122
+ Note: Use * for all elements.
120
123
 
121
124
  ### HTMLElement#closest(selector)
122
125
 
@@ -128,7 +131,7 @@ Append a child node to childNodes
128
131
 
129
132
  ### HTMLElement#insertAdjacentHTML(where, html)
130
133
 
131
- parses the specified text as HTML and inserts the resulting nodes into the DOM tree at a specified position.
134
+ Parses the specified text as HTML and inserts the resulting nodes into the DOM tree at a specified position.
132
135
 
133
136
  ### HTMLElement#setAttribute(key: string, value: string)
134
137
 
@@ -186,15 +189,15 @@ Remove class name.
186
189
 
187
190
  #### HTMLElement#classList.toggle(className: string):void
188
191
 
189
- Toggle class.
192
+ Toggle class. Remove it if it is already included, otherwise add it.
190
193
 
191
194
  #### HTMLElement#classList.contains(className: string): boolean
192
195
 
193
- Get if contains
196
+ Returns true if the class name is already in the classList.
194
197
 
195
198
  #### HTMLElement#classList.values()
196
199
 
197
- get class names
200
+ Get class names.
198
201
 
199
202
  ## HTMLElement Properties
200
203
 
@@ -205,28 +208,28 @@ Get unescaped text value of current node and its children. Like `innerText`.
205
208
 
206
209
  ### HTMLElement#rawText
207
210
 
208
- Get escaped (as-it) text value of current node and its children. May have
211
+ Get escaped (as-is) text value of current node and its children. May have
209
212
  `&amp;` in it. (fast)
210
213
 
211
214
  ### HTMLElement#tagName
212
215
 
213
- Get tag name of HTMLElement. Notice: the returned value would be an uppercase string.
216
+ Get or set the tag name of the HTMLElement. Note: the getter returns the name as an uppercase string, regardless of the case passed to the setter.
214
217
 
215
218
  ### HTMLElement#structuredText
216
219
 
217
- Get structured Text
220
+ Get structured Text.
218
221
 
219
222
  ### HTMLElement#structure
220
223
 
221
- Get DOM structure
224
+ Get DOM structure.
222
225
 
223
226
  ### HTMLElement#firstChild
224
227
 
225
- Get first child node
228
+ Get first child node.
226
229
 
227
230
  ### HTMLElement#lastChild
228
231
 
229
- Get last child node
232
+ Get last child node.
230
233
 
231
234
  ### HTMLElement#innerHTML
232
235
 
package/dist/main.js CHANGED
@@ -482,6 +482,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
482
482
  get: function () {
483
483
  return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
484
484
  },
485
+ set: function (newname) {
486
+ this.rawTagName = newname.toLowerCase();
487
+ },
485
488
  enumerable: false,
486
489
  configurable: true
487
490
  });
@@ -76,6 +76,7 @@ export default class HTMLElement extends Node {
76
76
  */
77
77
  exchangeChild(oldNode: Node, newNode: Node): void;
78
78
  get tagName(): string;
79
+ set tagName(newname: string);
79
80
  get localName(): string;
80
81
  get isVoidElement(): boolean;
81
82
  /**
@@ -236,6 +236,9 @@ var HTMLElement = /** @class */ (function (_super) {
236
236
  get: function () {
237
237
  return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
238
238
  },
239
+ set: function (newname) {
240
+ this.rawTagName = newname.toLowerCase();
241
+ },
239
242
  enumerable: false,
240
243
  configurable: true
241
244
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "5.1.0",
3
+ "version": "5.2.0",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -60,25 +60,32 @@
60
60
  "@typescript-eslint/parser": "latest",
61
61
  "blanket": "latest",
62
62
  "cheerio": "^1.0.0-rc.5",
63
- "rimraf": "^3.0.2",
63
+ "cross-env": "^7.0.3",
64
64
  "eslint": "^7.32.0",
65
65
  "eslint-config-prettier": "latest",
66
66
  "eslint-plugin-import": "latest",
67
67
  "high5": "^1.0.0",
68
+ "html-dom-parser": "^1.0.4",
69
+ "html-parser": "^0.11.0",
70
+ "html5": "^1.0.5",
71
+ "html5parser": "^2.0.2",
72
+ "htmljs-parser": "^2.11.1",
68
73
  "htmlparser": "^1.7.7",
69
74
  "htmlparser-benchmark": "^1.1.3",
70
75
  "htmlparser2": "^6.0.0",
71
76
  "mocha": "latest",
72
77
  "mocha-each": "^2.0.1",
78
+ "neutron-html5parser": "^0.2.0",
73
79
  "np": "latest",
74
80
  "parse5": "^6.0.1",
81
+ "rimraf": "^3.0.2",
82
+ "saxes": "^6.0.0",
75
83
  "should": "latest",
76
84
  "spec": "latest",
77
85
  "standard-version": "^9.3.1",
78
86
  "travis-cov": "latest",
79
87
  "ts-node": "^10.2.1",
80
- "typescript": "latest",
81
- "cross-env": "^7.0.3"
88
+ "typescript": "latest"
82
89
  },
83
90
  "config": {
84
91
  "blanket": {