node-html-parser 5.1.0 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -15
- package/README.md +27 -24
- package/dist/main.js +3 -0
- package/dist/nodes/html.d.ts +1 -0
- package/dist/nodes/html.js +3 -0
- package/package.json +11 -4
package/CHANGELOG.md
CHANGED
|
@@ -4,23 +4,9 @@ All notable changes to this project will be documented in this file. See [standa
|
|
|
4
4
|
|
|
5
5
|
## [5.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.1.0) (2021-10-28)
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
### ⚠ BREAKING CHANGES
|
|
9
|
-
|
|
10
|
-
* Add esm named export support (closes #160 closes #139)
|
|
11
|
-
|
|
12
7
|
### Features
|
|
13
8
|
|
|
14
|
-
*
|
|
15
|
-
* Added HTMLElement#getElementsByTagName ([d462e44](https://github.com/taoqf/node-fast-html-parser/commit/d462e449e7ebb00a5a43fb574133681ad5a62475))
|
|
16
|
-
* Expose `HTMLElement#rawAttrs` (make public) ([34f1595](https://github.com/taoqf/node-fast-html-parser/commit/34f1595756c0974b6ae7ef5755a615f09e421f32))
|
|
17
|
-
* Improved parsing performance + matching (closes [#164](https://github.com/taoqf/node-fast-html-parser/issues/164)) ([3c5b8e2](https://github.com/taoqf/node-fast-html-parser/commit/3c5b8e2a9104b01a8ca899a7970507463e42adaf))
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
### Bug Fixes
|
|
21
|
-
|
|
22
|
-
* Add null to return type for HTMLElement#querySelector (closes [#157](https://github.com/taoqf/node-fast-html-parser/issues/157)) ([2b65583](https://github.com/taoqf/node-fast-html-parser/commit/2b655839bd3868c41fb19cae5786ca097565bc7f))
|
|
23
|
-
* blockTextElements incorrectly matching partial tag (detail) (fixes [#156](https://github.com/taoqf/node-fast-html-parser/issues/156) fixes [#124](https://github.com/taoqf/node-fast-html-parser/issues/124)) ([6823349](https://github.com/taoqf/node-fast-html-parser/commit/6823349fdf1809c7484c70d948aa24930ef4983f))
|
|
9
|
+
* Exposed `HTMLElement#rawAttrs` (made public) ([34f1595](https://github.com/taoqf/node-fast-html-parser/commit/34f1595756c0974b6ae7ef5755a615f09e421f32))
|
|
24
10
|
|
|
25
11
|
## [5.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.0.0) (2021-10-10)
|
|
26
12
|
|
package/README.md
CHANGED
|
@@ -19,15 +19,18 @@ npm install --save node-html-parser
|
|
|
19
19
|
|
|
20
20
|
## Performance
|
|
21
21
|
|
|
22
|
-
Faster than htmlparser2!
|
|
23
|
-
|
|
24
22
|
```shell
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
htmlparser2 :
|
|
30
|
-
node-html-parser:2.
|
|
23
|
+
cheerio :12.0726 ms/file ± 7.31605
|
|
24
|
+
parse5 :8.18615 ms/file ± 6.15337
|
|
25
|
+
node-html-parser (last release):2.16533 ms/file ± 1.56924
|
|
26
|
+
htmlparser :17.0658 ms/file ± 120.901
|
|
27
|
+
htmlparser2 :2.62695 ms/file ± 4.17579
|
|
28
|
+
node-html-parser:2.14907 ms/file ± 1.66632
|
|
29
|
+
html-parser :24.6505 ms/file ± 18.9996
|
|
30
|
+
htmljs-parser :5.81797 ms/file ± 6.55537
|
|
31
|
+
html-dom-parser :2.52265 ms/file ± 3.54858
|
|
32
|
+
html5parser :2.01144 ms/file ± 2.53570
|
|
33
|
+
high5 :3.91342 ms/file ± 2.65563
|
|
31
34
|
```
|
|
32
35
|
|
|
33
36
|
Tested with [htmlparser-benchmark](https://github.com/AndreasMadsen/htmlparser-benchmark).
|
|
@@ -70,15 +73,15 @@ var root = HTMLParser.parse('<ul id="list"><li>Hello World</li></ul>');
|
|
|
70
73
|
|
|
71
74
|
### parse(data[, options])
|
|
72
75
|
|
|
73
|
-
Parse given data, and return root of the generated DOM.
|
|
76
|
+
Parse the data provided, and return the root of the generated DOM.
|
|
74
77
|
|
|
75
78
|
- **data**, data to parse
|
|
76
79
|
- **options**, parse options
|
|
77
80
|
|
|
78
81
|
```js
|
|
79
82
|
{
|
|
80
|
-
lowerCaseTagName: false, // convert tag name to lower case (
|
|
81
|
-
comment: false, // retrieve comments (
|
|
83
|
+
lowerCaseTagName: false, // convert tag name to lower case (hurts performance heavily)
|
|
84
|
+
comment: false, // retrieve comments (hurts performance slightly)
|
|
82
85
|
blockTextElements: {
|
|
83
86
|
script: true, // keep text content when parsing
|
|
84
87
|
noscript: true, // keep text content when parsing
|
|
@@ -90,7 +93,7 @@ Parse given data, and return root of the generated DOM.
|
|
|
90
93
|
|
|
91
94
|
### valid(data[, options])
|
|
92
95
|
|
|
93
|
-
Parse
|
|
96
|
+
Parse the data provided, return true if the given data is valid, and return false if not.
|
|
94
97
|
|
|
95
98
|
## HTMLElement Methods
|
|
96
99
|
|
|
@@ -106,7 +109,7 @@ Remove whitespaces in this sub tree.
|
|
|
106
109
|
|
|
107
110
|
Query CSS selector to find matching nodes.
|
|
108
111
|
|
|
109
|
-
Note: Full
|
|
112
|
+
Note: Full range of CSS3 selectors supported since v3.0.0.
|
|
110
113
|
|
|
111
114
|
### HTMLElement#querySelector(selector)
|
|
112
115
|
|
|
@@ -116,7 +119,7 @@ Query CSS Selector to find matching node.
|
|
|
116
119
|
|
|
117
120
|
Get all elements with the specified tagName.
|
|
118
121
|
|
|
119
|
-
Note: * for all elements.
|
|
122
|
+
Note: Use * for all elements.
|
|
120
123
|
|
|
121
124
|
### HTMLElement#closest(selector)
|
|
122
125
|
|
|
@@ -128,7 +131,7 @@ Append a child node to childNodes
|
|
|
128
131
|
|
|
129
132
|
### HTMLElement#insertAdjacentHTML(where, html)
|
|
130
133
|
|
|
131
|
-
|
|
134
|
+
Parses the specified text as HTML and inserts the resulting nodes into the DOM tree at a specified position.
|
|
132
135
|
|
|
133
136
|
### HTMLElement#setAttribute(key: string, value: string)
|
|
134
137
|
|
|
@@ -186,15 +189,15 @@ Remove class name.
|
|
|
186
189
|
|
|
187
190
|
#### HTMLElement#classList.toggle(className: string):void
|
|
188
191
|
|
|
189
|
-
Toggle class.
|
|
192
|
+
Toggle class. Remove it if it is already included, otherwise add.
|
|
190
193
|
|
|
191
194
|
#### HTMLElement#classList.contains(className: string): boolean
|
|
192
195
|
|
|
193
|
-
|
|
196
|
+
Returns true if the classname is already in the classList.
|
|
194
197
|
|
|
195
198
|
#### HTMLElement#classList.values()
|
|
196
199
|
|
|
197
|
-
|
|
200
|
+
Get class names.
|
|
198
201
|
|
|
199
202
|
## HTMLElement Properties
|
|
200
203
|
|
|
@@ -205,28 +208,28 @@ Get unescaped text value of current node and its children. Like `innerText`.
|
|
|
205
208
|
|
|
206
209
|
### HTMLElement#rawText
|
|
207
210
|
|
|
208
|
-
Get escaped (as-
|
|
211
|
+
Get escaped (as-is) text value of current node and its children. May have
|
|
209
212
|
`&` in it. (fast)
|
|
210
213
|
|
|
211
214
|
### HTMLElement#tagName
|
|
212
215
|
|
|
213
|
-
Get tag name of HTMLElement. Notice: the returned value would be an uppercase string.
|
|
216
|
+
Get or Set tag name of HTMLElement. Notice: the returned value would be an uppercase string.
|
|
214
217
|
|
|
215
218
|
### HTMLElement#structuredText
|
|
216
219
|
|
|
217
|
-
Get structured Text
|
|
220
|
+
Get structured Text.
|
|
218
221
|
|
|
219
222
|
### HTMLElement#structure
|
|
220
223
|
|
|
221
|
-
Get DOM structure
|
|
224
|
+
Get DOM structure.
|
|
222
225
|
|
|
223
226
|
### HTMLElement#firstChild
|
|
224
227
|
|
|
225
|
-
Get first child node
|
|
228
|
+
Get first child node.
|
|
226
229
|
|
|
227
230
|
### HTMLElement#lastChild
|
|
228
231
|
|
|
229
|
-
Get last child node
|
|
232
|
+
Get last child node.
|
|
230
233
|
|
|
231
234
|
### HTMLElement#innerHTML
|
|
232
235
|
|
package/dist/main.js
CHANGED
|
@@ -482,6 +482,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
482
482
|
get: function () {
|
|
483
483
|
return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
|
|
484
484
|
},
|
|
485
|
+
set: function (newname) {
|
|
486
|
+
this.rawTagName = newname.toLowerCase();
|
|
487
|
+
},
|
|
485
488
|
enumerable: false,
|
|
486
489
|
configurable: true
|
|
487
490
|
});
|
package/dist/nodes/html.d.ts
CHANGED
package/dist/nodes/html.js
CHANGED
|
@@ -236,6 +236,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
236
236
|
get: function () {
|
|
237
237
|
return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
|
|
238
238
|
},
|
|
239
|
+
set: function (newname) {
|
|
240
|
+
this.rawTagName = newname.toLowerCase();
|
|
241
|
+
},
|
|
239
242
|
enumerable: false,
|
|
240
243
|
configurable: true
|
|
241
244
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "5.
|
|
3
|
+
"version": "5.2.0",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -60,25 +60,32 @@
|
|
|
60
60
|
"@typescript-eslint/parser": "latest",
|
|
61
61
|
"blanket": "latest",
|
|
62
62
|
"cheerio": "^1.0.0-rc.5",
|
|
63
|
-
"
|
|
63
|
+
"cross-env": "^7.0.3",
|
|
64
64
|
"eslint": "^7.32.0",
|
|
65
65
|
"eslint-config-prettier": "latest",
|
|
66
66
|
"eslint-plugin-import": "latest",
|
|
67
67
|
"high5": "^1.0.0",
|
|
68
|
+
"html-dom-parser": "^1.0.4",
|
|
69
|
+
"html-parser": "^0.11.0",
|
|
70
|
+
"html5": "^1.0.5",
|
|
71
|
+
"html5parser": "^2.0.2",
|
|
72
|
+
"htmljs-parser": "^2.11.1",
|
|
68
73
|
"htmlparser": "^1.7.7",
|
|
69
74
|
"htmlparser-benchmark": "^1.1.3",
|
|
70
75
|
"htmlparser2": "^6.0.0",
|
|
71
76
|
"mocha": "latest",
|
|
72
77
|
"mocha-each": "^2.0.1",
|
|
78
|
+
"neutron-html5parser": "^0.2.0",
|
|
73
79
|
"np": "latest",
|
|
74
80
|
"parse5": "^6.0.1",
|
|
81
|
+
"rimraf": "^3.0.2",
|
|
82
|
+
"saxes": "^6.0.0",
|
|
75
83
|
"should": "latest",
|
|
76
84
|
"spec": "latest",
|
|
77
85
|
"standard-version": "^9.3.1",
|
|
78
86
|
"travis-cov": "latest",
|
|
79
87
|
"ts-node": "^10.2.1",
|
|
80
|
-
"typescript": "latest"
|
|
81
|
-
"cross-env": "^7.0.3"
|
|
88
|
+
"typescript": "latest"
|
|
82
89
|
},
|
|
83
90
|
"config": {
|
|
84
91
|
"blanket": {
|