node-html-parser 5.1.0 → 5.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -15
- package/README.md +39 -24
- package/dist/main.js +245 -184
- package/dist/nodes/comment.d.ts +2 -1
- package/dist/nodes/comment.js +4 -1
- package/dist/nodes/html.d.ts +8 -0
- package/dist/nodes/html.js +87 -32
- package/dist/nodes/node.d.ts +1 -0
- package/dist/nodes/text.d.ts +1 -0
- package/dist/nodes/text.js +3 -0
- package/package.json +17 -10
package/CHANGELOG.md
CHANGED
|
@@ -4,23 +4,9 @@ All notable changes to this project will be documented in this file. See [standa
|
|
|
4
4
|
|
|
5
5
|
## [5.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.1.0) (2021-10-28)
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
### ⚠ BREAKING CHANGES
|
|
9
|
-
|
|
10
|
-
* Add esm named export support (closes #160 closes #139)
|
|
11
|
-
|
|
12
7
|
### Features
|
|
13
8
|
|
|
14
|
-
*
|
|
15
|
-
* Added HTMLElement#getElementsByTagName ([d462e44](https://github.com/taoqf/node-fast-html-parser/commit/d462e449e7ebb00a5a43fb574133681ad5a62475))
|
|
16
|
-
* Expose `HTMLElement#rawAttrs` (make public) ([34f1595](https://github.com/taoqf/node-fast-html-parser/commit/34f1595756c0974b6ae7ef5755a615f09e421f32))
|
|
17
|
-
* Improved parsing performance + matching (closes [#164](https://github.com/taoqf/node-fast-html-parser/issues/164)) ([3c5b8e2](https://github.com/taoqf/node-fast-html-parser/commit/3c5b8e2a9104b01a8ca899a7970507463e42adaf))
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
### Bug Fixes
|
|
21
|
-
|
|
22
|
-
* Add null to return type for HTMLElement#querySelector (closes [#157](https://github.com/taoqf/node-fast-html-parser/issues/157)) ([2b65583](https://github.com/taoqf/node-fast-html-parser/commit/2b655839bd3868c41fb19cae5786ca097565bc7f))
|
|
23
|
-
* blockTextElements incorrectly matching partial tag (detail) (fixes [#156](https://github.com/taoqf/node-fast-html-parser/issues/156) fixes [#124](https://github.com/taoqf/node-fast-html-parser/issues/124)) ([6823349](https://github.com/taoqf/node-fast-html-parser/commit/6823349fdf1809c7484c70d948aa24930ef4983f))
|
|
9
|
+
* Exposed `HTMLElement#rawAttrs` (made public) ([34f1595](https://github.com/taoqf/node-fast-html-parser/commit/34f1595756c0974b6ae7ef5755a615f09e421f32))
|
|
24
10
|
|
|
25
11
|
## [5.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.0.0) (2021-10-10)
|
|
26
12
|
|
package/README.md
CHANGED
|
@@ -19,15 +19,18 @@ npm install --save node-html-parser
|
|
|
19
19
|
|
|
20
20
|
## Performance
|
|
21
21
|
|
|
22
|
-
Faster than htmlparser2!
|
|
23
|
-
|
|
24
22
|
```shell
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
htmlparser2 :
|
|
30
|
-
node-html-parser:2.
|
|
23
|
+
cheerio :12.0726 ms/file ± 7.31605
|
|
24
|
+
parse5 :8.18615 ms/file ± 6.15337
|
|
25
|
+
node-html-parser (last release):2.16533 ms/file ± 1.56924
|
|
26
|
+
htmlparser :17.0658 ms/file ± 120.901
|
|
27
|
+
htmlparser2 :2.62695 ms/file ± 4.17579
|
|
28
|
+
node-html-parser:2.14907 ms/file ± 1.66632
|
|
29
|
+
html-parser :24.6505 ms/file ± 18.9996
|
|
30
|
+
htmljs-parser :5.81797 ms/file ± 6.55537
|
|
31
|
+
html-dom-parser :2.52265 ms/file ± 3.54858
|
|
32
|
+
html5parser :2.01144 ms/file ± 2.53570
|
|
33
|
+
high5 :3.91342 ms/file ± 2.65563
|
|
31
34
|
```
|
|
32
35
|
|
|
33
36
|
Tested with [htmlparser-benchmark](https://github.com/AndreasMadsen/htmlparser-benchmark).
|
|
@@ -70,15 +73,15 @@ var root = HTMLParser.parse('<ul id="list"><li>Hello World</li></ul>');
|
|
|
70
73
|
|
|
71
74
|
### parse(data[, options])
|
|
72
75
|
|
|
73
|
-
Parse
|
|
76
|
+
Parse the data provided, and return the root of the generated DOM.
|
|
74
77
|
|
|
75
78
|
- **data**, data to parse
|
|
76
79
|
- **options**, parse options
|
|
77
80
|
|
|
78
81
|
```js
|
|
79
82
|
{
|
|
80
|
-
lowerCaseTagName: false, // convert tag name to lower case (
|
|
81
|
-
comment: false, // retrieve comments (
|
|
83
|
+
lowerCaseTagName: false, // convert tag name to lower case (hurts performance heavily)
|
|
84
|
+
comment: false, // retrieve comments (hurts performance slightly)
|
|
82
85
|
blockTextElements: {
|
|
83
86
|
script: true, // keep text content when parsing
|
|
84
87
|
noscript: true, // keep text content when parsing
|
|
@@ -90,7 +93,7 @@ Parse given data, and return root of the generated DOM.
|
|
|
90
93
|
|
|
91
94
|
### valid(data[, options])
|
|
92
95
|
|
|
93
|
-
Parse
|
|
96
|
+
Parse the data provided, return true if the given data is valid, and return false if not.
|
|
94
97
|
|
|
95
98
|
## HTMLElement Methods
|
|
96
99
|
|
|
@@ -106,7 +109,7 @@ Remove whitespaces in this sub tree.
|
|
|
106
109
|
|
|
107
110
|
Query CSS selector to find matching nodes.
|
|
108
111
|
|
|
109
|
-
Note: Full
|
|
112
|
+
Note: Full range of CSS3 selectors supported since v3.0.0.
|
|
110
113
|
|
|
111
114
|
### HTMLElement#querySelector(selector)
|
|
112
115
|
|
|
@@ -116,7 +119,7 @@ Query CSS Selector to find matching node.
|
|
|
116
119
|
|
|
117
120
|
Get all elements with the specified tagName.
|
|
118
121
|
|
|
119
|
-
Note: * for all elements.
|
|
122
|
+
Note: Use * for all elements.
|
|
120
123
|
|
|
121
124
|
### HTMLElement#closest(selector)
|
|
122
125
|
|
|
@@ -128,7 +131,7 @@ Append a child node to childNodes
|
|
|
128
131
|
|
|
129
132
|
### HTMLElement#insertAdjacentHTML(where, html)
|
|
130
133
|
|
|
131
|
-
|
|
134
|
+
Parses the specified text as HTML and inserts the resulting nodes into the DOM tree at a specified position.
|
|
132
135
|
|
|
133
136
|
### HTMLElement#setAttribute(key: string, value: string)
|
|
134
137
|
|
|
@@ -186,15 +189,19 @@ Remove class name.
|
|
|
186
189
|
|
|
187
190
|
#### HTMLElement#classList.toggle(className: string):void
|
|
188
191
|
|
|
189
|
-
Toggle class.
|
|
192
|
+
Toggle class. Remove it if it is already included, otherwise add.
|
|
190
193
|
|
|
191
194
|
#### HTMLElement#classList.contains(className: string): boolean
|
|
192
195
|
|
|
193
|
-
|
|
196
|
+
Returns true if the classname is already in the classList.
|
|
194
197
|
|
|
195
198
|
#### HTMLElement#classList.values()
|
|
196
199
|
|
|
197
|
-
|
|
200
|
+
Get class names.
|
|
201
|
+
|
|
202
|
+
#### Node#clone()
|
|
203
|
+
|
|
204
|
+
Clone a node.
|
|
198
205
|
|
|
199
206
|
## HTMLElement Properties
|
|
200
207
|
|
|
@@ -205,28 +212,28 @@ Get unescaped text value of current node and its children. Like `innerText`.
|
|
|
205
212
|
|
|
206
213
|
### HTMLElement#rawText
|
|
207
214
|
|
|
208
|
-
Get escaped (as-
|
|
215
|
+
Get escaped (as-is) text value of current node and its children. May have
|
|
209
216
|
`&` in it. (fast)
|
|
210
217
|
|
|
211
218
|
### HTMLElement#tagName
|
|
212
219
|
|
|
213
|
-
Get tag name of HTMLElement. Notice: the returned value would be an uppercase string.
|
|
220
|
+
Get or Set tag name of HTMLElement. Notice: the returned value would be an uppercase string.
|
|
214
221
|
|
|
215
222
|
### HTMLElement#structuredText
|
|
216
223
|
|
|
217
|
-
Get structured Text
|
|
224
|
+
Get structured Text.
|
|
218
225
|
|
|
219
226
|
### HTMLElement#structure
|
|
220
227
|
|
|
221
|
-
Get DOM structure
|
|
228
|
+
Get DOM structure.
|
|
222
229
|
|
|
223
230
|
### HTMLElement#firstChild
|
|
224
231
|
|
|
225
|
-
Get first child node
|
|
232
|
+
Get first child node.
|
|
226
233
|
|
|
227
234
|
### HTMLElement#lastChild
|
|
228
235
|
|
|
229
|
-
Get last child node
|
|
236
|
+
Get last child node.
|
|
230
237
|
|
|
231
238
|
### HTMLElement#innerHTML
|
|
232
239
|
|
|
@@ -244,6 +251,14 @@ Returns a reference to the next child node of the current element's parent.
|
|
|
244
251
|
|
|
245
252
|
Returns a reference to the next child element of the current element's parent.
|
|
246
253
|
|
|
254
|
+
### HTMLElement#previousSibling
|
|
255
|
+
|
|
256
|
+
Returns a reference to the previous child node of the current element's parent.
|
|
257
|
+
|
|
258
|
+
### HTMLElement#previousElementSibling
|
|
259
|
+
|
|
260
|
+
Returns a reference to the previous child element of the current element's parent.
|
|
261
|
+
|
|
247
262
|
### HTMLElement#textContent
|
|
248
263
|
|
|
249
264
|
Get or Set textContent of current element, more efficient than [set_content](#htmlelementset_contentcontent-string--node--node).
|
package/dist/main.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
2
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
3
|
+
};
|
|
1
4
|
var __extends = (this && this.__extends) || (function () {
|
|
2
5
|
var extendStatics = function (d, b) {
|
|
3
6
|
extendStatics = Object.setPrototypeOf ||
|
|
@@ -13,9 +16,6 @@ var __extends = (this && this.__extends) || (function () {
|
|
|
13
16
|
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
14
17
|
};
|
|
15
18
|
})();
|
|
16
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
17
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
|
-
};
|
|
19
19
|
var __assign = (this && this.__assign) || function () {
|
|
20
20
|
__assign = Object.assign || function(t) {
|
|
21
21
|
for (var s, i = 1, n = arguments.length; i < n; i++) {
|
|
@@ -55,134 +55,51 @@ define("nodes/type", ["require", "exports"], function (require, exports) {
|
|
|
55
55
|
})(NodeType || (NodeType = {}));
|
|
56
56
|
exports.default = NodeType;
|
|
57
57
|
});
|
|
58
|
-
define("nodes/
|
|
58
|
+
define("nodes/node", ["require", "exports", "he"], function (require, exports, he_1) {
|
|
59
59
|
"use strict";
|
|
60
60
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
61
|
-
node_1 = __importDefault(node_1);
|
|
62
|
-
type_1 = __importDefault(type_1);
|
|
63
61
|
/**
|
|
64
|
-
*
|
|
65
|
-
* @param {string} value [description]
|
|
62
|
+
* Node Class as base class for TextNode and HTMLElement.
|
|
66
63
|
*/
|
|
67
|
-
var
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
64
|
+
var Node = /** @class */ (function () {
|
|
65
|
+
function Node(parentNode, range) {
|
|
66
|
+
if (parentNode === void 0) { parentNode = null; }
|
|
67
|
+
this.parentNode = parentNode;
|
|
68
|
+
this.childNodes = [];
|
|
69
|
+
Object.defineProperty(this, 'range', {
|
|
70
|
+
enumerable: false,
|
|
71
|
+
writable: true,
|
|
72
|
+
configurable: true,
|
|
73
|
+
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
74
|
+
});
|
|
78
75
|
}
|
|
79
|
-
Object.defineProperty(
|
|
80
|
-
get: function () {
|
|
81
|
-
return this._rawText;
|
|
82
|
-
},
|
|
83
|
-
/**
|
|
84
|
-
* Set rawText and invalidate trimmed caches
|
|
85
|
-
*/
|
|
86
|
-
set: function (text) {
|
|
87
|
-
this._rawText = text;
|
|
88
|
-
this._trimmedRawText = void 0;
|
|
89
|
-
this._trimmedText = void 0;
|
|
90
|
-
},
|
|
91
|
-
enumerable: false,
|
|
92
|
-
configurable: true
|
|
93
|
-
});
|
|
94
|
-
Object.defineProperty(TextNode.prototype, "trimmedRawText", {
|
|
95
|
-
/**
|
|
96
|
-
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
97
|
-
*/
|
|
98
|
-
get: function () {
|
|
99
|
-
if (this._trimmedRawText !== undefined)
|
|
100
|
-
return this._trimmedRawText;
|
|
101
|
-
this._trimmedRawText = trimText(this.rawText);
|
|
102
|
-
return this._trimmedRawText;
|
|
103
|
-
},
|
|
104
|
-
enumerable: false,
|
|
105
|
-
configurable: true
|
|
106
|
-
});
|
|
107
|
-
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
108
|
-
/**
|
|
109
|
-
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
110
|
-
*/
|
|
76
|
+
Object.defineProperty(Node.prototype, "innerText", {
|
|
111
77
|
get: function () {
|
|
112
|
-
|
|
113
|
-
return this._trimmedText;
|
|
114
|
-
this._trimmedText = trimText(this.text);
|
|
115
|
-
return this._trimmedText;
|
|
78
|
+
return this.rawText;
|
|
116
79
|
},
|
|
117
80
|
enumerable: false,
|
|
118
81
|
configurable: true
|
|
119
82
|
});
|
|
120
|
-
Object.defineProperty(
|
|
121
|
-
/**
|
|
122
|
-
* Get unescaped text value of current node and its children.
|
|
123
|
-
* @return {string} text content
|
|
124
|
-
*/
|
|
83
|
+
Object.defineProperty(Node.prototype, "textContent", {
|
|
125
84
|
get: function () {
|
|
126
85
|
return (0, he_1.decode)(this.rawText);
|
|
127
86
|
},
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
});
|
|
131
|
-
Object.defineProperty(TextNode.prototype, "isWhitespace", {
|
|
132
|
-
/**
|
|
133
|
-
* Detect if the node contains only white space.
|
|
134
|
-
* @return {boolean}
|
|
135
|
-
*/
|
|
136
|
-
get: function () {
|
|
137
|
-
return /^(\s| )*$/.test(this.rawText);
|
|
87
|
+
set: function (val) {
|
|
88
|
+
this.rawText = (0, he_1.encode)(val);
|
|
138
89
|
},
|
|
139
90
|
enumerable: false,
|
|
140
91
|
configurable: true
|
|
141
92
|
});
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
return TextNode;
|
|
146
|
-
}(node_1.default));
|
|
147
|
-
exports.default = TextNode;
|
|
148
|
-
/**
|
|
149
|
-
* Trim whitespace except single leading/trailing non-breaking space
|
|
150
|
-
*/
|
|
151
|
-
function trimText(text) {
|
|
152
|
-
var i = 0;
|
|
153
|
-
var startPos;
|
|
154
|
-
var endPos;
|
|
155
|
-
while (i >= 0 && i < text.length) {
|
|
156
|
-
if (/\S/.test(text[i])) {
|
|
157
|
-
if (startPos === undefined) {
|
|
158
|
-
startPos = i;
|
|
159
|
-
i = text.length;
|
|
160
|
-
}
|
|
161
|
-
else {
|
|
162
|
-
endPos = i;
|
|
163
|
-
i = void 0;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
if (startPos === undefined)
|
|
167
|
-
i++;
|
|
168
|
-
else
|
|
169
|
-
i--;
|
|
170
|
-
}
|
|
171
|
-
if (startPos === undefined)
|
|
172
|
-
startPos = 0;
|
|
173
|
-
if (endPos === undefined)
|
|
174
|
-
endPos = text.length - 1;
|
|
175
|
-
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
176
|
-
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
177
|
-
return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
178
|
-
}
|
|
93
|
+
return Node;
|
|
94
|
+
}());
|
|
95
|
+
exports.default = Node;
|
|
179
96
|
});
|
|
180
|
-
define("matcher", ["require", "exports", "nodes/type"], function (require, exports,
|
|
97
|
+
define("matcher", ["require", "exports", "nodes/type"], function (require, exports, type_1) {
|
|
181
98
|
"use strict";
|
|
182
99
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
183
|
-
|
|
100
|
+
type_1 = __importDefault(type_1);
|
|
184
101
|
function isTag(node) {
|
|
185
|
-
return node && node.nodeType ===
|
|
102
|
+
return node && node.nodeType === type_1.default.ELEMENT_NODE;
|
|
186
103
|
}
|
|
187
104
|
function getAttributeValue(elem, name) {
|
|
188
105
|
return isTag(elem) ? elem.getAttribute(name) : undefined;
|
|
@@ -282,21 +199,146 @@ define("matcher", ["require", "exports", "nodes/type"], function (require, expor
|
|
|
282
199
|
findAll: findAll
|
|
283
200
|
};
|
|
284
201
|
});
|
|
285
|
-
define("nodes/
|
|
202
|
+
define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], function (require, exports, he_2, node_1, type_2) {
|
|
203
|
+
"use strict";
|
|
204
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
205
|
+
node_1 = __importDefault(node_1);
|
|
206
|
+
type_2 = __importDefault(type_2);
|
|
207
|
+
/**
|
|
208
|
+
* TextNode to contain a text element in DOM tree.
|
|
209
|
+
* @param {string} value [description]
|
|
210
|
+
*/
|
|
211
|
+
var TextNode = /** @class */ (function (_super) {
|
|
212
|
+
__extends(TextNode, _super);
|
|
213
|
+
function TextNode(rawText, parentNode, range) {
|
|
214
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
215
|
+
/**
|
|
216
|
+
* Node Type declaration.
|
|
217
|
+
* @type {Number}
|
|
218
|
+
*/
|
|
219
|
+
_this.nodeType = type_2.default.TEXT_NODE;
|
|
220
|
+
_this._rawText = rawText;
|
|
221
|
+
return _this;
|
|
222
|
+
}
|
|
223
|
+
TextNode.prototype.clone = function () {
|
|
224
|
+
return new TextNode(this._rawText, null);
|
|
225
|
+
};
|
|
226
|
+
Object.defineProperty(TextNode.prototype, "rawText", {
|
|
227
|
+
get: function () {
|
|
228
|
+
return this._rawText;
|
|
229
|
+
},
|
|
230
|
+
/**
|
|
231
|
+
* Set rawText and invalidate trimmed caches
|
|
232
|
+
*/
|
|
233
|
+
set: function (text) {
|
|
234
|
+
this._rawText = text;
|
|
235
|
+
this._trimmedRawText = void 0;
|
|
236
|
+
this._trimmedText = void 0;
|
|
237
|
+
},
|
|
238
|
+
enumerable: false,
|
|
239
|
+
configurable: true
|
|
240
|
+
});
|
|
241
|
+
Object.defineProperty(TextNode.prototype, "trimmedRawText", {
|
|
242
|
+
/**
|
|
243
|
+
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
244
|
+
*/
|
|
245
|
+
get: function () {
|
|
246
|
+
if (this._trimmedRawText !== undefined)
|
|
247
|
+
return this._trimmedRawText;
|
|
248
|
+
this._trimmedRawText = trimText(this.rawText);
|
|
249
|
+
return this._trimmedRawText;
|
|
250
|
+
},
|
|
251
|
+
enumerable: false,
|
|
252
|
+
configurable: true
|
|
253
|
+
});
|
|
254
|
+
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
255
|
+
/**
|
|
256
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
257
|
+
*/
|
|
258
|
+
get: function () {
|
|
259
|
+
if (this._trimmedText !== undefined)
|
|
260
|
+
return this._trimmedText;
|
|
261
|
+
this._trimmedText = trimText(this.text);
|
|
262
|
+
return this._trimmedText;
|
|
263
|
+
},
|
|
264
|
+
enumerable: false,
|
|
265
|
+
configurable: true
|
|
266
|
+
});
|
|
267
|
+
Object.defineProperty(TextNode.prototype, "text", {
|
|
268
|
+
/**
|
|
269
|
+
* Get unescaped text value of current node and its children.
|
|
270
|
+
* @return {string} text content
|
|
271
|
+
*/
|
|
272
|
+
get: function () {
|
|
273
|
+
return (0, he_2.decode)(this.rawText);
|
|
274
|
+
},
|
|
275
|
+
enumerable: false,
|
|
276
|
+
configurable: true
|
|
277
|
+
});
|
|
278
|
+
Object.defineProperty(TextNode.prototype, "isWhitespace", {
|
|
279
|
+
/**
|
|
280
|
+
* Detect if the node contains only white space.
|
|
281
|
+
* @return {boolean}
|
|
282
|
+
*/
|
|
283
|
+
get: function () {
|
|
284
|
+
return /^(\s| )*$/.test(this.rawText);
|
|
285
|
+
},
|
|
286
|
+
enumerable: false,
|
|
287
|
+
configurable: true
|
|
288
|
+
});
|
|
289
|
+
TextNode.prototype.toString = function () {
|
|
290
|
+
return this.rawText;
|
|
291
|
+
};
|
|
292
|
+
return TextNode;
|
|
293
|
+
}(node_1.default));
|
|
294
|
+
exports.default = TextNode;
|
|
295
|
+
/**
|
|
296
|
+
* Trim whitespace except single leading/trailing non-breaking space
|
|
297
|
+
*/
|
|
298
|
+
function trimText(text) {
|
|
299
|
+
var i = 0;
|
|
300
|
+
var startPos;
|
|
301
|
+
var endPos;
|
|
302
|
+
while (i >= 0 && i < text.length) {
|
|
303
|
+
if (/\S/.test(text[i])) {
|
|
304
|
+
if (startPos === undefined) {
|
|
305
|
+
startPos = i;
|
|
306
|
+
i = text.length;
|
|
307
|
+
}
|
|
308
|
+
else {
|
|
309
|
+
endPos = i;
|
|
310
|
+
i = void 0;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
if (startPos === undefined)
|
|
314
|
+
i++;
|
|
315
|
+
else
|
|
316
|
+
i--;
|
|
317
|
+
}
|
|
318
|
+
if (startPos === undefined)
|
|
319
|
+
startPos = 0;
|
|
320
|
+
if (endPos === undefined)
|
|
321
|
+
endPos = text.length - 1;
|
|
322
|
+
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
323
|
+
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
324
|
+
return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
325
|
+
}
|
|
326
|
+
});
|
|
327
|
+
define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher", "nodes/comment", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, css_select_1, he_3, back_1, matcher_1, comment_1, node_2, text_1, type_3) {
|
|
286
328
|
"use strict";
|
|
287
329
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
288
330
|
exports.parse = exports.base_parse = void 0;
|
|
289
|
-
|
|
290
|
-
node_2 = __importDefault(node_2);
|
|
291
|
-
type_3 = __importDefault(type_3);
|
|
292
|
-
text_1 = __importDefault(text_1);
|
|
293
|
-
matcher_1 = __importDefault(matcher_1);
|
|
331
|
+
he_3 = __importDefault(he_3);
|
|
294
332
|
back_1 = __importDefault(back_1);
|
|
333
|
+
matcher_1 = __importDefault(matcher_1);
|
|
295
334
|
comment_1 = __importDefault(comment_1);
|
|
335
|
+
node_2 = __importDefault(node_2);
|
|
336
|
+
text_1 = __importDefault(text_1);
|
|
337
|
+
type_3 = __importDefault(type_3);
|
|
296
338
|
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
297
339
|
function decode(val) {
|
|
298
340
|
// clone string
|
|
299
|
-
return JSON.parse(JSON.stringify(
|
|
341
|
+
return JSON.parse(JSON.stringify(he_3.default.decode(val)));
|
|
300
342
|
}
|
|
301
343
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
302
344
|
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
|
|
@@ -332,7 +374,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
332
374
|
}
|
|
333
375
|
DOMTokenList.prototype._validate = function (c) {
|
|
334
376
|
if (/\s/.test(c)) {
|
|
335
|
-
throw new Error("DOMException in DOMTokenList.add: The token '"
|
|
377
|
+
throw new Error("DOMException in DOMTokenList.add: The token '".concat(c, "' contains HTML space characters, which are not valid in tokens."));
|
|
336
378
|
}
|
|
337
379
|
};
|
|
338
380
|
DOMTokenList.prototype.add = function (c) {
|
|
@@ -416,14 +458,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
416
458
|
);
|
|
417
459
|
if (keyAttrs.id) {
|
|
418
460
|
if (!rawAttrs) {
|
|
419
|
-
_this.rawAttrs = "id=\""
|
|
461
|
+
_this.rawAttrs = "id=\"".concat(keyAttrs.id, "\"");
|
|
420
462
|
}
|
|
421
463
|
}
|
|
422
464
|
if (keyAttrs.class) {
|
|
423
465
|
if (!rawAttrs) {
|
|
424
|
-
var cls = "class=\""
|
|
466
|
+
var cls = "class=\"".concat(_this.classList.toString(), "\"");
|
|
425
467
|
if (_this.rawAttrs) {
|
|
426
|
-
_this.rawAttrs += " "
|
|
468
|
+
_this.rawAttrs += " ".concat(cls);
|
|
427
469
|
}
|
|
428
470
|
else {
|
|
429
471
|
_this.rawAttrs = cls;
|
|
@@ -482,6 +524,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
482
524
|
get: function () {
|
|
483
525
|
return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
|
|
484
526
|
},
|
|
527
|
+
set: function (newname) {
|
|
528
|
+
this.rawTagName = newname.toLowerCase();
|
|
529
|
+
},
|
|
485
530
|
enumerable: false,
|
|
486
531
|
configurable: true
|
|
487
532
|
});
|
|
@@ -565,7 +610,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
565
610
|
else {
|
|
566
611
|
var text = node.trimmedText;
|
|
567
612
|
if (currentBlock.prependWhitespace) {
|
|
568
|
-
text = " "
|
|
613
|
+
text = " ".concat(text);
|
|
569
614
|
currentBlock.prependWhitespace = false;
|
|
570
615
|
}
|
|
571
616
|
currentBlock.push(text);
|
|
@@ -586,8 +631,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
586
631
|
HTMLElement.prototype.toString = function () {
|
|
587
632
|
var tag = this.rawTagName;
|
|
588
633
|
if (tag) {
|
|
589
|
-
var attrs = this.rawAttrs ? " "
|
|
590
|
-
return this.isVoidElement ? "<"
|
|
634
|
+
var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
|
|
635
|
+
return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
|
|
591
636
|
}
|
|
592
637
|
return this.innerHTML;
|
|
593
638
|
};
|
|
@@ -683,9 +728,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
683
728
|
res.push(' '.repeat(indention) + str);
|
|
684
729
|
}
|
|
685
730
|
function dfs(node) {
|
|
686
|
-
var idStr = node.id ? "#"
|
|
687
|
-
var classStr = node.classList.length ? "."
|
|
688
|
-
write(""
|
|
731
|
+
var idStr = node.id ? "#".concat(node.id) : '';
|
|
732
|
+
var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
733
|
+
write("".concat(node.rawTagName).concat(idStr).concat(classStr));
|
|
689
734
|
indention++;
|
|
690
735
|
node.childNodes.forEach(function (childNode) {
|
|
691
736
|
if (childNode.nodeType === type_3.default.ELEMENT_NODE) {
|
|
@@ -944,7 +989,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
944
989
|
if (val === undefined || val === 'null') {
|
|
945
990
|
return name;
|
|
946
991
|
}
|
|
947
|
-
return name
|
|
992
|
+
return "".concat(name, "=").concat(val);
|
|
948
993
|
})
|
|
949
994
|
.join(' ');
|
|
950
995
|
// Update this.id
|
|
@@ -991,7 +1036,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
991
1036
|
var val = _this.quoteAttribute(attrs[name]);
|
|
992
1037
|
if (val === 'null' || val === '""')
|
|
993
1038
|
return name;
|
|
994
|
-
return name
|
|
1039
|
+
return "".concat(name, "=").concat(val);
|
|
995
1040
|
})
|
|
996
1041
|
.join(' ');
|
|
997
1042
|
// Update this.id
|
|
@@ -1019,7 +1064,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1019
1064
|
var val = attributes[name];
|
|
1020
1065
|
if (val === 'null' || val === '""')
|
|
1021
1066
|
return name;
|
|
1022
|
-
return name
|
|
1067
|
+
return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
|
|
1023
1068
|
})
|
|
1024
1069
|
.join(' ');
|
|
1025
1070
|
};
|
|
@@ -1061,7 +1106,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1061
1106
|
});
|
|
1062
1107
|
}
|
|
1063
1108
|
else {
|
|
1064
|
-
throw new Error("The value provided ('"
|
|
1109
|
+
throw new Error("The value provided ('".concat(where, "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'"));
|
|
1065
1110
|
}
|
|
1066
1111
|
// if (!where || html === undefined || html === null) {
|
|
1067
1112
|
// return;
|
|
@@ -1106,6 +1151,45 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1106
1151
|
enumerable: false,
|
|
1107
1152
|
configurable: true
|
|
1108
1153
|
});
|
|
1154
|
+
Object.defineProperty(HTMLElement.prototype, "previousSibling", {
|
|
1155
|
+
get: function () {
|
|
1156
|
+
if (this.parentNode) {
|
|
1157
|
+
var children = this.parentNode.childNodes;
|
|
1158
|
+
var i = children.length;
|
|
1159
|
+
while (i > 0) {
|
|
1160
|
+
var child = children[--i];
|
|
1161
|
+
if (this === child)
|
|
1162
|
+
return children[i - 1] || null;
|
|
1163
|
+
}
|
|
1164
|
+
return null;
|
|
1165
|
+
}
|
|
1166
|
+
},
|
|
1167
|
+
enumerable: false,
|
|
1168
|
+
configurable: true
|
|
1169
|
+
});
|
|
1170
|
+
Object.defineProperty(HTMLElement.prototype, "previousElementSibling", {
|
|
1171
|
+
get: function () {
|
|
1172
|
+
if (this.parentNode) {
|
|
1173
|
+
var children = this.parentNode.childNodes;
|
|
1174
|
+
var i = children.length;
|
|
1175
|
+
var find = false;
|
|
1176
|
+
while (i > 0) {
|
|
1177
|
+
var child = children[--i];
|
|
1178
|
+
if (find) {
|
|
1179
|
+
if (child instanceof HTMLElement) {
|
|
1180
|
+
return child || null;
|
|
1181
|
+
}
|
|
1182
|
+
}
|
|
1183
|
+
else if (this === child) {
|
|
1184
|
+
find = true;
|
|
1185
|
+
}
|
|
1186
|
+
}
|
|
1187
|
+
return null;
|
|
1188
|
+
}
|
|
1189
|
+
},
|
|
1190
|
+
enumerable: false,
|
|
1191
|
+
configurable: true
|
|
1192
|
+
});
|
|
1109
1193
|
Object.defineProperty(HTMLElement.prototype, "classNames", {
|
|
1110
1194
|
get: function () {
|
|
1111
1195
|
return this.classList.toString();
|
|
@@ -1113,6 +1197,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1113
1197
|
enumerable: false,
|
|
1114
1198
|
configurable: true
|
|
1115
1199
|
});
|
|
1200
|
+
/**
|
|
1201
|
+
* Clone this Node
|
|
1202
|
+
*/
|
|
1203
|
+
HTMLElement.prototype.clone = function () {
|
|
1204
|
+
return parse(this.toString()).firstChild;
|
|
1205
|
+
};
|
|
1116
1206
|
return HTMLElement;
|
|
1117
1207
|
}(node_2.default));
|
|
1118
1208
|
exports.default = HTMLElement;
|
|
@@ -1205,8 +1295,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1205
1295
|
pre: true,
|
|
1206
1296
|
};
|
|
1207
1297
|
var element_names = Object.keys(elements);
|
|
1208
|
-
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^"
|
|
1209
|
-
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^"
|
|
1298
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
1299
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
1210
1300
|
function element_should_be_ignore(tag) {
|
|
1211
1301
|
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
1212
1302
|
}
|
|
@@ -1221,7 +1311,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1221
1311
|
var noNestedTagIndex = undefined;
|
|
1222
1312
|
var match;
|
|
1223
1313
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1224
|
-
data = "<"
|
|
1314
|
+
data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
|
|
1225
1315
|
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1226
1316
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1227
1317
|
var frameFlagOffset = frameflag.length + 2;
|
|
@@ -1289,7 +1379,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1289
1379
|
stack.push(currentParent);
|
|
1290
1380
|
if (is_block_text_element(tagName)) {
|
|
1291
1381
|
// Find closing tag
|
|
1292
|
-
var closeMarkup = "</"
|
|
1382
|
+
var closeMarkup = "</".concat(tagName, ">");
|
|
1293
1383
|
var closeIndex = lowerCaseTagName
|
|
1294
1384
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1295
1385
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
@@ -1356,18 +1446,25 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1356
1446
|
if (last.parentNode && last.parentNode.parentNode) {
|
|
1357
1447
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
|
1358
1448
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
oneBefore.
|
|
1362
|
-
|
|
1363
|
-
|
|
1449
|
+
// this is wrong, becouse this will put the H3 outside the current right position which should be inside the current Html Element, see issue 152 for more info
|
|
1450
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1451
|
+
oneBefore.removeChild(last);
|
|
1452
|
+
last.childNodes.forEach(function (child) {
|
|
1453
|
+
oneBefore.parentNode.appendChild(child);
|
|
1454
|
+
});
|
|
1455
|
+
stack.pop();
|
|
1456
|
+
}
|
|
1364
1457
|
}
|
|
1365
1458
|
else {
|
|
1366
1459
|
// Single error <div> <h3> </div> handle: Just removes <h3>
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
|
|
1460
|
+
// Why remove? this is already a HtmlElement and the missing <H3> is already added in this case. see issue 152 for more info
|
|
1461
|
+
// eslint-disable-next-line no-lonely-if
|
|
1462
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1463
|
+
oneBefore.removeChild(last);
|
|
1464
|
+
last.childNodes.forEach(function (child) {
|
|
1465
|
+
oneBefore.appendChild(child);
|
|
1466
|
+
});
|
|
1467
|
+
}
|
|
1371
1468
|
}
|
|
1372
1469
|
}
|
|
1373
1470
|
else {
|
|
@@ -1386,45 +1483,6 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1386
1483
|
}
|
|
1387
1484
|
exports.parse = parse;
|
|
1388
1485
|
});
|
|
1389
|
-
define("nodes/node", ["require", "exports", "he"], function (require, exports, he_3) {
|
|
1390
|
-
"use strict";
|
|
1391
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1392
|
-
/**
|
|
1393
|
-
* Node Class as base class for TextNode and HTMLElement.
|
|
1394
|
-
*/
|
|
1395
|
-
var Node = /** @class */ (function () {
|
|
1396
|
-
function Node(parentNode, range) {
|
|
1397
|
-
if (parentNode === void 0) { parentNode = null; }
|
|
1398
|
-
this.parentNode = parentNode;
|
|
1399
|
-
this.childNodes = [];
|
|
1400
|
-
Object.defineProperty(this, 'range', {
|
|
1401
|
-
enumerable: false,
|
|
1402
|
-
writable: true,
|
|
1403
|
-
configurable: true,
|
|
1404
|
-
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
1405
|
-
});
|
|
1406
|
-
}
|
|
1407
|
-
Object.defineProperty(Node.prototype, "innerText", {
|
|
1408
|
-
get: function () {
|
|
1409
|
-
return this.rawText;
|
|
1410
|
-
},
|
|
1411
|
-
enumerable: false,
|
|
1412
|
-
configurable: true
|
|
1413
|
-
});
|
|
1414
|
-
Object.defineProperty(Node.prototype, "textContent", {
|
|
1415
|
-
get: function () {
|
|
1416
|
-
return (0, he_3.decode)(this.rawText);
|
|
1417
|
-
},
|
|
1418
|
-
set: function (val) {
|
|
1419
|
-
this.rawText = (0, he_3.encode)(val);
|
|
1420
|
-
},
|
|
1421
|
-
enumerable: false,
|
|
1422
|
-
configurable: true
|
|
1423
|
-
});
|
|
1424
|
-
return Node;
|
|
1425
|
-
}());
|
|
1426
|
-
exports.default = Node;
|
|
1427
|
-
});
|
|
1428
1486
|
define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], function (require, exports, node_3, type_4) {
|
|
1429
1487
|
"use strict";
|
|
1430
1488
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
@@ -1442,6 +1500,9 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
|
|
|
1442
1500
|
_this.nodeType = type_4.default.COMMENT_NODE;
|
|
1443
1501
|
return _this;
|
|
1444
1502
|
}
|
|
1503
|
+
CommentNode.prototype.clone = function () {
|
|
1504
|
+
return new CommentNode(this.rawText, null);
|
|
1505
|
+
};
|
|
1445
1506
|
Object.defineProperty(CommentNode.prototype, "text", {
|
|
1446
1507
|
/**
|
|
1447
1508
|
* Get unescaped text value of current node and its children.
|
|
@@ -1454,7 +1515,7 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
|
|
|
1454
1515
|
configurable: true
|
|
1455
1516
|
});
|
|
1456
1517
|
CommentNode.prototype.toString = function () {
|
|
1457
|
-
return "<!--"
|
|
1518
|
+
return "<!--".concat(this.rawText, "-->");
|
|
1458
1519
|
};
|
|
1459
1520
|
return CommentNode;
|
|
1460
1521
|
}(node_3.default));
|
package/dist/nodes/comment.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import HTMLElement from './html';
|
|
1
2
|
import Node from './node';
|
|
2
3
|
import NodeType from './type';
|
|
3
|
-
import HTMLElement from './html';
|
|
4
4
|
export default class CommentNode extends Node {
|
|
5
5
|
rawText: string;
|
|
6
|
+
clone(): CommentNode;
|
|
6
7
|
constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
|
|
7
8
|
/**
|
|
8
9
|
* Node Type declaration.
|
package/dist/nodes/comment.js
CHANGED
|
@@ -32,6 +32,9 @@ var CommentNode = /** @class */ (function (_super) {
|
|
|
32
32
|
_this.nodeType = type_1.default.COMMENT_NODE;
|
|
33
33
|
return _this;
|
|
34
34
|
}
|
|
35
|
+
CommentNode.prototype.clone = function () {
|
|
36
|
+
return new CommentNode(this.rawText, null);
|
|
37
|
+
};
|
|
35
38
|
Object.defineProperty(CommentNode.prototype, "text", {
|
|
36
39
|
/**
|
|
37
40
|
* Get unescaped text value of current node and its children.
|
|
@@ -44,7 +47,7 @@ var CommentNode = /** @class */ (function (_super) {
|
|
|
44
47
|
configurable: true
|
|
45
48
|
});
|
|
46
49
|
CommentNode.prototype.toString = function () {
|
|
47
|
-
return "<!--"
|
|
50
|
+
return "<!--".concat(this.rawText, "-->");
|
|
48
51
|
};
|
|
49
52
|
return CommentNode;
|
|
50
53
|
}(node_1.default));
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -76,6 +76,7 @@ export default class HTMLElement extends Node {
|
|
|
76
76
|
*/
|
|
77
77
|
exchangeChild(oldNode: Node, newNode: Node): void;
|
|
78
78
|
get tagName(): string;
|
|
79
|
+
set tagName(newname: string);
|
|
79
80
|
get localName(): string;
|
|
80
81
|
get isVoidElement(): boolean;
|
|
81
82
|
/**
|
|
@@ -188,11 +189,18 @@ export default class HTMLElement extends Node {
|
|
|
188
189
|
insertAdjacentHTML(where: InsertPosition, html: string): void;
|
|
189
190
|
get nextSibling(): Node;
|
|
190
191
|
get nextElementSibling(): HTMLElement;
|
|
192
|
+
get previousSibling(): Node;
|
|
193
|
+
get previousElementSibling(): HTMLElement;
|
|
191
194
|
get classNames(): string;
|
|
195
|
+
/**
|
|
196
|
+
* Clone this Node
|
|
197
|
+
*/
|
|
198
|
+
clone(): Node;
|
|
192
199
|
}
|
|
193
200
|
export interface Options {
|
|
194
201
|
lowerCaseTagName: boolean;
|
|
195
202
|
comment: boolean;
|
|
203
|
+
parseNoneClosedTags?: boolean;
|
|
196
204
|
blockTextElements: {
|
|
197
205
|
[tag: string]: boolean;
|
|
198
206
|
};
|
package/dist/nodes/html.js
CHANGED
|
@@ -39,14 +39,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
39
39
|
};
|
|
40
40
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
41
|
exports.parse = exports.base_parse = void 0;
|
|
42
|
-
var he_1 = __importDefault(require("he"));
|
|
43
42
|
var css_select_1 = require("css-select");
|
|
44
|
-
var node_1 = __importDefault(require("./node"));
|
|
45
|
-
var type_1 = __importDefault(require("./type"));
|
|
46
|
-
var text_1 = __importDefault(require("./text"));
|
|
47
|
-
var matcher_1 = __importDefault(require("../matcher"));
|
|
43
|
+
var he_1 = __importDefault(require("he"));
|
|
48
44
|
var back_1 = __importDefault(require("../back"));
|
|
45
|
+
var matcher_1 = __importDefault(require("../matcher"));
|
|
49
46
|
var comment_1 = __importDefault(require("./comment"));
|
|
47
|
+
var node_1 = __importDefault(require("./node"));
|
|
48
|
+
var text_1 = __importDefault(require("./text"));
|
|
49
|
+
var type_1 = __importDefault(require("./type"));
|
|
50
50
|
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
51
51
|
function decode(val) {
|
|
52
52
|
// clone string
|
|
@@ -86,7 +86,7 @@ var DOMTokenList = /** @class */ (function () {
|
|
|
86
86
|
}
|
|
87
87
|
DOMTokenList.prototype._validate = function (c) {
|
|
88
88
|
if (/\s/.test(c)) {
|
|
89
|
-
throw new Error("DOMException in DOMTokenList.add: The token '"
|
|
89
|
+
throw new Error("DOMException in DOMTokenList.add: The token '".concat(c, "' contains HTML space characters, which are not valid in tokens."));
|
|
90
90
|
}
|
|
91
91
|
};
|
|
92
92
|
DOMTokenList.prototype.add = function (c) {
|
|
@@ -170,14 +170,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
170
170
|
);
|
|
171
171
|
if (keyAttrs.id) {
|
|
172
172
|
if (!rawAttrs) {
|
|
173
|
-
_this.rawAttrs = "id=\""
|
|
173
|
+
_this.rawAttrs = "id=\"".concat(keyAttrs.id, "\"");
|
|
174
174
|
}
|
|
175
175
|
}
|
|
176
176
|
if (keyAttrs.class) {
|
|
177
177
|
if (!rawAttrs) {
|
|
178
|
-
var cls = "class=\""
|
|
178
|
+
var cls = "class=\"".concat(_this.classList.toString(), "\"");
|
|
179
179
|
if (_this.rawAttrs) {
|
|
180
|
-
_this.rawAttrs += " "
|
|
180
|
+
_this.rawAttrs += " ".concat(cls);
|
|
181
181
|
}
|
|
182
182
|
else {
|
|
183
183
|
_this.rawAttrs = cls;
|
|
@@ -236,6 +236,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
236
236
|
get: function () {
|
|
237
237
|
return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
|
|
238
238
|
},
|
|
239
|
+
set: function (newname) {
|
|
240
|
+
this.rawTagName = newname.toLowerCase();
|
|
241
|
+
},
|
|
239
242
|
enumerable: false,
|
|
240
243
|
configurable: true
|
|
241
244
|
});
|
|
@@ -319,7 +322,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
319
322
|
else {
|
|
320
323
|
var text = node.trimmedText;
|
|
321
324
|
if (currentBlock.prependWhitespace) {
|
|
322
|
-
text = " "
|
|
325
|
+
text = " ".concat(text);
|
|
323
326
|
currentBlock.prependWhitespace = false;
|
|
324
327
|
}
|
|
325
328
|
currentBlock.push(text);
|
|
@@ -340,8 +343,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
340
343
|
HTMLElement.prototype.toString = function () {
|
|
341
344
|
var tag = this.rawTagName;
|
|
342
345
|
if (tag) {
|
|
343
|
-
var attrs = this.rawAttrs ? " "
|
|
344
|
-
return this.isVoidElement ? "<"
|
|
346
|
+
var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
|
|
347
|
+
return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
|
|
345
348
|
}
|
|
346
349
|
return this.innerHTML;
|
|
347
350
|
};
|
|
@@ -437,9 +440,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
437
440
|
res.push(' '.repeat(indention) + str);
|
|
438
441
|
}
|
|
439
442
|
function dfs(node) {
|
|
440
|
-
var idStr = node.id ? "#"
|
|
441
|
-
var classStr = node.classList.length ? "."
|
|
442
|
-
write(""
|
|
443
|
+
var idStr = node.id ? "#".concat(node.id) : '';
|
|
444
|
+
var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
445
|
+
write("".concat(node.rawTagName).concat(idStr).concat(classStr));
|
|
443
446
|
indention++;
|
|
444
447
|
node.childNodes.forEach(function (childNode) {
|
|
445
448
|
if (childNode.nodeType === type_1.default.ELEMENT_NODE) {
|
|
@@ -698,7 +701,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
698
701
|
if (val === undefined || val === 'null') {
|
|
699
702
|
return name;
|
|
700
703
|
}
|
|
701
|
-
return name
|
|
704
|
+
return "".concat(name, "=").concat(val);
|
|
702
705
|
})
|
|
703
706
|
.join(' ');
|
|
704
707
|
// Update this.id
|
|
@@ -745,7 +748,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
745
748
|
var val = _this.quoteAttribute(attrs[name]);
|
|
746
749
|
if (val === 'null' || val === '""')
|
|
747
750
|
return name;
|
|
748
|
-
return name
|
|
751
|
+
return "".concat(name, "=").concat(val);
|
|
749
752
|
})
|
|
750
753
|
.join(' ');
|
|
751
754
|
// Update this.id
|
|
@@ -773,7 +776,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
773
776
|
var val = attributes[name];
|
|
774
777
|
if (val === 'null' || val === '""')
|
|
775
778
|
return name;
|
|
776
|
-
return name
|
|
779
|
+
return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
|
|
777
780
|
})
|
|
778
781
|
.join(' ');
|
|
779
782
|
};
|
|
@@ -815,7 +818,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
815
818
|
});
|
|
816
819
|
}
|
|
817
820
|
else {
|
|
818
|
-
throw new Error("The value provided ('"
|
|
821
|
+
throw new Error("The value provided ('".concat(where, "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'"));
|
|
819
822
|
}
|
|
820
823
|
// if (!where || html === undefined || html === null) {
|
|
821
824
|
// return;
|
|
@@ -860,6 +863,45 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
860
863
|
enumerable: false,
|
|
861
864
|
configurable: true
|
|
862
865
|
});
|
|
866
|
+
Object.defineProperty(HTMLElement.prototype, "previousSibling", {
|
|
867
|
+
get: function () {
|
|
868
|
+
if (this.parentNode) {
|
|
869
|
+
var children = this.parentNode.childNodes;
|
|
870
|
+
var i = children.length;
|
|
871
|
+
while (i > 0) {
|
|
872
|
+
var child = children[--i];
|
|
873
|
+
if (this === child)
|
|
874
|
+
return children[i - 1] || null;
|
|
875
|
+
}
|
|
876
|
+
return null;
|
|
877
|
+
}
|
|
878
|
+
},
|
|
879
|
+
enumerable: false,
|
|
880
|
+
configurable: true
|
|
881
|
+
});
|
|
882
|
+
Object.defineProperty(HTMLElement.prototype, "previousElementSibling", {
|
|
883
|
+
get: function () {
|
|
884
|
+
if (this.parentNode) {
|
|
885
|
+
var children = this.parentNode.childNodes;
|
|
886
|
+
var i = children.length;
|
|
887
|
+
var find = false;
|
|
888
|
+
while (i > 0) {
|
|
889
|
+
var child = children[--i];
|
|
890
|
+
if (find) {
|
|
891
|
+
if (child instanceof HTMLElement) {
|
|
892
|
+
return child || null;
|
|
893
|
+
}
|
|
894
|
+
}
|
|
895
|
+
else if (this === child) {
|
|
896
|
+
find = true;
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
return null;
|
|
900
|
+
}
|
|
901
|
+
},
|
|
902
|
+
enumerable: false,
|
|
903
|
+
configurable: true
|
|
904
|
+
});
|
|
863
905
|
Object.defineProperty(HTMLElement.prototype, "classNames", {
|
|
864
906
|
get: function () {
|
|
865
907
|
return this.classList.toString();
|
|
@@ -867,6 +909,12 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
867
909
|
enumerable: false,
|
|
868
910
|
configurable: true
|
|
869
911
|
});
|
|
912
|
+
/**
|
|
913
|
+
* Clone this Node
|
|
914
|
+
*/
|
|
915
|
+
HTMLElement.prototype.clone = function () {
|
|
916
|
+
return parse(this.toString()).firstChild;
|
|
917
|
+
};
|
|
870
918
|
return HTMLElement;
|
|
871
919
|
}(node_1.default));
|
|
872
920
|
exports.default = HTMLElement;
|
|
@@ -959,8 +1007,8 @@ function base_parse(data, options) {
|
|
|
959
1007
|
pre: true,
|
|
960
1008
|
};
|
|
961
1009
|
var element_names = Object.keys(elements);
|
|
962
|
-
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^"
|
|
963
|
-
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^"
|
|
1010
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
1011
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
964
1012
|
function element_should_be_ignore(tag) {
|
|
965
1013
|
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
966
1014
|
}
|
|
@@ -975,7 +1023,7 @@ function base_parse(data, options) {
|
|
|
975
1023
|
var noNestedTagIndex = undefined;
|
|
976
1024
|
var match;
|
|
977
1025
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
978
|
-
data = "<"
|
|
1026
|
+
data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
|
|
979
1027
|
var lowerCaseTagName = options.lowerCaseTagName;
|
|
980
1028
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
981
1029
|
var frameFlagOffset = frameflag.length + 2;
|
|
@@ -1043,7 +1091,7 @@ function base_parse(data, options) {
|
|
|
1043
1091
|
stack.push(currentParent);
|
|
1044
1092
|
if (is_block_text_element(tagName)) {
|
|
1045
1093
|
// Find closing tag
|
|
1046
|
-
var closeMarkup = "</"
|
|
1094
|
+
var closeMarkup = "</".concat(tagName, ">");
|
|
1047
1095
|
var closeIndex = lowerCaseTagName
|
|
1048
1096
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1049
1097
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
@@ -1110,18 +1158,25 @@ function parse(data, options) {
|
|
|
1110
1158
|
if (last.parentNode && last.parentNode.parentNode) {
|
|
1111
1159
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
|
1112
1160
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
oneBefore.
|
|
1116
|
-
|
|
1117
|
-
|
|
1161
|
+
// This is wrong, because it puts the H3 outside its correct position, which should be inside the current HTML element; see issue 152 for more info
|
|
1162
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1163
|
+
oneBefore.removeChild(last);
|
|
1164
|
+
last.childNodes.forEach(function (child) {
|
|
1165
|
+
oneBefore.parentNode.appendChild(child);
|
|
1166
|
+
});
|
|
1167
|
+
stack.pop();
|
|
1168
|
+
}
|
|
1118
1169
|
}
|
|
1119
1170
|
else {
|
|
1120
1171
|
// Single error <div> <h3> </div> handle: Just removes <h3>
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1172
|
+
// Why remove? this is already a HtmlElement and the missing <H3> is already added in this case. see issue 152 for more info
|
|
1173
|
+
// eslint-disable-next-line no-lonely-if
|
|
1174
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1175
|
+
oneBefore.removeChild(last);
|
|
1176
|
+
last.childNodes.forEach(function (child) {
|
|
1177
|
+
oneBefore.appendChild(child);
|
|
1178
|
+
});
|
|
1179
|
+
}
|
|
1125
1180
|
}
|
|
1126
1181
|
}
|
|
1127
1182
|
else {
|
package/dist/nodes/node.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export default abstract class Node {
|
|
|
11
11
|
abstract text: string;
|
|
12
12
|
abstract rawText: string;
|
|
13
13
|
abstract toString(): string;
|
|
14
|
+
abstract clone(): Node;
|
|
14
15
|
constructor(parentNode?: HTMLElement, range?: [number, number]);
|
|
15
16
|
get innerText(): string;
|
|
16
17
|
get textContent(): string;
|
package/dist/nodes/text.d.ts
CHANGED
package/dist/nodes/text.js
CHANGED
|
@@ -37,6 +37,9 @@ var TextNode = /** @class */ (function (_super) {
|
|
|
37
37
|
_this._rawText = rawText;
|
|
38
38
|
return _this;
|
|
39
39
|
}
|
|
40
|
+
TextNode.prototype.clone = function () {
|
|
41
|
+
return new TextNode(this._rawText, null);
|
|
42
|
+
};
|
|
40
43
|
Object.defineProperty(TextNode.prototype, "rawText", {
|
|
41
44
|
get: function () {
|
|
42
45
|
return this._rawText;
|
package/package.json
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "5.1.0",
|
|
3
|
+
"version": "5.2.5",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
"require": "./dist/index.js",
|
|
9
|
+
"import": "./esm/index.js",
|
|
10
|
+
"types": "./dist/index.d.ts"
|
|
11
|
+
},
|
|
7
12
|
"scripts": {
|
|
8
13
|
"compile": "tsc",
|
|
9
14
|
"build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
|
|
@@ -48,7 +53,7 @@
|
|
|
48
53
|
"registry": "https://registry.npmjs.org"
|
|
49
54
|
},
|
|
50
55
|
"dependencies": {
|
|
51
|
-
"css-select": "^4.1
|
|
56
|
+
"css-select": "^4.2.1",
|
|
52
57
|
"he": "1.2.0"
|
|
53
58
|
},
|
|
54
59
|
"devDependencies": {
|
|
@@ -60,25 +65,31 @@
|
|
|
60
65
|
"@typescript-eslint/parser": "latest",
|
|
61
66
|
"blanket": "latest",
|
|
62
67
|
"cheerio": "^1.0.0-rc.5",
|
|
63
|
-
"
|
|
68
|
+
"cross-env": "^7.0.3",
|
|
64
69
|
"eslint": "^7.32.0",
|
|
65
70
|
"eslint-config-prettier": "latest",
|
|
66
71
|
"eslint-plugin-import": "latest",
|
|
67
72
|
"high5": "^1.0.0",
|
|
73
|
+
"html-dom-parser": "^1.0.4",
|
|
74
|
+
"html-parser": "^0.11.0",
|
|
75
|
+
"html5parser": "^2.0.2",
|
|
76
|
+
"htmljs-parser": "^2.11.1",
|
|
68
77
|
"htmlparser": "^1.7.7",
|
|
69
78
|
"htmlparser-benchmark": "^1.1.3",
|
|
70
79
|
"htmlparser2": "^6.0.0",
|
|
71
80
|
"mocha": "latest",
|
|
72
81
|
"mocha-each": "^2.0.1",
|
|
82
|
+
"neutron-html5parser": "^0.2.0",
|
|
73
83
|
"np": "latest",
|
|
74
84
|
"parse5": "^6.0.1",
|
|
85
|
+
"rimraf": "^3.0.2",
|
|
86
|
+
"saxes": "^6.0.0",
|
|
75
87
|
"should": "latest",
|
|
76
88
|
"spec": "latest",
|
|
77
89
|
"standard-version": "^9.3.1",
|
|
78
90
|
"travis-cov": "latest",
|
|
79
91
|
"ts-node": "^10.2.1",
|
|
80
|
-
"typescript": "latest"
|
|
81
|
-
"cross-env": "^7.0.3"
|
|
92
|
+
"typescript": "latest"
|
|
82
93
|
},
|
|
83
94
|
"config": {
|
|
84
95
|
"blanket": {
|
|
@@ -102,9 +113,5 @@
|
|
|
102
113
|
"url": "https://github.com/taoqf/node-fast-html-parser/issues"
|
|
103
114
|
},
|
|
104
115
|
"homepage": "https://github.com/taoqf/node-fast-html-parser",
|
|
105
|
-
"sideEffects": false
|
|
106
|
-
"exports": {
|
|
107
|
-
"require": "./dist/index.js",
|
|
108
|
-
"import": "./esm/index.js"
|
|
109
|
-
}
|
|
116
|
+
"sideEffects": false
|
|
110
117
|
}
|