node-html-parser 6.0.0 → 6.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/README.md +127 -47
- package/dist/index.js +1 -4
- package/dist/main.js +17 -17
- package/dist/nodes/html.d.ts +6 -5
- package/dist/nodes/html.js +15 -12
- package/dist/valid.js +1 -1
- package/package.json +11 -10
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,20 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [6.1.1](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.0...v6.1.1) (2022-09-24)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Bug Fixes
|
|
9
|
+
|
|
10
|
+
* parse comments ([82b68ff](https://github.com/taoqf/node-fast-html-parser/commit/82b68ff9eb944e0c55ca2e0ea13fb714e2004803))
|
|
11
|
+
|
|
12
|
+
## [6.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v6.0.0...v6.1.0) (2022-09-19)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
### Features
|
|
16
|
+
|
|
17
|
+
* Add docs ([8a38eed](https://github.com/taoqf/node-fast-html-parser/commit/8a38eedab6b20906ee89dea86c4271960afbad2d))
|
|
18
|
+
|
|
5
19
|
## [6.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v6.0.0) (2022-09-08)
|
|
6
20
|
|
|
7
21
|
|
package/README.md
CHANGED
|
@@ -101,186 +101,266 @@ Parse the data provided, and return the root of the generated DOM.
|
|
|
101
101
|
|
|
102
102
|
Parse the data provided, return true if the given data is valid, and return false if not.
|
|
103
103
|
|
|
104
|
+
## Class
|
|
105
|
+
|
|
106
|
+
```mermaid
|
|
107
|
+
classDiagram
|
|
108
|
+
direction TB
|
|
109
|
+
class HTMLElement{
|
|
110
|
+
this trimRight()
|
|
111
|
+
this removeWhitespace()
|
|
112
|
+
Node[] querySelectorAll(string selector)
|
|
113
|
+
Node querySelector(string selector)
|
|
114
|
+
HTMLElement[] getElementsByTagName(string tagName)
|
|
115
|
+
Node closest(string selector)
|
|
116
|
+
Node appendChild(Node node)
|
|
117
|
+
this insertAdjacentHTML('beforebegin' | 'afterbegin' | 'beforeend' | 'afterend' where, string html)
|
|
118
|
+
this setAttribute(string key, string value)
|
|
119
|
+
this setAttributes(Record~string, string~ attrs)
|
|
120
|
+
this removeAttribute(string key)
|
|
121
|
+
string getAttribute(string key)
|
|
122
|
+
this exchangeChild(Node oldNode, Node newNode)
|
|
123
|
+
this removeChild(Node node)
|
|
124
|
+
string toString()
|
|
125
|
+
this set_content(string content)
|
|
126
|
+
this set_content(Node content)
|
|
127
|
+
this set_content(Node[] content)
|
|
128
|
+
this remove()
|
|
129
|
+
this replaceWith((string | Node)[] ...nodes)
|
|
130
|
+
ClassList classList
|
|
131
|
+
HTMLElement clone()
|
|
132
|
+
HTMLElement getElementById(string id)
|
|
133
|
+
string text
|
|
134
|
+
string rawText
|
|
135
|
+
string tagName
|
|
136
|
+
string structuredText
|
|
137
|
+
string structure
|
|
138
|
+
Node firstChild
|
|
139
|
+
Node lastChild
|
|
140
|
+
Node nextSibling
|
|
141
|
+
HTMLElement nextElementSibling
|
|
142
|
+
Node previousSibling
|
|
143
|
+
HTMLElement previousElementSibling
|
|
144
|
+
string innerHTML
|
|
145
|
+
string outerHTML
|
|
146
|
+
string textContent
|
|
147
|
+
Record~string, string~ attributes
|
|
148
|
+
[number, number] range
|
|
149
|
+
}
|
|
150
|
+
class Node{
|
|
151
|
+
<<abstract>>
|
|
152
|
+
string toString()
|
|
153
|
+
Node clone()
|
|
154
|
+
this remove()
|
|
155
|
+
number nodeType
|
|
156
|
+
string innerText
|
|
157
|
+
string textContent
|
|
158
|
+
}
|
|
159
|
+
class ClassList{
|
|
160
|
+
add(string c)
|
|
161
|
+
replace(string c1, string c2)
|
|
162
|
+
remove(string c)
|
|
163
|
+
toggle(string c)
|
|
164
|
+
boolean contains(string c)
|
|
165
|
+
number length
|
|
166
|
+
string[] value
|
|
167
|
+
string toString()
|
|
168
|
+
}
|
|
169
|
+
class CommentNode{
|
|
170
|
+
CommentNode clone()
|
|
171
|
+
string toString()
|
|
172
|
+
}
|
|
173
|
+
class TextNode{
|
|
174
|
+
TextNode clone()
|
|
175
|
+
string toString()
|
|
176
|
+
string rawText
|
|
177
|
+
string trimmedRawText
|
|
178
|
+
string trimmedText
|
|
179
|
+
string text
|
|
180
|
+
boolean isWhitespace
|
|
181
|
+
}
|
|
182
|
+
HTMLElement --|> Node
|
|
183
|
+
CommentNode --|> Node
|
|
184
|
+
TextNode --|> Node
|
|
185
|
+
HTMLElement ..> ClassList
|
|
186
|
+
```
|
|
187
|
+
|
|
104
188
|
## HTMLElement Methods
|
|
105
189
|
|
|
106
|
-
###
|
|
190
|
+
### trimRight()
|
|
107
191
|
|
|
108
192
|
Trim element from right (in block) after seeing pattern in a TextNode.
|
|
109
193
|
|
|
110
|
-
###
|
|
194
|
+
### removeWhitespace()
|
|
111
195
|
|
|
112
196
|
Remove whitespace in this subtree.
|
|
113
197
|
|
|
114
|
-
###
|
|
198
|
+
### querySelectorAll(selector)
|
|
115
199
|
|
|
116
200
|
Query CSS selector to find matching nodes.
|
|
117
201
|
|
|
118
202
|
Note: Full range of CSS3 selectors supported since v3.0.0.
|
|
119
203
|
|
|
120
|
-
###
|
|
204
|
+
### querySelector(selector)
|
|
121
205
|
|
|
122
206
|
Query CSS Selector to find matching node.
|
|
123
207
|
|
|
124
|
-
###
|
|
208
|
+
### getElementsByTagName(tagName)
|
|
125
209
|
|
|
126
210
|
Get all elements with the specified tagName.
|
|
127
211
|
|
|
128
212
|
Note: Use * for all elements.
|
|
129
213
|
|
|
130
|
-
###
|
|
214
|
+
### closest(selector)
|
|
131
215
|
|
|
132
216
|
Query closest element by css selector.
|
|
133
217
|
|
|
134
|
-
###
|
|
218
|
+
### appendChild(node)
|
|
135
219
|
|
|
136
220
|
Append a child node to childNodes
|
|
137
221
|
|
|
138
|
-
###
|
|
222
|
+
### insertAdjacentHTML(where, html)
|
|
139
223
|
|
|
140
224
|
Parses the specified text as HTML and inserts the resulting nodes into the DOM tree at a specified position.
|
|
141
225
|
|
|
142
|
-
###
|
|
226
|
+
### setAttribute(key: string, value: string)
|
|
143
227
|
|
|
144
228
|
Set `value` to `key` attribute.
|
|
145
229
|
|
|
146
|
-
###
|
|
230
|
+
### setAttributes(attrs: Record<string, string>)
|
|
147
231
|
|
|
148
232
|
Set attributes of the element.
|
|
149
233
|
|
|
150
|
-
###
|
|
234
|
+
### removeAttribute(key: string)
|
|
151
235
|
|
|
152
236
|
Remove `key` attribute.
|
|
153
237
|
|
|
154
|
-
###
|
|
238
|
+
### getAttribute(key: string)
|
|
155
239
|
|
|
156
240
|
Get `key` attribute.
|
|
157
241
|
|
|
158
|
-
###
|
|
242
|
+
### exchangeChild(oldNode: Node, newNode: Node)
|
|
159
243
|
|
|
160
244
|
Exchanges given child with new child.
|
|
161
245
|
|
|
162
|
-
###
|
|
246
|
+
### removeChild(node: Node)
|
|
163
247
|
|
|
164
248
|
Remove child node.
|
|
165
249
|
|
|
166
|
-
###
|
|
250
|
+
### toString()
|
|
167
251
|
|
|
168
252
|
Same as [outerHTML](#outerhtml)
|
|
169
253
|
|
|
170
|
-
###
|
|
254
|
+
### set_content(content: string | Node | Node[])
|
|
171
255
|
|
|
172
256
|
Set content. **Notice**: Do not set content of the **root** node.
|
|
173
257
|
|
|
174
|
-
###
|
|
258
|
+
### remove()
|
|
175
259
|
|
|
176
260
|
Remove current element.
|
|
177
261
|
|
|
178
|
-
###
|
|
262
|
+
### replaceWith(...nodes: (string | Node)[])
|
|
179
263
|
|
|
180
264
|
Replace current element with other node(s).
|
|
181
265
|
|
|
182
|
-
###
|
|
266
|
+
### classList
|
|
183
267
|
|
|
184
|
-
####
|
|
268
|
+
#### classList.add
|
|
185
269
|
|
|
186
270
|
Add class name.
|
|
187
271
|
|
|
188
|
-
####
|
|
272
|
+
#### classList.replace(old: string, new: string)
|
|
189
273
|
|
|
190
274
|
Replace class name with another one.
|
|
191
275
|
|
|
192
|
-
####
|
|
276
|
+
#### classList.remove()
|
|
193
277
|
|
|
194
278
|
Remove class name.
|
|
195
279
|
|
|
196
|
-
####
|
|
280
|
+
#### classList.toggle(className: string):void
|
|
197
281
|
|
|
198
282
|
Toggle class. Remove it if it is already included, otherwise add.
|
|
199
283
|
|
|
200
|
-
####
|
|
284
|
+
#### classList.contains(className: string): boolean
|
|
201
285
|
|
|
202
286
|
Returns true if the classname is already in the classList.
|
|
203
287
|
|
|
204
|
-
####
|
|
288
|
+
#### classList.value
|
|
205
289
|
|
|
206
290
|
Get class names.
|
|
207
291
|
|
|
208
|
-
####
|
|
292
|
+
#### clone()
|
|
209
293
|
|
|
210
294
|
Clone a node.
|
|
211
295
|
|
|
212
|
-
####
|
|
296
|
+
#### getElementById(id: string): HTMLElement;
|
|
213
297
|
|
|
214
298
|
Get element by its ID.
|
|
215
299
|
|
|
216
300
|
## HTMLElement Properties
|
|
217
301
|
|
|
218
|
-
###
|
|
302
|
+
### text
|
|
219
303
|
|
|
220
304
|
Get unescaped text value of current node and its children. Like `innerText`.
|
|
221
305
|
(slow for the first time)
|
|
222
306
|
|
|
223
|
-
###
|
|
307
|
+
### rawText
|
|
224
308
|
|
|
225
309
|
Get escaped (as-is) text value of current node and its children. May have
|
|
226
310
|
`&` in it. (fast)
|
|
227
311
|
|
|
228
|
-
###
|
|
312
|
+
### tagName
|
|
229
313
|
|
|
230
314
|
Get or set the tag name of the HTMLElement. Note: the returned value is an uppercase string.
|
|
231
315
|
|
|
232
|
-
###
|
|
316
|
+
### structuredText
|
|
233
317
|
|
|
234
318
|
Get structured Text.
|
|
235
319
|
|
|
236
|
-
###
|
|
320
|
+
### structure
|
|
237
321
|
|
|
238
322
|
Get DOM structure.
|
|
239
323
|
|
|
240
|
-
###
|
|
324
|
+
### firstChild
|
|
241
325
|
|
|
242
326
|
Get first child node.
|
|
243
327
|
|
|
244
|
-
###
|
|
328
|
+
### lastChild
|
|
245
329
|
|
|
246
330
|
Get last child node.
|
|
247
331
|
|
|
248
|
-
###
|
|
332
|
+
### innerHTML
|
|
249
333
|
|
|
250
334
|
Set or Get innerHTML.
|
|
251
335
|
|
|
252
|
-
###
|
|
336
|
+
### outerHTML
|
|
253
337
|
|
|
254
338
|
Get outerHTML.
|
|
255
339
|
|
|
256
|
-
###
|
|
340
|
+
### nextSibling
|
|
257
341
|
|
|
258
342
|
Returns a reference to the next child node of the current element's parent.
|
|
259
343
|
|
|
260
|
-
###
|
|
344
|
+
### nextElementSibling
|
|
261
345
|
|
|
262
346
|
Returns a reference to the next child element of the current element's parent.
|
|
263
347
|
|
|
264
|
-
###
|
|
348
|
+
### previousSibling
|
|
265
349
|
|
|
266
350
|
Returns a reference to the previous child node of the current element's parent.
|
|
267
351
|
|
|
268
|
-
###
|
|
352
|
+
### previousElementSibling
|
|
269
353
|
|
|
270
354
|
Returns a reference to the previous child element of the current element's parent.
|
|
271
355
|
|
|
272
|
-
###
|
|
356
|
+
### textContent
|
|
273
357
|
|
|
274
358
|
Get or Set textContent of current element, more efficient than [set_content](#set_contentcontent-string--node--node).
|
|
275
359
|
|
|
276
|
-
###
|
|
277
|
-
|
|
278
|
-
Get all attributes of current element. **Notice: do not try to change the returned value.**
|
|
279
|
-
|
|
280
|
-
### HTMLElement#classList
|
|
360
|
+
### attributes
|
|
281
361
|
|
|
282
362
|
Get all attributes of current element. **Notice: do not try to change the returned value.**
|
|
283
363
|
|
|
284
|
-
###
|
|
364
|
+
### range
|
|
285
365
|
|
|
286
366
|
Corresponding source code start and end indexes (ie [ 0, 40 ])
|
package/dist/index.js
CHANGED
|
@@ -18,10 +18,7 @@ var parse_1 = __importDefault(require("./parse"));
|
|
|
18
18
|
var valid_1 = __importDefault(require("./valid"));
|
|
19
19
|
exports.valid = valid_1.default;
|
|
20
20
|
function parse(data, options) {
|
|
21
|
-
if (options === void 0) { options = {
|
|
22
|
-
lowerCaseTagName: false,
|
|
23
|
-
comment: false
|
|
24
|
-
}; }
|
|
21
|
+
if (options === void 0) { options = {}; }
|
|
25
22
|
return (0, parse_1.default)(data, options);
|
|
26
23
|
}
|
|
27
24
|
exports.default = parse;
|
package/dist/main.js
CHANGED
|
@@ -487,9 +487,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
487
487
|
*
|
|
488
488
|
* @memberof HTMLElement
|
|
489
489
|
*/
|
|
490
|
-
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag) {
|
|
490
|
+
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag, _parseOptions) {
|
|
491
491
|
if (rawAttrs === void 0) { rawAttrs = ''; }
|
|
492
492
|
if (voidTag === void 0) { voidTag = new void_tag_1.default(); }
|
|
493
|
+
if (_parseOptions === void 0) { _parseOptions = {}; }
|
|
493
494
|
var _this = _super.call(this, parentNode, range) || this;
|
|
494
495
|
_this.rawAttrs = rawAttrs;
|
|
495
496
|
_this.voidTag = voidTag;
|
|
@@ -501,6 +502,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
501
502
|
_this.rawAttrs = rawAttrs || '';
|
|
502
503
|
_this.id = keyAttrs.id || '';
|
|
503
504
|
_this.childNodes = [];
|
|
505
|
+
_this._parseOptions = _parseOptions;
|
|
504
506
|
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
505
507
|
);
|
|
506
508
|
if (keyAttrs.id) {
|
|
@@ -682,8 +684,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
682
684
|
.join('');
|
|
683
685
|
},
|
|
684
686
|
set: function (content) {
|
|
685
|
-
|
|
686
|
-
var r = parse(content);
|
|
687
|
+
var r = parse(content, this._parseOptions);
|
|
687
688
|
var nodes = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
|
|
688
689
|
resetParent(nodes, this);
|
|
689
690
|
resetParent(this.childNodes, null);
|
|
@@ -698,8 +699,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
698
699
|
content = [content];
|
|
699
700
|
}
|
|
700
701
|
else if (typeof content == 'string') {
|
|
702
|
+
options = __assign(__assign({}, this._parseOptions), options);
|
|
701
703
|
var r = parse(content, options);
|
|
702
|
-
content = r.childNodes.length ? r.childNodes : [new text_1.default(
|
|
704
|
+
content = r.childNodes.length ? r.childNodes : [new text_1.default(r.innerHTML, this)];
|
|
703
705
|
}
|
|
704
706
|
resetParent(this.childNodes, null);
|
|
705
707
|
resetParent(content, this);
|
|
@@ -719,8 +721,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
719
721
|
return [node];
|
|
720
722
|
}
|
|
721
723
|
else if (typeof node == 'string') {
|
|
722
|
-
|
|
723
|
-
var r = parse(node);
|
|
724
|
+
var r = parse(node, _this._parseOptions);
|
|
724
725
|
return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
|
|
725
726
|
}
|
|
726
727
|
return [];
|
|
@@ -731,6 +732,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
731
732
|
});
|
|
732
733
|
resetParent([this], null);
|
|
733
734
|
parent.childNodes = __spreadArray(__spreadArray(__spreadArray([], parent.childNodes.slice(0, idx), true), resetParent(content, parent), true), parent.childNodes.slice(idx + 1), true);
|
|
735
|
+
return this;
|
|
734
736
|
};
|
|
735
737
|
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
|
|
736
738
|
get: function () {
|
|
@@ -1127,6 +1129,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1127
1129
|
if (key === 'id') {
|
|
1128
1130
|
this.id = value;
|
|
1129
1131
|
}
|
|
1132
|
+
return this;
|
|
1130
1133
|
};
|
|
1131
1134
|
/**
|
|
1132
1135
|
* Replace all the attributes of the HTMLElement by the provided attributes
|
|
@@ -1159,7 +1162,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1159
1162
|
if (arguments.length < 2) {
|
|
1160
1163
|
throw new Error('2 arguments required');
|
|
1161
1164
|
}
|
|
1162
|
-
var p = parse(html);
|
|
1165
|
+
var p = parse(html, this._parseOptions);
|
|
1163
1166
|
if (where === 'afterend') {
|
|
1164
1167
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
1165
1168
|
return child === _this;
|
|
@@ -1280,7 +1283,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1280
1283
|
* Clone this Node
|
|
1281
1284
|
*/
|
|
1282
1285
|
HTMLElement.prototype.clone = function () {
|
|
1283
|
-
return parse(this.toString()).firstChild;
|
|
1286
|
+
return parse(this.toString(), this._parseOptions).firstChild;
|
|
1284
1287
|
};
|
|
1285
1288
|
return HTMLElement;
|
|
1286
1289
|
}(node_2.default));
|
|
@@ -1367,7 +1370,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1367
1370
|
*/
|
|
1368
1371
|
function base_parse(data, options) {
|
|
1369
1372
|
var _a, _b;
|
|
1370
|
-
if (options === void 0) { options = {
|
|
1373
|
+
if (options === void 0) { options = {}; }
|
|
1371
1374
|
var voidTag = new void_tag_1.default((_a = options === null || options === void 0 ? void 0 : options.voidTag) === null || _a === void 0 ? void 0 : _a.closingSlash, (_b = options === null || options === void 0 ? void 0 : options.voidTag) === null || _b === void 0 ? void 0 : _b.tags);
|
|
1372
1375
|
var elements = options.blockTextElements || {
|
|
1373
1376
|
script: true,
|
|
@@ -1385,7 +1388,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1385
1388
|
return kBlockTextElements.some(function (it) { return it.test(tag); });
|
|
1386
1389
|
}
|
|
1387
1390
|
var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
|
|
1388
|
-
var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag);
|
|
1391
|
+
var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag, options);
|
|
1389
1392
|
var currentParent = root;
|
|
1390
1393
|
var stack = [root];
|
|
1391
1394
|
var lastTextPos = -1;
|
|
@@ -1456,7 +1459,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1456
1459
|
var tagStartPos_1 = tagEndPos_1 - matchLength;
|
|
1457
1460
|
currentParent = currentParent.appendChild(
|
|
1458
1461
|
// Initialize range (end position updated later for closed tags)
|
|
1459
|
-
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag));
|
|
1462
|
+
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag, options));
|
|
1460
1463
|
stack.push(currentParent);
|
|
1461
1464
|
if (is_block_text_element(tagName)) {
|
|
1462
1465
|
// Find closing tag
|
|
@@ -1517,7 +1520,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1517
1520
|
* Parse a chunk of HTML source.
|
|
1518
1521
|
*/
|
|
1519
1522
|
function parse(data, options) {
|
|
1520
|
-
if (options === void 0) { options = {
|
|
1523
|
+
if (options === void 0) { options = {}; }
|
|
1521
1524
|
var stack = base_parse(data, options);
|
|
1522
1525
|
var root = stack[0];
|
|
1523
1526
|
var _loop_1 = function () {
|
|
@@ -1622,7 +1625,7 @@ define("valid", ["require", "exports", "nodes/html"], function (require, exports
|
|
|
1622
1625
|
* Parse a chunk of HTML source.
|
|
1623
1626
|
*/
|
|
1624
1627
|
function valid(data, options) {
|
|
1625
|
-
if (options === void 0) { options = {
|
|
1628
|
+
if (options === void 0) { options = {}; }
|
|
1626
1629
|
var stack = (0, html_2.base_parse)(data, options);
|
|
1627
1630
|
return Boolean(stack.length === 1);
|
|
1628
1631
|
}
|
|
@@ -1646,10 +1649,7 @@ define("index", ["require", "exports", "nodes/comment", "nodes/html", "nodes/nod
|
|
|
1646
1649
|
exports.NodeType = type_5.default;
|
|
1647
1650
|
exports.valid = valid_1.default;
|
|
1648
1651
|
function parse(data, options) {
|
|
1649
|
-
if (options === void 0) { options = {
|
|
1650
|
-
lowerCaseTagName: false,
|
|
1651
|
-
comment: false
|
|
1652
|
-
}; }
|
|
1652
|
+
if (options === void 0) { options = {}; }
|
|
1653
1653
|
return (0, parse_1.default)(data, options);
|
|
1654
1654
|
}
|
|
1655
1655
|
exports.default = parse;
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -41,6 +41,7 @@ export default class HTMLElement extends Node {
|
|
|
41
41
|
private voidTag;
|
|
42
42
|
private _attrs;
|
|
43
43
|
private _rawAttrs;
|
|
44
|
+
private _parseOptions;
|
|
44
45
|
rawTagName: string;
|
|
45
46
|
id: string;
|
|
46
47
|
classList: DOMTokenList;
|
|
@@ -61,7 +62,7 @@ export default class HTMLElement extends Node {
|
|
|
61
62
|
*
|
|
62
63
|
* @memberof HTMLElement
|
|
63
64
|
*/
|
|
64
|
-
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range: [number, number], voidTag?: VoidTag);
|
|
65
|
+
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range: [number, number], voidTag?: VoidTag, _parseOptions?: Partial<Options>);
|
|
65
66
|
/**
|
|
66
67
|
* Remove Child element from childNodes array
|
|
67
68
|
* @param {HTMLElement} node node to remove
|
|
@@ -98,7 +99,7 @@ export default class HTMLElement extends Node {
|
|
|
98
99
|
get innerHTML(): string;
|
|
99
100
|
set innerHTML(content: string);
|
|
100
101
|
set_content(content: string | Node | Node[], options?: Partial<Options>): this;
|
|
101
|
-
replaceWith(...nodes: (string | Node)[]):
|
|
102
|
+
replaceWith(...nodes: (string | Node)[]): this;
|
|
102
103
|
get outerHTML(): string;
|
|
103
104
|
/**
|
|
104
105
|
* Trim element from right (in block) after seeing pattern in a TextNode.
|
|
@@ -183,7 +184,7 @@ export default class HTMLElement extends Node {
|
|
|
183
184
|
* @param {string} key The attribute name
|
|
184
185
|
* @param {string} value The value to set, or null / undefined to remove an attribute
|
|
185
186
|
*/
|
|
186
|
-
setAttribute(key: string, value: string):
|
|
187
|
+
setAttribute(key: string, value: string): this;
|
|
187
188
|
/**
|
|
188
189
|
* Replace all the attributes of the HTMLElement by the provided attributes
|
|
189
190
|
* @param {Attributes} attributes the new attribute set
|
|
@@ -201,8 +202,8 @@ export default class HTMLElement extends Node {
|
|
|
201
202
|
clone(): Node;
|
|
202
203
|
}
|
|
203
204
|
export interface Options {
|
|
204
|
-
lowerCaseTagName
|
|
205
|
-
comment
|
|
205
|
+
lowerCaseTagName?: boolean;
|
|
206
|
+
comment?: boolean;
|
|
206
207
|
/**
|
|
207
208
|
* @see PR #215 for explanation
|
|
208
209
|
*/
|
package/dist/nodes/html.js
CHANGED
|
@@ -154,9 +154,10 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
154
154
|
*
|
|
155
155
|
* @memberof HTMLElement
|
|
156
156
|
*/
|
|
157
|
-
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag) {
|
|
157
|
+
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range, voidTag, _parseOptions) {
|
|
158
158
|
if (rawAttrs === void 0) { rawAttrs = ''; }
|
|
159
159
|
if (voidTag === void 0) { voidTag = new void_tag_1.default(); }
|
|
160
|
+
if (_parseOptions === void 0) { _parseOptions = {}; }
|
|
160
161
|
var _this = _super.call(this, parentNode, range) || this;
|
|
161
162
|
_this.rawAttrs = rawAttrs;
|
|
162
163
|
_this.voidTag = voidTag;
|
|
@@ -168,6 +169,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
168
169
|
_this.rawAttrs = rawAttrs || '';
|
|
169
170
|
_this.id = keyAttrs.id || '';
|
|
170
171
|
_this.childNodes = [];
|
|
172
|
+
_this._parseOptions = _parseOptions;
|
|
171
173
|
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
172
174
|
);
|
|
173
175
|
if (keyAttrs.id) {
|
|
@@ -349,8 +351,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
349
351
|
.join('');
|
|
350
352
|
},
|
|
351
353
|
set: function (content) {
|
|
352
|
-
|
|
353
|
-
var r = parse(content);
|
|
354
|
+
var r = parse(content, this._parseOptions);
|
|
354
355
|
var nodes = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
|
|
355
356
|
resetParent(nodes, this);
|
|
356
357
|
resetParent(this.childNodes, null);
|
|
@@ -365,8 +366,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
365
366
|
content = [content];
|
|
366
367
|
}
|
|
367
368
|
else if (typeof content == 'string') {
|
|
369
|
+
options = __assign(__assign({}, this._parseOptions), options);
|
|
368
370
|
var r = parse(content, options);
|
|
369
|
-
content = r.childNodes.length ? r.childNodes : [new text_1.default(
|
|
371
|
+
content = r.childNodes.length ? r.childNodes : [new text_1.default(r.innerHTML, this)];
|
|
370
372
|
}
|
|
371
373
|
resetParent(this.childNodes, null);
|
|
372
374
|
resetParent(content, this);
|
|
@@ -386,8 +388,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
386
388
|
return [node];
|
|
387
389
|
}
|
|
388
390
|
else if (typeof node == 'string') {
|
|
389
|
-
|
|
390
|
-
var r = parse(node);
|
|
391
|
+
var r = parse(node, _this._parseOptions);
|
|
391
392
|
return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
|
|
392
393
|
}
|
|
393
394
|
return [];
|
|
@@ -398,6 +399,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
398
399
|
});
|
|
399
400
|
resetParent([this], null);
|
|
400
401
|
parent.childNodes = __spreadArray(__spreadArray(__spreadArray([], parent.childNodes.slice(0, idx), true), resetParent(content, parent), true), parent.childNodes.slice(idx + 1), true);
|
|
402
|
+
return this;
|
|
401
403
|
};
|
|
402
404
|
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
|
|
403
405
|
get: function () {
|
|
@@ -794,6 +796,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
794
796
|
if (key === 'id') {
|
|
795
797
|
this.id = value;
|
|
796
798
|
}
|
|
799
|
+
return this;
|
|
797
800
|
};
|
|
798
801
|
/**
|
|
799
802
|
* Replace all the attributes of the HTMLElement by the provided attributes
|
|
@@ -826,7 +829,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
826
829
|
if (arguments.length < 2) {
|
|
827
830
|
throw new Error('2 arguments required');
|
|
828
831
|
}
|
|
829
|
-
var p = parse(html);
|
|
832
|
+
var p = parse(html, this._parseOptions);
|
|
830
833
|
if (where === 'afterend') {
|
|
831
834
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
832
835
|
return child === _this;
|
|
@@ -947,7 +950,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
947
950
|
* Clone this Node
|
|
948
951
|
*/
|
|
949
952
|
HTMLElement.prototype.clone = function () {
|
|
950
|
-
return parse(this.toString()).firstChild;
|
|
953
|
+
return parse(this.toString(), this._parseOptions).firstChild;
|
|
951
954
|
};
|
|
952
955
|
return HTMLElement;
|
|
953
956
|
}(node_1.default));
|
|
@@ -1034,7 +1037,7 @@ var frameflag = 'documentfragmentcontainer';
|
|
|
1034
1037
|
*/
|
|
1035
1038
|
function base_parse(data, options) {
|
|
1036
1039
|
var _a, _b;
|
|
1037
|
-
if (options === void 0) { options = {
|
|
1040
|
+
if (options === void 0) { options = {}; }
|
|
1038
1041
|
var voidTag = new void_tag_1.default((_a = options === null || options === void 0 ? void 0 : options.voidTag) === null || _a === void 0 ? void 0 : _a.closingSlash, (_b = options === null || options === void 0 ? void 0 : options.voidTag) === null || _b === void 0 ? void 0 : _b.tags);
|
|
1039
1042
|
var elements = options.blockTextElements || {
|
|
1040
1043
|
script: true,
|
|
@@ -1052,7 +1055,7 @@ function base_parse(data, options) {
|
|
|
1052
1055
|
return kBlockTextElements.some(function (it) { return it.test(tag); });
|
|
1053
1056
|
}
|
|
1054
1057
|
var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
|
|
1055
|
-
var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag);
|
|
1058
|
+
var root = new HTMLElement(null, {}, '', null, [0, data.length], voidTag, options);
|
|
1056
1059
|
var currentParent = root;
|
|
1057
1060
|
var stack = [root];
|
|
1058
1061
|
var lastTextPos = -1;
|
|
@@ -1123,7 +1126,7 @@ function base_parse(data, options) {
|
|
|
1123
1126
|
var tagStartPos_1 = tagEndPos_1 - matchLength;
|
|
1124
1127
|
currentParent = currentParent.appendChild(
|
|
1125
1128
|
// Initialize range (end position updated later for closed tags)
|
|
1126
|
-
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag));
|
|
1129
|
+
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1), voidTag, options));
|
|
1127
1130
|
stack.push(currentParent);
|
|
1128
1131
|
if (is_block_text_element(tagName)) {
|
|
1129
1132
|
// Find closing tag
|
|
@@ -1184,7 +1187,7 @@ exports.base_parse = base_parse;
|
|
|
1184
1187
|
* Parse a chunk of HTML source.
|
|
1185
1188
|
*/
|
|
1186
1189
|
function parse(data, options) {
|
|
1187
|
-
if (options === void 0) { options = {
|
|
1190
|
+
if (options === void 0) { options = {}; }
|
|
1188
1191
|
var stack = base_parse(data, options);
|
|
1189
1192
|
var root = stack[0];
|
|
1190
1193
|
var _loop_1 = function () {
|
package/dist/valid.js
CHANGED
|
@@ -6,7 +6,7 @@ var html_1 = require("./nodes/html");
|
|
|
6
6
|
* Parse a chunk of HTML source.
|
|
7
7
|
*/
|
|
8
8
|
function valid(data, options) {
|
|
9
|
-
if (options === void 0) { options = {
|
|
9
|
+
if (options === void 0) { options = {}; }
|
|
10
10
|
var stack = (0, html_1.base_parse)(data, options);
|
|
11
11
|
return Boolean(stack.length === 1);
|
|
12
12
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "6.0.0",
|
|
3
|
+
"version": "6.1.1",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -8,6 +8,7 @@
|
|
|
8
8
|
"compile": "tsc",
|
|
9
9
|
"build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
|
|
10
10
|
"compile:cjs": "tsc -m commonjs",
|
|
11
|
+
"watch": "npx tsc -m commonjs --watch --preserveWatchOutput",
|
|
11
12
|
"compile:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
|
|
12
13
|
"lint": "eslint ./src/*.ts ./src/**/*.ts",
|
|
13
14
|
"---------------": "",
|
|
@@ -47,7 +48,7 @@
|
|
|
47
48
|
"registry": "https://registry.npmjs.org"
|
|
48
49
|
},
|
|
49
50
|
"dependencies": {
|
|
50
|
-
"css-select": "^
|
|
51
|
+
"css-select": "^5.1.0",
|
|
51
52
|
"he": "1.2.0"
|
|
52
53
|
},
|
|
53
54
|
"devDependencies": {
|
|
@@ -58,31 +59,31 @@
|
|
|
58
59
|
"@typescript-eslint/eslint-plugin-tslint": "latest",
|
|
59
60
|
"@typescript-eslint/parser": "latest",
|
|
60
61
|
"blanket": "latest",
|
|
61
|
-
"cheerio": "^1.0.0-rc.
|
|
62
|
+
"cheerio": "^1.0.0-rc.12",
|
|
62
63
|
"cross-env": "^7.0.3",
|
|
63
|
-
"eslint": "^
|
|
64
|
+
"eslint": "^8.23.1",
|
|
64
65
|
"eslint-config-prettier": "latest",
|
|
65
66
|
"eslint-plugin-import": "latest",
|
|
66
67
|
"high5": "^1.0.0",
|
|
67
|
-
"html-dom-parser": "^1.
|
|
68
|
+
"html-dom-parser": "^3.1.2",
|
|
68
69
|
"html-parser": "^0.11.0",
|
|
69
70
|
"html5parser": "^2.0.2",
|
|
70
|
-
"htmljs-parser": "^
|
|
71
|
+
"htmljs-parser": "^5.1.4",
|
|
71
72
|
"htmlparser": "^1.7.7",
|
|
72
73
|
"htmlparser-benchmark": "^1.1.3",
|
|
73
|
-
"htmlparser2": "^
|
|
74
|
+
"htmlparser2": "^8.0.1",
|
|
74
75
|
"mocha": "latest",
|
|
75
76
|
"mocha-each": "^2.0.1",
|
|
76
77
|
"neutron-html5parser": "^0.2.0",
|
|
77
78
|
"np": "latest",
|
|
78
|
-
"parse5": "^
|
|
79
|
+
"parse5": "^7.1.1",
|
|
79
80
|
"rimraf": "^3.0.2",
|
|
80
81
|
"saxes": "^6.0.0",
|
|
81
82
|
"should": "latest",
|
|
82
83
|
"spec": "latest",
|
|
83
|
-
"standard-version": "^9.
|
|
84
|
+
"standard-version": "^9.5.0",
|
|
84
85
|
"travis-cov": "latest",
|
|
85
|
-
"ts-node": "^10.
|
|
86
|
+
"ts-node": "^10.9.1",
|
|
86
87
|
"typescript": "latest"
|
|
87
88
|
},
|
|
88
89
|
"config": {
|