node-html-parser 5.2.0 → 5.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -0
- package/dist/main.js +242 -184
- package/dist/nodes/comment.d.ts +2 -1
- package/dist/nodes/comment.js +4 -1
- package/dist/nodes/html.d.ts +7 -0
- package/dist/nodes/html.js +84 -32
- package/dist/nodes/node.d.ts +1 -0
- package/dist/nodes/text.d.ts +1 -0
- package/dist/nodes/text.js +3 -0
- package/package.json +8 -8
package/README.md
CHANGED
|
@@ -199,6 +199,10 @@ Returns true if the classname is already in the classList.
|
|
|
199
199
|
|
|
200
200
|
Get class names.
|
|
201
201
|
|
|
202
|
+
#### Node#clone()
|
|
203
|
+
|
|
204
|
+
Clone a node.
|
|
205
|
+
|
|
202
206
|
## HTMLElement Properties
|
|
203
207
|
|
|
204
208
|
### HTMLElement#text
|
|
@@ -247,6 +251,14 @@ Returns a reference to the next child node of the current element's parent.
|
|
|
247
251
|
|
|
248
252
|
Returns a reference to the next child element of the current element's parent.
|
|
249
253
|
|
|
254
|
+
### HTMLElement#previousSibling
|
|
255
|
+
|
|
256
|
+
Returns a reference to the previous child node of the current element's parent.
|
|
257
|
+
|
|
258
|
+
### HTMLElement#previousElementSibling
|
|
259
|
+
|
|
260
|
+
Returns a reference to the previous child element of the current element's parent.
|
|
261
|
+
|
|
250
262
|
### HTMLElement#textContent
|
|
251
263
|
|
|
252
264
|
Get or set the textContent of the current element; more efficient than [set_content](#htmlelementset_contentcontent-string--node--node).
|
package/dist/main.js
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
2
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
3
|
+
};
|
|
1
4
|
var __extends = (this && this.__extends) || (function () {
|
|
2
5
|
var extendStatics = function (d, b) {
|
|
3
6
|
extendStatics = Object.setPrototypeOf ||
|
|
@@ -13,9 +16,6 @@ var __extends = (this && this.__extends) || (function () {
|
|
|
13
16
|
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
14
17
|
};
|
|
15
18
|
})();
|
|
16
|
-
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
17
|
-
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
18
|
-
};
|
|
19
19
|
var __assign = (this && this.__assign) || function () {
|
|
20
20
|
__assign = Object.assign || function(t) {
|
|
21
21
|
for (var s, i = 1, n = arguments.length; i < n; i++) {
|
|
@@ -55,134 +55,51 @@ define("nodes/type", ["require", "exports"], function (require, exports) {
|
|
|
55
55
|
})(NodeType || (NodeType = {}));
|
|
56
56
|
exports.default = NodeType;
|
|
57
57
|
});
|
|
58
|
-
define("nodes/
|
|
58
|
+
define("nodes/node", ["require", "exports", "he"], function (require, exports, he_1) {
|
|
59
59
|
"use strict";
|
|
60
60
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
61
|
-
node_1 = __importDefault(node_1);
|
|
62
|
-
type_1 = __importDefault(type_1);
|
|
63
61
|
/**
|
|
64
|
-
*
|
|
65
|
-
* @param {string} value [description]
|
|
62
|
+
* Node Class as base class for TextNode and HTMLElement.
|
|
66
63
|
*/
|
|
67
|
-
var
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
64
|
+
var Node = /** @class */ (function () {
|
|
65
|
+
function Node(parentNode, range) {
|
|
66
|
+
if (parentNode === void 0) { parentNode = null; }
|
|
67
|
+
this.parentNode = parentNode;
|
|
68
|
+
this.childNodes = [];
|
|
69
|
+
Object.defineProperty(this, 'range', {
|
|
70
|
+
enumerable: false,
|
|
71
|
+
writable: true,
|
|
72
|
+
configurable: true,
|
|
73
|
+
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
74
|
+
});
|
|
78
75
|
}
|
|
79
|
-
Object.defineProperty(
|
|
80
|
-
get: function () {
|
|
81
|
-
return this._rawText;
|
|
82
|
-
},
|
|
83
|
-
/**
|
|
84
|
-
* Set rawText and invalidate trimmed caches
|
|
85
|
-
*/
|
|
86
|
-
set: function (text) {
|
|
87
|
-
this._rawText = text;
|
|
88
|
-
this._trimmedRawText = void 0;
|
|
89
|
-
this._trimmedText = void 0;
|
|
90
|
-
},
|
|
91
|
-
enumerable: false,
|
|
92
|
-
configurable: true
|
|
93
|
-
});
|
|
94
|
-
Object.defineProperty(TextNode.prototype, "trimmedRawText", {
|
|
95
|
-
/**
|
|
96
|
-
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
97
|
-
*/
|
|
98
|
-
get: function () {
|
|
99
|
-
if (this._trimmedRawText !== undefined)
|
|
100
|
-
return this._trimmedRawText;
|
|
101
|
-
this._trimmedRawText = trimText(this.rawText);
|
|
102
|
-
return this._trimmedRawText;
|
|
103
|
-
},
|
|
104
|
-
enumerable: false,
|
|
105
|
-
configurable: true
|
|
106
|
-
});
|
|
107
|
-
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
108
|
-
/**
|
|
109
|
-
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
110
|
-
*/
|
|
76
|
+
Object.defineProperty(Node.prototype, "innerText", {
|
|
111
77
|
get: function () {
|
|
112
|
-
|
|
113
|
-
return this._trimmedText;
|
|
114
|
-
this._trimmedText = trimText(this.text);
|
|
115
|
-
return this._trimmedText;
|
|
78
|
+
return this.rawText;
|
|
116
79
|
},
|
|
117
80
|
enumerable: false,
|
|
118
81
|
configurable: true
|
|
119
82
|
});
|
|
120
|
-
Object.defineProperty(
|
|
121
|
-
/**
|
|
122
|
-
* Get unescaped text value of current node and its children.
|
|
123
|
-
* @return {string} text content
|
|
124
|
-
*/
|
|
83
|
+
Object.defineProperty(Node.prototype, "textContent", {
|
|
125
84
|
get: function () {
|
|
126
85
|
return (0, he_1.decode)(this.rawText);
|
|
127
86
|
},
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
});
|
|
131
|
-
Object.defineProperty(TextNode.prototype, "isWhitespace", {
|
|
132
|
-
/**
|
|
133
|
-
* Detect if the node contains only white space.
|
|
134
|
-
* @return {boolean}
|
|
135
|
-
*/
|
|
136
|
-
get: function () {
|
|
137
|
-
return /^(\s| )*$/.test(this.rawText);
|
|
87
|
+
set: function (val) {
|
|
88
|
+
this.rawText = (0, he_1.encode)(val);
|
|
138
89
|
},
|
|
139
90
|
enumerable: false,
|
|
140
91
|
configurable: true
|
|
141
92
|
});
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
return TextNode;
|
|
146
|
-
}(node_1.default));
|
|
147
|
-
exports.default = TextNode;
|
|
148
|
-
/**
|
|
149
|
-
* Trim whitespace except single leading/trailing non-breaking space
|
|
150
|
-
*/
|
|
151
|
-
function trimText(text) {
|
|
152
|
-
var i = 0;
|
|
153
|
-
var startPos;
|
|
154
|
-
var endPos;
|
|
155
|
-
while (i >= 0 && i < text.length) {
|
|
156
|
-
if (/\S/.test(text[i])) {
|
|
157
|
-
if (startPos === undefined) {
|
|
158
|
-
startPos = i;
|
|
159
|
-
i = text.length;
|
|
160
|
-
}
|
|
161
|
-
else {
|
|
162
|
-
endPos = i;
|
|
163
|
-
i = void 0;
|
|
164
|
-
}
|
|
165
|
-
}
|
|
166
|
-
if (startPos === undefined)
|
|
167
|
-
i++;
|
|
168
|
-
else
|
|
169
|
-
i--;
|
|
170
|
-
}
|
|
171
|
-
if (startPos === undefined)
|
|
172
|
-
startPos = 0;
|
|
173
|
-
if (endPos === undefined)
|
|
174
|
-
endPos = text.length - 1;
|
|
175
|
-
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
176
|
-
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
177
|
-
return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
178
|
-
}
|
|
93
|
+
return Node;
|
|
94
|
+
}());
|
|
95
|
+
exports.default = Node;
|
|
179
96
|
});
|
|
180
|
-
define("matcher", ["require", "exports", "nodes/type"], function (require, exports,
|
|
97
|
+
define("matcher", ["require", "exports", "nodes/type"], function (require, exports, type_1) {
|
|
181
98
|
"use strict";
|
|
182
99
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
183
|
-
|
|
100
|
+
type_1 = __importDefault(type_1);
|
|
184
101
|
function isTag(node) {
|
|
185
|
-
return node && node.nodeType ===
|
|
102
|
+
return node && node.nodeType === type_1.default.ELEMENT_NODE;
|
|
186
103
|
}
|
|
187
104
|
function getAttributeValue(elem, name) {
|
|
188
105
|
return isTag(elem) ? elem.getAttribute(name) : undefined;
|
|
@@ -282,21 +199,146 @@ define("matcher", ["require", "exports", "nodes/type"], function (require, expor
|
|
|
282
199
|
findAll: findAll
|
|
283
200
|
};
|
|
284
201
|
});
|
|
285
|
-
define("nodes/
|
|
202
|
+
define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], function (require, exports, he_2, node_1, type_2) {
|
|
203
|
+
"use strict";
|
|
204
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
205
|
+
node_1 = __importDefault(node_1);
|
|
206
|
+
type_2 = __importDefault(type_2);
|
|
207
|
+
/**
|
|
208
|
+
* TextNode to contain a text element in DOM tree.
|
|
209
|
+
* @param {string} value [description]
|
|
210
|
+
*/
|
|
211
|
+
var TextNode = /** @class */ (function (_super) {
|
|
212
|
+
__extends(TextNode, _super);
|
|
213
|
+
function TextNode(rawText, parentNode, range) {
|
|
214
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
215
|
+
/**
|
|
216
|
+
* Node Type declaration.
|
|
217
|
+
* @type {Number}
|
|
218
|
+
*/
|
|
219
|
+
_this.nodeType = type_2.default.TEXT_NODE;
|
|
220
|
+
_this._rawText = rawText;
|
|
221
|
+
return _this;
|
|
222
|
+
}
|
|
223
|
+
TextNode.prototype.clone = function () {
|
|
224
|
+
return new TextNode(this._rawText, null);
|
|
225
|
+
};
|
|
226
|
+
Object.defineProperty(TextNode.prototype, "rawText", {
|
|
227
|
+
get: function () {
|
|
228
|
+
return this._rawText;
|
|
229
|
+
},
|
|
230
|
+
/**
|
|
231
|
+
* Set rawText and invalidate trimmed caches
|
|
232
|
+
*/
|
|
233
|
+
set: function (text) {
|
|
234
|
+
this._rawText = text;
|
|
235
|
+
this._trimmedRawText = void 0;
|
|
236
|
+
this._trimmedText = void 0;
|
|
237
|
+
},
|
|
238
|
+
enumerable: false,
|
|
239
|
+
configurable: true
|
|
240
|
+
});
|
|
241
|
+
Object.defineProperty(TextNode.prototype, "trimmedRawText", {
|
|
242
|
+
/**
|
|
243
|
+
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
244
|
+
*/
|
|
245
|
+
get: function () {
|
|
246
|
+
if (this._trimmedRawText !== undefined)
|
|
247
|
+
return this._trimmedRawText;
|
|
248
|
+
this._trimmedRawText = trimText(this.rawText);
|
|
249
|
+
return this._trimmedRawText;
|
|
250
|
+
},
|
|
251
|
+
enumerable: false,
|
|
252
|
+
configurable: true
|
|
253
|
+
});
|
|
254
|
+
Object.defineProperty(TextNode.prototype, "trimmedText", {
|
|
255
|
+
/**
|
|
256
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
257
|
+
*/
|
|
258
|
+
get: function () {
|
|
259
|
+
if (this._trimmedText !== undefined)
|
|
260
|
+
return this._trimmedText;
|
|
261
|
+
this._trimmedText = trimText(this.text);
|
|
262
|
+
return this._trimmedText;
|
|
263
|
+
},
|
|
264
|
+
enumerable: false,
|
|
265
|
+
configurable: true
|
|
266
|
+
});
|
|
267
|
+
Object.defineProperty(TextNode.prototype, "text", {
|
|
268
|
+
/**
|
|
269
|
+
* Get unescaped text value of current node and its children.
|
|
270
|
+
* @return {string} text content
|
|
271
|
+
*/
|
|
272
|
+
get: function () {
|
|
273
|
+
return (0, he_2.decode)(this.rawText);
|
|
274
|
+
},
|
|
275
|
+
enumerable: false,
|
|
276
|
+
configurable: true
|
|
277
|
+
});
|
|
278
|
+
Object.defineProperty(TextNode.prototype, "isWhitespace", {
|
|
279
|
+
/**
|
|
280
|
+
* Detect if the node contains only white space.
|
|
281
|
+
* @return {boolean}
|
|
282
|
+
*/
|
|
283
|
+
get: function () {
|
|
284
|
+
return /^(\s| )*$/.test(this.rawText);
|
|
285
|
+
},
|
|
286
|
+
enumerable: false,
|
|
287
|
+
configurable: true
|
|
288
|
+
});
|
|
289
|
+
TextNode.prototype.toString = function () {
|
|
290
|
+
return this.rawText;
|
|
291
|
+
};
|
|
292
|
+
return TextNode;
|
|
293
|
+
}(node_1.default));
|
|
294
|
+
exports.default = TextNode;
|
|
295
|
+
/**
|
|
296
|
+
* Trim whitespace except single leading/trailing non-breaking space
|
|
297
|
+
*/
|
|
298
|
+
function trimText(text) {
|
|
299
|
+
var i = 0;
|
|
300
|
+
var startPos;
|
|
301
|
+
var endPos;
|
|
302
|
+
while (i >= 0 && i < text.length) {
|
|
303
|
+
if (/\S/.test(text[i])) {
|
|
304
|
+
if (startPos === undefined) {
|
|
305
|
+
startPos = i;
|
|
306
|
+
i = text.length;
|
|
307
|
+
}
|
|
308
|
+
else {
|
|
309
|
+
endPos = i;
|
|
310
|
+
i = void 0;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
if (startPos === undefined)
|
|
314
|
+
i++;
|
|
315
|
+
else
|
|
316
|
+
i--;
|
|
317
|
+
}
|
|
318
|
+
if (startPos === undefined)
|
|
319
|
+
startPos = 0;
|
|
320
|
+
if (endPos === undefined)
|
|
321
|
+
endPos = text.length - 1;
|
|
322
|
+
var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
323
|
+
var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
324
|
+
return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
325
|
+
}
|
|
326
|
+
});
|
|
327
|
+
define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher", "nodes/comment", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, css_select_1, he_3, back_1, matcher_1, comment_1, node_2, text_1, type_3) {
|
|
286
328
|
"use strict";
|
|
287
329
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
288
330
|
exports.parse = exports.base_parse = void 0;
|
|
289
|
-
|
|
290
|
-
node_2 = __importDefault(node_2);
|
|
291
|
-
type_3 = __importDefault(type_3);
|
|
292
|
-
text_1 = __importDefault(text_1);
|
|
293
|
-
matcher_1 = __importDefault(matcher_1);
|
|
331
|
+
he_3 = __importDefault(he_3);
|
|
294
332
|
back_1 = __importDefault(back_1);
|
|
333
|
+
matcher_1 = __importDefault(matcher_1);
|
|
295
334
|
comment_1 = __importDefault(comment_1);
|
|
335
|
+
node_2 = __importDefault(node_2);
|
|
336
|
+
text_1 = __importDefault(text_1);
|
|
337
|
+
type_3 = __importDefault(type_3);
|
|
296
338
|
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
297
339
|
function decode(val) {
|
|
298
340
|
// clone string
|
|
299
|
-
return JSON.parse(JSON.stringify(
|
|
341
|
+
return JSON.parse(JSON.stringify(he_3.default.decode(val)));
|
|
300
342
|
}
|
|
301
343
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
302
344
|
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
|
|
@@ -332,7 +374,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
332
374
|
}
|
|
333
375
|
DOMTokenList.prototype._validate = function (c) {
|
|
334
376
|
if (/\s/.test(c)) {
|
|
335
|
-
throw new Error("DOMException in DOMTokenList.add: The token '"
|
|
377
|
+
throw new Error("DOMException in DOMTokenList.add: The token '".concat(c, "' contains HTML space characters, which are not valid in tokens."));
|
|
336
378
|
}
|
|
337
379
|
};
|
|
338
380
|
DOMTokenList.prototype.add = function (c) {
|
|
@@ -416,14 +458,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
416
458
|
);
|
|
417
459
|
if (keyAttrs.id) {
|
|
418
460
|
if (!rawAttrs) {
|
|
419
|
-
_this.rawAttrs = "id=\""
|
|
461
|
+
_this.rawAttrs = "id=\"".concat(keyAttrs.id, "\"");
|
|
420
462
|
}
|
|
421
463
|
}
|
|
422
464
|
if (keyAttrs.class) {
|
|
423
465
|
if (!rawAttrs) {
|
|
424
|
-
var cls = "class=\""
|
|
466
|
+
var cls = "class=\"".concat(_this.classList.toString(), "\"");
|
|
425
467
|
if (_this.rawAttrs) {
|
|
426
|
-
_this.rawAttrs += " "
|
|
468
|
+
_this.rawAttrs += " ".concat(cls);
|
|
427
469
|
}
|
|
428
470
|
else {
|
|
429
471
|
_this.rawAttrs = cls;
|
|
@@ -568,7 +610,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
568
610
|
else {
|
|
569
611
|
var text = node.trimmedText;
|
|
570
612
|
if (currentBlock.prependWhitespace) {
|
|
571
|
-
text = " "
|
|
613
|
+
text = " ".concat(text);
|
|
572
614
|
currentBlock.prependWhitespace = false;
|
|
573
615
|
}
|
|
574
616
|
currentBlock.push(text);
|
|
@@ -589,8 +631,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
589
631
|
HTMLElement.prototype.toString = function () {
|
|
590
632
|
var tag = this.rawTagName;
|
|
591
633
|
if (tag) {
|
|
592
|
-
var attrs = this.rawAttrs ? " "
|
|
593
|
-
return this.isVoidElement ? "<"
|
|
634
|
+
var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
|
|
635
|
+
return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
|
|
594
636
|
}
|
|
595
637
|
return this.innerHTML;
|
|
596
638
|
};
|
|
@@ -686,9 +728,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
686
728
|
res.push(' '.repeat(indention) + str);
|
|
687
729
|
}
|
|
688
730
|
function dfs(node) {
|
|
689
|
-
var idStr = node.id ? "#"
|
|
690
|
-
var classStr = node.classList.length ? "."
|
|
691
|
-
write(""
|
|
731
|
+
var idStr = node.id ? "#".concat(node.id) : '';
|
|
732
|
+
var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
733
|
+
write("".concat(node.rawTagName).concat(idStr).concat(classStr));
|
|
692
734
|
indention++;
|
|
693
735
|
node.childNodes.forEach(function (childNode) {
|
|
694
736
|
if (childNode.nodeType === type_3.default.ELEMENT_NODE) {
|
|
@@ -947,7 +989,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
947
989
|
if (val === undefined || val === 'null') {
|
|
948
990
|
return name;
|
|
949
991
|
}
|
|
950
|
-
return name
|
|
992
|
+
return "".concat(name, "=").concat(val);
|
|
951
993
|
})
|
|
952
994
|
.join(' ');
|
|
953
995
|
// Update this.id
|
|
@@ -994,7 +1036,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
994
1036
|
var val = _this.quoteAttribute(attrs[name]);
|
|
995
1037
|
if (val === 'null' || val === '""')
|
|
996
1038
|
return name;
|
|
997
|
-
return name
|
|
1039
|
+
return "".concat(name, "=").concat(val);
|
|
998
1040
|
})
|
|
999
1041
|
.join(' ');
|
|
1000
1042
|
// Update this.id
|
|
@@ -1022,7 +1064,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1022
1064
|
var val = attributes[name];
|
|
1023
1065
|
if (val === 'null' || val === '""')
|
|
1024
1066
|
return name;
|
|
1025
|
-
return name
|
|
1067
|
+
return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
|
|
1026
1068
|
})
|
|
1027
1069
|
.join(' ');
|
|
1028
1070
|
};
|
|
@@ -1064,7 +1106,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1064
1106
|
});
|
|
1065
1107
|
}
|
|
1066
1108
|
else {
|
|
1067
|
-
throw new Error("The value provided ('"
|
|
1109
|
+
throw new Error("The value provided ('".concat(where, "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'"));
|
|
1068
1110
|
}
|
|
1069
1111
|
// if (!where || html === undefined || html === null) {
|
|
1070
1112
|
// return;
|
|
@@ -1109,6 +1151,45 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1109
1151
|
enumerable: false,
|
|
1110
1152
|
configurable: true
|
|
1111
1153
|
});
|
|
1154
|
+
Object.defineProperty(HTMLElement.prototype, "previousSibling", {
|
|
1155
|
+
get: function () {
|
|
1156
|
+
if (this.parentNode) {
|
|
1157
|
+
var children = this.parentNode.childNodes;
|
|
1158
|
+
var i = children.length;
|
|
1159
|
+
while (i > 0) {
|
|
1160
|
+
var child = children[--i];
|
|
1161
|
+
if (this === child)
|
|
1162
|
+
return children[i - 1] || null;
|
|
1163
|
+
}
|
|
1164
|
+
return null;
|
|
1165
|
+
}
|
|
1166
|
+
},
|
|
1167
|
+
enumerable: false,
|
|
1168
|
+
configurable: true
|
|
1169
|
+
});
|
|
1170
|
+
Object.defineProperty(HTMLElement.prototype, "previousElementSibling", {
|
|
1171
|
+
get: function () {
|
|
1172
|
+
if (this.parentNode) {
|
|
1173
|
+
var children = this.parentNode.childNodes;
|
|
1174
|
+
var i = children.length;
|
|
1175
|
+
var find = false;
|
|
1176
|
+
while (i > 0) {
|
|
1177
|
+
var child = children[--i];
|
|
1178
|
+
if (find) {
|
|
1179
|
+
if (child instanceof HTMLElement) {
|
|
1180
|
+
return child || null;
|
|
1181
|
+
}
|
|
1182
|
+
}
|
|
1183
|
+
else if (this === child) {
|
|
1184
|
+
find = true;
|
|
1185
|
+
}
|
|
1186
|
+
}
|
|
1187
|
+
return null;
|
|
1188
|
+
}
|
|
1189
|
+
},
|
|
1190
|
+
enumerable: false,
|
|
1191
|
+
configurable: true
|
|
1192
|
+
});
|
|
1112
1193
|
Object.defineProperty(HTMLElement.prototype, "classNames", {
|
|
1113
1194
|
get: function () {
|
|
1114
1195
|
return this.classList.toString();
|
|
@@ -1116,6 +1197,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1116
1197
|
enumerable: false,
|
|
1117
1198
|
configurable: true
|
|
1118
1199
|
});
|
|
1200
|
+
/**
|
|
1201
|
+
* Clone this Node
|
|
1202
|
+
*/
|
|
1203
|
+
HTMLElement.prototype.clone = function () {
|
|
1204
|
+
return parse(this.toString()).firstChild;
|
|
1205
|
+
};
|
|
1119
1206
|
return HTMLElement;
|
|
1120
1207
|
}(node_2.default));
|
|
1121
1208
|
exports.default = HTMLElement;
|
|
@@ -1208,8 +1295,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1208
1295
|
pre: true,
|
|
1209
1296
|
};
|
|
1210
1297
|
var element_names = Object.keys(elements);
|
|
1211
|
-
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^"
|
|
1212
|
-
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^"
|
|
1298
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
1299
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
1213
1300
|
function element_should_be_ignore(tag) {
|
|
1214
1301
|
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
1215
1302
|
}
|
|
@@ -1224,7 +1311,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1224
1311
|
var noNestedTagIndex = undefined;
|
|
1225
1312
|
var match;
|
|
1226
1313
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1227
|
-
data = "<"
|
|
1314
|
+
data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
|
|
1228
1315
|
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1229
1316
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1230
1317
|
var frameFlagOffset = frameflag.length + 2;
|
|
@@ -1292,7 +1379,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1292
1379
|
stack.push(currentParent);
|
|
1293
1380
|
if (is_block_text_element(tagName)) {
|
|
1294
1381
|
// Find closing tag
|
|
1295
|
-
var closeMarkup = "</"
|
|
1382
|
+
var closeMarkup = "</".concat(tagName, ">");
|
|
1296
1383
|
var closeIndex = lowerCaseTagName
|
|
1297
1384
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1298
1385
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
@@ -1359,18 +1446,25 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1359
1446
|
if (last.parentNode && last.parentNode.parentNode) {
|
|
1360
1447
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
|
1361
1448
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
oneBefore.
|
|
1365
|
-
|
|
1366
|
-
|
|
1449
|
+
// This is wrong, because it puts the H3 outside its correct position, which should be inside the current HTML element; see issue 152 for more info
|
|
1450
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1451
|
+
oneBefore.removeChild(last);
|
|
1452
|
+
last.childNodes.forEach(function (child) {
|
|
1453
|
+
oneBefore.parentNode.appendChild(child);
|
|
1454
|
+
});
|
|
1455
|
+
stack.pop();
|
|
1456
|
+
}
|
|
1367
1457
|
}
|
|
1368
1458
|
else {
|
|
1369
1459
|
// Single error <div> <h3> </div> handle: Just removes <h3>
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1460
|
+
// Why remove? This is already an HTMLElement, and the missing <H3> has already been added in this case. See issue 152 for more info
|
|
1461
|
+
// eslint-disable-next-line no-lonely-if
|
|
1462
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1463
|
+
oneBefore.removeChild(last);
|
|
1464
|
+
last.childNodes.forEach(function (child) {
|
|
1465
|
+
oneBefore.appendChild(child);
|
|
1466
|
+
});
|
|
1467
|
+
}
|
|
1374
1468
|
}
|
|
1375
1469
|
}
|
|
1376
1470
|
else {
|
|
@@ -1389,45 +1483,6 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1389
1483
|
}
|
|
1390
1484
|
exports.parse = parse;
|
|
1391
1485
|
});
|
|
1392
|
-
define("nodes/node", ["require", "exports", "he"], function (require, exports, he_3) {
|
|
1393
|
-
"use strict";
|
|
1394
|
-
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1395
|
-
/**
|
|
1396
|
-
* Node Class as base class for TextNode and HTMLElement.
|
|
1397
|
-
*/
|
|
1398
|
-
var Node = /** @class */ (function () {
|
|
1399
|
-
function Node(parentNode, range) {
|
|
1400
|
-
if (parentNode === void 0) { parentNode = null; }
|
|
1401
|
-
this.parentNode = parentNode;
|
|
1402
|
-
this.childNodes = [];
|
|
1403
|
-
Object.defineProperty(this, 'range', {
|
|
1404
|
-
enumerable: false,
|
|
1405
|
-
writable: true,
|
|
1406
|
-
configurable: true,
|
|
1407
|
-
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
1408
|
-
});
|
|
1409
|
-
}
|
|
1410
|
-
Object.defineProperty(Node.prototype, "innerText", {
|
|
1411
|
-
get: function () {
|
|
1412
|
-
return this.rawText;
|
|
1413
|
-
},
|
|
1414
|
-
enumerable: false,
|
|
1415
|
-
configurable: true
|
|
1416
|
-
});
|
|
1417
|
-
Object.defineProperty(Node.prototype, "textContent", {
|
|
1418
|
-
get: function () {
|
|
1419
|
-
return (0, he_3.decode)(this.rawText);
|
|
1420
|
-
},
|
|
1421
|
-
set: function (val) {
|
|
1422
|
-
this.rawText = (0, he_3.encode)(val);
|
|
1423
|
-
},
|
|
1424
|
-
enumerable: false,
|
|
1425
|
-
configurable: true
|
|
1426
|
-
});
|
|
1427
|
-
return Node;
|
|
1428
|
-
}());
|
|
1429
|
-
exports.default = Node;
|
|
1430
|
-
});
|
|
1431
1486
|
define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], function (require, exports, node_3, type_4) {
|
|
1432
1487
|
"use strict";
|
|
1433
1488
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
@@ -1445,6 +1500,9 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
|
|
|
1445
1500
|
_this.nodeType = type_4.default.COMMENT_NODE;
|
|
1446
1501
|
return _this;
|
|
1447
1502
|
}
|
|
1503
|
+
CommentNode.prototype.clone = function () {
|
|
1504
|
+
return new CommentNode(this.rawText, null);
|
|
1505
|
+
};
|
|
1448
1506
|
Object.defineProperty(CommentNode.prototype, "text", {
|
|
1449
1507
|
/**
|
|
1450
1508
|
* Get unescaped text value of current node and its children.
|
|
@@ -1457,7 +1515,7 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
|
|
|
1457
1515
|
configurable: true
|
|
1458
1516
|
});
|
|
1459
1517
|
CommentNode.prototype.toString = function () {
|
|
1460
|
-
return "<!--"
|
|
1518
|
+
return "<!--".concat(this.rawText, "-->");
|
|
1461
1519
|
};
|
|
1462
1520
|
return CommentNode;
|
|
1463
1521
|
}(node_3.default));
|
package/dist/nodes/comment.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import HTMLElement from './html';
|
|
1
2
|
import Node from './node';
|
|
2
3
|
import NodeType from './type';
|
|
3
|
-
import HTMLElement from './html';
|
|
4
4
|
export default class CommentNode extends Node {
|
|
5
5
|
rawText: string;
|
|
6
|
+
clone(): CommentNode;
|
|
6
7
|
constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
|
|
7
8
|
/**
|
|
8
9
|
* Node Type declaration.
|
package/dist/nodes/comment.js
CHANGED
|
@@ -32,6 +32,9 @@ var CommentNode = /** @class */ (function (_super) {
|
|
|
32
32
|
_this.nodeType = type_1.default.COMMENT_NODE;
|
|
33
33
|
return _this;
|
|
34
34
|
}
|
|
35
|
+
CommentNode.prototype.clone = function () {
|
|
36
|
+
return new CommentNode(this.rawText, null);
|
|
37
|
+
};
|
|
35
38
|
Object.defineProperty(CommentNode.prototype, "text", {
|
|
36
39
|
/**
|
|
37
40
|
* Get unescaped text value of current node and its children.
|
|
@@ -44,7 +47,7 @@ var CommentNode = /** @class */ (function (_super) {
|
|
|
44
47
|
configurable: true
|
|
45
48
|
});
|
|
46
49
|
CommentNode.prototype.toString = function () {
|
|
47
|
-
return "<!--"
|
|
50
|
+
return "<!--".concat(this.rawText, "-->");
|
|
48
51
|
};
|
|
49
52
|
return CommentNode;
|
|
50
53
|
}(node_1.default));
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -189,11 +189,18 @@ export default class HTMLElement extends Node {
|
|
|
189
189
|
insertAdjacentHTML(where: InsertPosition, html: string): void;
|
|
190
190
|
get nextSibling(): Node;
|
|
191
191
|
get nextElementSibling(): HTMLElement;
|
|
192
|
+
get previousSibling(): Node;
|
|
193
|
+
get previousElementSibling(): HTMLElement;
|
|
192
194
|
get classNames(): string;
|
|
195
|
+
/**
|
|
196
|
+
* Clone this Node
|
|
197
|
+
*/
|
|
198
|
+
clone(): Node;
|
|
193
199
|
}
|
|
194
200
|
export interface Options {
|
|
195
201
|
lowerCaseTagName: boolean;
|
|
196
202
|
comment: boolean;
|
|
203
|
+
parseNoneClosedTags?: boolean;
|
|
197
204
|
blockTextElements: {
|
|
198
205
|
[tag: string]: boolean;
|
|
199
206
|
};
|
package/dist/nodes/html.js
CHANGED
|
@@ -39,14 +39,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
39
39
|
};
|
|
40
40
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
41
|
exports.parse = exports.base_parse = void 0;
|
|
42
|
-
var he_1 = __importDefault(require("he"));
|
|
43
42
|
var css_select_1 = require("css-select");
|
|
44
|
-
var
|
|
45
|
-
var type_1 = __importDefault(require("./type"));
|
|
46
|
-
var text_1 = __importDefault(require("./text"));
|
|
47
|
-
var matcher_1 = __importDefault(require("../matcher"));
|
|
43
|
+
var he_1 = __importDefault(require("he"));
|
|
48
44
|
var back_1 = __importDefault(require("../back"));
|
|
45
|
+
var matcher_1 = __importDefault(require("../matcher"));
|
|
49
46
|
var comment_1 = __importDefault(require("./comment"));
|
|
47
|
+
var node_1 = __importDefault(require("./node"));
|
|
48
|
+
var text_1 = __importDefault(require("./text"));
|
|
49
|
+
var type_1 = __importDefault(require("./type"));
|
|
50
50
|
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
51
51
|
function decode(val) {
|
|
52
52
|
// clone string
|
|
@@ -86,7 +86,7 @@ var DOMTokenList = /** @class */ (function () {
|
|
|
86
86
|
}
|
|
87
87
|
DOMTokenList.prototype._validate = function (c) {
|
|
88
88
|
if (/\s/.test(c)) {
|
|
89
|
-
throw new Error("DOMException in DOMTokenList.add: The token '"
|
|
89
|
+
throw new Error("DOMException in DOMTokenList.add: The token '".concat(c, "' contains HTML space characters, which are not valid in tokens."));
|
|
90
90
|
}
|
|
91
91
|
};
|
|
92
92
|
DOMTokenList.prototype.add = function (c) {
|
|
@@ -170,14 +170,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
170
170
|
);
|
|
171
171
|
if (keyAttrs.id) {
|
|
172
172
|
if (!rawAttrs) {
|
|
173
|
-
_this.rawAttrs = "id=\""
|
|
173
|
+
_this.rawAttrs = "id=\"".concat(keyAttrs.id, "\"");
|
|
174
174
|
}
|
|
175
175
|
}
|
|
176
176
|
if (keyAttrs.class) {
|
|
177
177
|
if (!rawAttrs) {
|
|
178
|
-
var cls = "class=\""
|
|
178
|
+
var cls = "class=\"".concat(_this.classList.toString(), "\"");
|
|
179
179
|
if (_this.rawAttrs) {
|
|
180
|
-
_this.rawAttrs += " "
|
|
180
|
+
_this.rawAttrs += " ".concat(cls);
|
|
181
181
|
}
|
|
182
182
|
else {
|
|
183
183
|
_this.rawAttrs = cls;
|
|
@@ -322,7 +322,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
322
322
|
else {
|
|
323
323
|
var text = node.trimmedText;
|
|
324
324
|
if (currentBlock.prependWhitespace) {
|
|
325
|
-
text = " "
|
|
325
|
+
text = " ".concat(text);
|
|
326
326
|
currentBlock.prependWhitespace = false;
|
|
327
327
|
}
|
|
328
328
|
currentBlock.push(text);
|
|
@@ -343,8 +343,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
343
343
|
HTMLElement.prototype.toString = function () {
|
|
344
344
|
var tag = this.rawTagName;
|
|
345
345
|
if (tag) {
|
|
346
|
-
var attrs = this.rawAttrs ? " "
|
|
347
|
-
return this.isVoidElement ? "<"
|
|
346
|
+
var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
|
|
347
|
+
return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
|
|
348
348
|
}
|
|
349
349
|
return this.innerHTML;
|
|
350
350
|
};
|
|
@@ -440,9 +440,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
440
440
|
res.push(' '.repeat(indention) + str);
|
|
441
441
|
}
|
|
442
442
|
function dfs(node) {
|
|
443
|
-
var idStr = node.id ? "#"
|
|
444
|
-
var classStr = node.classList.length ? "."
|
|
445
|
-
write(""
|
|
443
|
+
var idStr = node.id ? "#".concat(node.id) : '';
|
|
444
|
+
var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
445
|
+
write("".concat(node.rawTagName).concat(idStr).concat(classStr));
|
|
446
446
|
indention++;
|
|
447
447
|
node.childNodes.forEach(function (childNode) {
|
|
448
448
|
if (childNode.nodeType === type_1.default.ELEMENT_NODE) {
|
|
@@ -701,7 +701,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
701
701
|
if (val === undefined || val === 'null') {
|
|
702
702
|
return name;
|
|
703
703
|
}
|
|
704
|
-
return name
|
|
704
|
+
return "".concat(name, "=").concat(val);
|
|
705
705
|
})
|
|
706
706
|
.join(' ');
|
|
707
707
|
// Update this.id
|
|
@@ -748,7 +748,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
748
748
|
var val = _this.quoteAttribute(attrs[name]);
|
|
749
749
|
if (val === 'null' || val === '""')
|
|
750
750
|
return name;
|
|
751
|
-
return name
|
|
751
|
+
return "".concat(name, "=").concat(val);
|
|
752
752
|
})
|
|
753
753
|
.join(' ');
|
|
754
754
|
// Update this.id
|
|
@@ -776,7 +776,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
776
776
|
var val = attributes[name];
|
|
777
777
|
if (val === 'null' || val === '""')
|
|
778
778
|
return name;
|
|
779
|
-
return name
|
|
779
|
+
return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
|
|
780
780
|
})
|
|
781
781
|
.join(' ');
|
|
782
782
|
};
|
|
@@ -818,7 +818,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
818
818
|
});
|
|
819
819
|
}
|
|
820
820
|
else {
|
|
821
|
-
throw new Error("The value provided ('"
|
|
821
|
+
throw new Error("The value provided ('".concat(where, "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'"));
|
|
822
822
|
}
|
|
823
823
|
// if (!where || html === undefined || html === null) {
|
|
824
824
|
// return;
|
|
@@ -863,6 +863,45 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
863
863
|
enumerable: false,
|
|
864
864
|
configurable: true
|
|
865
865
|
});
|
|
866
|
+
Object.defineProperty(HTMLElement.prototype, "previousSibling", {
|
|
867
|
+
get: function () {
|
|
868
|
+
if (this.parentNode) {
|
|
869
|
+
var children = this.parentNode.childNodes;
|
|
870
|
+
var i = children.length;
|
|
871
|
+
while (i > 0) {
|
|
872
|
+
var child = children[--i];
|
|
873
|
+
if (this === child)
|
|
874
|
+
return children[i - 1] || null;
|
|
875
|
+
}
|
|
876
|
+
return null;
|
|
877
|
+
}
|
|
878
|
+
},
|
|
879
|
+
enumerable: false,
|
|
880
|
+
configurable: true
|
|
881
|
+
});
|
|
882
|
+
Object.defineProperty(HTMLElement.prototype, "previousElementSibling", {
|
|
883
|
+
get: function () {
|
|
884
|
+
if (this.parentNode) {
|
|
885
|
+
var children = this.parentNode.childNodes;
|
|
886
|
+
var i = children.length;
|
|
887
|
+
var find = false;
|
|
888
|
+
while (i > 0) {
|
|
889
|
+
var child = children[--i];
|
|
890
|
+
if (find) {
|
|
891
|
+
if (child instanceof HTMLElement) {
|
|
892
|
+
return child || null;
|
|
893
|
+
}
|
|
894
|
+
}
|
|
895
|
+
else if (this === child) {
|
|
896
|
+
find = true;
|
|
897
|
+
}
|
|
898
|
+
}
|
|
899
|
+
return null;
|
|
900
|
+
}
|
|
901
|
+
},
|
|
902
|
+
enumerable: false,
|
|
903
|
+
configurable: true
|
|
904
|
+
});
|
|
866
905
|
Object.defineProperty(HTMLElement.prototype, "classNames", {
|
|
867
906
|
get: function () {
|
|
868
907
|
return this.classList.toString();
|
|
@@ -870,6 +909,12 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
870
909
|
enumerable: false,
|
|
871
910
|
configurable: true
|
|
872
911
|
});
|
|
912
|
+
/**
|
|
913
|
+
* Clone this Node
|
|
914
|
+
*/
|
|
915
|
+
HTMLElement.prototype.clone = function () {
|
|
916
|
+
return parse(this.toString()).firstChild;
|
|
917
|
+
};
|
|
873
918
|
return HTMLElement;
|
|
874
919
|
}(node_1.default));
|
|
875
920
|
exports.default = HTMLElement;
|
|
@@ -962,8 +1007,8 @@ function base_parse(data, options) {
|
|
|
962
1007
|
pre: true,
|
|
963
1008
|
};
|
|
964
1009
|
var element_names = Object.keys(elements);
|
|
965
|
-
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^"
|
|
966
|
-
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^"
|
|
1010
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
1011
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
|
|
967
1012
|
function element_should_be_ignore(tag) {
|
|
968
1013
|
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
969
1014
|
}
|
|
@@ -978,7 +1023,7 @@ function base_parse(data, options) {
|
|
|
978
1023
|
var noNestedTagIndex = undefined;
|
|
979
1024
|
var match;
|
|
980
1025
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
981
|
-
data = "<"
|
|
1026
|
+
data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
|
|
982
1027
|
var lowerCaseTagName = options.lowerCaseTagName;
|
|
983
1028
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
984
1029
|
var frameFlagOffset = frameflag.length + 2;
|
|
@@ -1046,7 +1091,7 @@ function base_parse(data, options) {
|
|
|
1046
1091
|
stack.push(currentParent);
|
|
1047
1092
|
if (is_block_text_element(tagName)) {
|
|
1048
1093
|
// Find closing tag
|
|
1049
|
-
var closeMarkup = "</"
|
|
1094
|
+
var closeMarkup = "</".concat(tagName, ">");
|
|
1050
1095
|
var closeIndex = lowerCaseTagName
|
|
1051
1096
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1052
1097
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
@@ -1113,18 +1158,25 @@ function parse(data, options) {
|
|
|
1113
1158
|
if (last.parentNode && last.parentNode.parentNode) {
|
|
1114
1159
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
|
1115
1160
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
oneBefore.
|
|
1119
|
-
|
|
1120
|
-
|
|
1161
|
+
// this is wrong, becouse this will put the H3 outside the current right position which should be inside the current Html Element, see issue 152 for more info
|
|
1162
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1163
|
+
oneBefore.removeChild(last);
|
|
1164
|
+
last.childNodes.forEach(function (child) {
|
|
1165
|
+
oneBefore.parentNode.appendChild(child);
|
|
1166
|
+
});
|
|
1167
|
+
stack.pop();
|
|
1168
|
+
}
|
|
1121
1169
|
}
|
|
1122
1170
|
else {
|
|
1123
1171
|
// Single error <div> <h3> </div> handle: Just removes <h3>
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1172
|
+
// Why remove? this is already a HtmlElement and the missing <H3> is already added in this case. see issue 152 for more info
|
|
1173
|
+
// eslint-disable-next-line no-lonely-if
|
|
1174
|
+
if (options.parseNoneClosedTags !== true) {
|
|
1175
|
+
oneBefore.removeChild(last);
|
|
1176
|
+
last.childNodes.forEach(function (child) {
|
|
1177
|
+
oneBefore.appendChild(child);
|
|
1178
|
+
});
|
|
1179
|
+
}
|
|
1128
1180
|
}
|
|
1129
1181
|
}
|
|
1130
1182
|
else {
|
package/dist/nodes/node.d.ts
CHANGED
|
@@ -11,6 +11,7 @@ export default abstract class Node {
|
|
|
11
11
|
abstract text: string;
|
|
12
12
|
abstract rawText: string;
|
|
13
13
|
abstract toString(): string;
|
|
14
|
+
abstract clone(): Node;
|
|
14
15
|
constructor(parentNode?: HTMLElement, range?: [number, number]);
|
|
15
16
|
get innerText(): string;
|
|
16
17
|
get textContent(): string;
|
package/dist/nodes/text.d.ts
CHANGED
package/dist/nodes/text.js
CHANGED
|
@@ -37,6 +37,9 @@ var TextNode = /** @class */ (function (_super) {
|
|
|
37
37
|
_this._rawText = rawText;
|
|
38
38
|
return _this;
|
|
39
39
|
}
|
|
40
|
+
TextNode.prototype.clone = function () {
|
|
41
|
+
return new TextNode(this._rawText, null);
|
|
42
|
+
};
|
|
40
43
|
Object.defineProperty(TextNode.prototype, "rawText", {
|
|
41
44
|
get: function () {
|
|
42
45
|
return this._rawText;
|
package/package.json
CHANGED
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "5.2.
|
|
3
|
+
"version": "5.2.6",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
|
+
"exports": {
|
|
8
|
+
"require": "./dist/index.js",
|
|
9
|
+
"import": "./esm/index.js",
|
|
10
|
+
"types": "./dist/index.d.ts"
|
|
11
|
+
},
|
|
7
12
|
"scripts": {
|
|
8
13
|
"compile": "tsc",
|
|
9
14
|
"build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
|
|
@@ -48,7 +53,7 @@
|
|
|
48
53
|
"registry": "https://registry.npmjs.org"
|
|
49
54
|
},
|
|
50
55
|
"dependencies": {
|
|
51
|
-
"css-select": "^4.1
|
|
56
|
+
"css-select": "^4.2.1",
|
|
52
57
|
"he": "1.2.0"
|
|
53
58
|
},
|
|
54
59
|
"devDependencies": {
|
|
@@ -67,7 +72,6 @@
|
|
|
67
72
|
"high5": "^1.0.0",
|
|
68
73
|
"html-dom-parser": "^1.0.4",
|
|
69
74
|
"html-parser": "^0.11.0",
|
|
70
|
-
"html5": "^1.0.5",
|
|
71
75
|
"html5parser": "^2.0.2",
|
|
72
76
|
"htmljs-parser": "^2.11.1",
|
|
73
77
|
"htmlparser": "^1.7.7",
|
|
@@ -109,9 +113,5 @@
|
|
|
109
113
|
"url": "https://github.com/taoqf/node-fast-html-parser/issues"
|
|
110
114
|
},
|
|
111
115
|
"homepage": "https://github.com/taoqf/node-fast-html-parser",
|
|
112
|
-
"sideEffects": false
|
|
113
|
-
"exports": {
|
|
114
|
-
"require": "./dist/index.js",
|
|
115
|
-
"import": "./esm/index.js"
|
|
116
|
-
}
|
|
116
|
+
"sideEffects": false
|
|
117
117
|
}
|