node-html-parser 1.2.12 → 1.2.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/main.js +21 -14
- package/dist/nodes/html.d.ts +1 -1
- package/dist/nodes/html.js +21 -14
- package/package.json +1 -1
package/dist/main.js
CHANGED
|
@@ -544,15 +544,11 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
544
544
|
HTMLElement.prototype.toString = function () {
|
|
545
545
|
var tag = this.tagName;
|
|
546
546
|
if (tag) {
|
|
547
|
-
var
|
|
548
|
-
var is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag);
|
|
547
|
+
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
|
|
549
548
|
var attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
|
|
550
|
-
if (
|
|
549
|
+
if (is_void) {
|
|
551
550
|
return "<" + tag + attrs + ">";
|
|
552
551
|
}
|
|
553
|
-
else if (is_self_closed) {
|
|
554
|
-
return "<" + tag + attrs + " />";
|
|
555
|
-
}
|
|
556
552
|
else {
|
|
557
553
|
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
558
554
|
}
|
|
@@ -570,12 +566,13 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
570
566
|
enumerable: true,
|
|
571
567
|
configurable: true
|
|
572
568
|
});
|
|
573
|
-
HTMLElement.prototype.set_content = function (content) {
|
|
569
|
+
HTMLElement.prototype.set_content = function (content, options) {
|
|
570
|
+
if (options === void 0) { options = {}; }
|
|
574
571
|
if (content instanceof node_3.default) {
|
|
575
572
|
content = [content];
|
|
576
573
|
}
|
|
577
574
|
else if (typeof content == 'string') {
|
|
578
|
-
var r = parse(content);
|
|
575
|
+
var r = parse(content, options);
|
|
579
576
|
content = r.childNodes.length ? r.childNodes : [new text_1.default(content)];
|
|
580
577
|
}
|
|
581
578
|
this.childNodes = content;
|
|
@@ -693,7 +690,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
693
690
|
return this.childNodes.reduce(function (res, cur) {
|
|
694
691
|
stack.push([cur, 0, false]);
|
|
695
692
|
while (stack.length) {
|
|
696
|
-
var state = back_1.default(stack);
|
|
693
|
+
var state = back_1.default(stack); // get last element
|
|
697
694
|
var el = state[0];
|
|
698
695
|
if (state[1] === 0) {
|
|
699
696
|
// Seen for first time.
|
|
@@ -701,10 +698,12 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
701
698
|
stack.pop();
|
|
702
699
|
continue;
|
|
703
700
|
}
|
|
704
|
-
|
|
701
|
+
var html_el = el;
|
|
702
|
+
state[2] = matcher.advance(html_el);
|
|
705
703
|
if (state[2]) {
|
|
706
704
|
if (matcher.matched) {
|
|
707
|
-
res.push(
|
|
705
|
+
res.push(html_el);
|
|
706
|
+
res.push.apply(res, (html_el.querySelectorAll(selector)));
|
|
708
707
|
// no need to go further.
|
|
709
708
|
matcher.rewind();
|
|
710
709
|
stack.pop();
|
|
@@ -1002,6 +1001,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
1002
1001
|
style: true,
|
|
1003
1002
|
pre: true
|
|
1004
1003
|
};
|
|
1004
|
+
var frameflag = 'documentfragmentcontainer';
|
|
1005
1005
|
/**
|
|
1006
1006
|
* Parses HTML and returns a root element
|
|
1007
1007
|
* Parse a chunk of HTML source.
|
|
@@ -1015,6 +1015,8 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
1015
1015
|
var stack = [root];
|
|
1016
1016
|
var lastTextPos = -1;
|
|
1017
1017
|
var match;
|
|
1018
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1019
|
+
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
1018
1020
|
var _loop_1 = function () {
|
|
1019
1021
|
if (lastTextPos > -1) {
|
|
1020
1022
|
if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
|
|
@@ -1024,6 +1026,9 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
1024
1026
|
}
|
|
1025
1027
|
}
|
|
1026
1028
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1029
|
+
if (match[2] === frameflag) {
|
|
1030
|
+
return "continue";
|
|
1031
|
+
}
|
|
1027
1032
|
if (match[0][1] === '!') {
|
|
1028
1033
|
// this is a comment
|
|
1029
1034
|
if (options.comment) {
|
|
@@ -1033,8 +1038,9 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
1033
1038
|
}
|
|
1034
1039
|
return "continue";
|
|
1035
1040
|
}
|
|
1036
|
-
if (options.lowerCaseTagName)
|
|
1041
|
+
if (options.lowerCaseTagName) {
|
|
1037
1042
|
match[2] = match[2].toLowerCase();
|
|
1043
|
+
}
|
|
1038
1044
|
if (!match[1]) {
|
|
1039
1045
|
// not </ tags
|
|
1040
1046
|
var attrs = {};
|
|
@@ -1048,6 +1054,8 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
1048
1054
|
currentParent = back_1.default(stack);
|
|
1049
1055
|
}
|
|
1050
1056
|
}
|
|
1057
|
+
// ignore container tag we add above
|
|
1058
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1051
1059
|
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
|
|
1052
1060
|
stack.push(currentParent);
|
|
1053
1061
|
if (kBlockTextElements[match[2]]) {
|
|
@@ -1083,8 +1091,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
|
|
|
1083
1091
|
}
|
|
1084
1092
|
}
|
|
1085
1093
|
}
|
|
1086
|
-
if (match[1] || match[4] ||
|
|
1087
|
-
kSelfClosingElements[match[2]]) {
|
|
1094
|
+
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
|
|
1088
1095
|
// </ or /> or <br> etc.
|
|
1089
1096
|
while (true) {
|
|
1090
1097
|
if (currentParent.tagName === match[2]) {
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -70,7 +70,7 @@ export default class HTMLElement extends Node {
|
|
|
70
70
|
get structuredText(): string;
|
|
71
71
|
toString(): string;
|
|
72
72
|
get innerHTML(): string;
|
|
73
|
-
set_content(content: string | Node | Node[]): void;
|
|
73
|
+
set_content(content: string | Node | Node[], options?: Options): void;
|
|
74
74
|
get outerHTML(): string;
|
|
75
75
|
/**
|
|
76
76
|
* Trim element from right (in block) after seeing pattern in a TextNode.
|
package/dist/nodes/html.js
CHANGED
|
@@ -176,15 +176,11 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
176
176
|
HTMLElement.prototype.toString = function () {
|
|
177
177
|
var tag = this.tagName;
|
|
178
178
|
if (tag) {
|
|
179
|
-
var
|
|
180
|
-
var is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag);
|
|
179
|
+
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
|
|
181
180
|
var attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
|
|
182
|
-
if (
|
|
181
|
+
if (is_void) {
|
|
183
182
|
return "<" + tag + attrs + ">";
|
|
184
183
|
}
|
|
185
|
-
else if (is_self_closed) {
|
|
186
|
-
return "<" + tag + attrs + " />";
|
|
187
|
-
}
|
|
188
184
|
else {
|
|
189
185
|
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
190
186
|
}
|
|
@@ -202,12 +198,13 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
202
198
|
enumerable: true,
|
|
203
199
|
configurable: true
|
|
204
200
|
});
|
|
205
|
-
HTMLElement.prototype.set_content = function (content) {
|
|
201
|
+
HTMLElement.prototype.set_content = function (content, options) {
|
|
202
|
+
if (options === void 0) { options = {}; }
|
|
206
203
|
if (content instanceof node_1.default) {
|
|
207
204
|
content = [content];
|
|
208
205
|
}
|
|
209
206
|
else if (typeof content == 'string') {
|
|
210
|
-
var r = parse(content);
|
|
207
|
+
var r = parse(content, options);
|
|
211
208
|
content = r.childNodes.length ? r.childNodes : [new text_1.default(content)];
|
|
212
209
|
}
|
|
213
210
|
this.childNodes = content;
|
|
@@ -325,7 +322,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
325
322
|
return this.childNodes.reduce(function (res, cur) {
|
|
326
323
|
stack.push([cur, 0, false]);
|
|
327
324
|
while (stack.length) {
|
|
328
|
-
var state = back_1.default(stack);
|
|
325
|
+
var state = back_1.default(stack); // get last element
|
|
329
326
|
var el = state[0];
|
|
330
327
|
if (state[1] === 0) {
|
|
331
328
|
// Seen for first time.
|
|
@@ -333,10 +330,12 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
333
330
|
stack.pop();
|
|
334
331
|
continue;
|
|
335
332
|
}
|
|
336
|
-
|
|
333
|
+
var html_el = el;
|
|
334
|
+
state[2] = matcher.advance(html_el);
|
|
337
335
|
if (state[2]) {
|
|
338
336
|
if (matcher.matched) {
|
|
339
|
-
res.push(
|
|
337
|
+
res.push(html_el);
|
|
338
|
+
res.push.apply(res, (html_el.querySelectorAll(selector)));
|
|
340
339
|
// no need to go further.
|
|
341
340
|
matcher.rewind();
|
|
342
341
|
stack.pop();
|
|
@@ -634,6 +633,7 @@ var kBlockTextElements = {
|
|
|
634
633
|
style: true,
|
|
635
634
|
pre: true
|
|
636
635
|
};
|
|
636
|
+
var frameflag = 'documentfragmentcontainer';
|
|
637
637
|
/**
|
|
638
638
|
* Parses HTML and returns a root element
|
|
639
639
|
* Parse a chunk of HTML source.
|
|
@@ -647,6 +647,8 @@ function parse(data, options) {
|
|
|
647
647
|
var stack = [root];
|
|
648
648
|
var lastTextPos = -1;
|
|
649
649
|
var match;
|
|
650
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
651
|
+
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
650
652
|
var _loop_1 = function () {
|
|
651
653
|
if (lastTextPos > -1) {
|
|
652
654
|
if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
|
|
@@ -656,6 +658,9 @@ function parse(data, options) {
|
|
|
656
658
|
}
|
|
657
659
|
}
|
|
658
660
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
661
|
+
if (match[2] === frameflag) {
|
|
662
|
+
return "continue";
|
|
663
|
+
}
|
|
659
664
|
if (match[0][1] === '!') {
|
|
660
665
|
// this is a comment
|
|
661
666
|
if (options.comment) {
|
|
@@ -665,8 +670,9 @@ function parse(data, options) {
|
|
|
665
670
|
}
|
|
666
671
|
return "continue";
|
|
667
672
|
}
|
|
668
|
-
if (options.lowerCaseTagName)
|
|
673
|
+
if (options.lowerCaseTagName) {
|
|
669
674
|
match[2] = match[2].toLowerCase();
|
|
675
|
+
}
|
|
670
676
|
if (!match[1]) {
|
|
671
677
|
// not </ tags
|
|
672
678
|
var attrs = {};
|
|
@@ -680,6 +686,8 @@ function parse(data, options) {
|
|
|
680
686
|
currentParent = back_1.default(stack);
|
|
681
687
|
}
|
|
682
688
|
}
|
|
689
|
+
// ignore container tag we add above
|
|
690
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
683
691
|
currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
|
|
684
692
|
stack.push(currentParent);
|
|
685
693
|
if (kBlockTextElements[match[2]]) {
|
|
@@ -715,8 +723,7 @@ function parse(data, options) {
|
|
|
715
723
|
}
|
|
716
724
|
}
|
|
717
725
|
}
|
|
718
|
-
if (match[1] || match[4] ||
|
|
719
|
-
kSelfClosingElements[match[2]]) {
|
|
726
|
+
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
|
|
720
727
|
// </ or /> or <br> etc.
|
|
721
728
|
while (true) {
|
|
722
729
|
if (currentParent.tagName === match[2]) {
|
package/package.json
CHANGED