node-html-parser 1.2.12 → 1.2.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -544,15 +544,11 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
544
544
  HTMLElement.prototype.toString = function () {
545
545
  var tag = this.tagName;
546
546
  if (tag) {
547
- var is_un_closed = /^meta$/i.test(tag);
548
- var is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag);
547
+ var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
549
548
  var attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
550
- if (is_un_closed) {
549
+ if (is_void) {
551
550
  return "<" + tag + attrs + ">";
552
551
  }
553
- else if (is_self_closed) {
554
- return "<" + tag + attrs + " />";
555
- }
556
552
  else {
557
553
  return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
558
554
  }
@@ -570,12 +566,13 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
570
566
  enumerable: true,
571
567
  configurable: true
572
568
  });
573
- HTMLElement.prototype.set_content = function (content) {
569
+ HTMLElement.prototype.set_content = function (content, options) {
570
+ if (options === void 0) { options = {}; }
574
571
  if (content instanceof node_3.default) {
575
572
  content = [content];
576
573
  }
577
574
  else if (typeof content == 'string') {
578
- var r = parse(content);
575
+ var r = parse(content, options);
579
576
  content = r.childNodes.length ? r.childNodes : [new text_1.default(content)];
580
577
  }
581
578
  this.childNodes = content;
@@ -693,7 +690,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
693
690
  return this.childNodes.reduce(function (res, cur) {
694
691
  stack.push([cur, 0, false]);
695
692
  while (stack.length) {
696
- var state = back_1.default(stack);
693
+ var state = back_1.default(stack); // get last element
697
694
  var el = state[0];
698
695
  if (state[1] === 0) {
699
696
  // Seen for first time.
@@ -701,10 +698,12 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
701
698
  stack.pop();
702
699
  continue;
703
700
  }
704
- state[2] = matcher.advance(el);
701
+ var html_el = el;
702
+ state[2] = matcher.advance(html_el);
705
703
  if (state[2]) {
706
704
  if (matcher.matched) {
707
- res.push(el);
705
+ res.push(html_el);
706
+ res.push.apply(res, (html_el.querySelectorAll(selector)));
708
707
  // no need to go further.
709
708
  matcher.rewind();
710
709
  stack.pop();
@@ -1002,6 +1001,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1002
1001
  style: true,
1003
1002
  pre: true
1004
1003
  };
1004
+ var frameflag = 'documentfragmentcontainer';
1005
1005
  /**
1006
1006
  * Parses HTML and returns a root element
1007
1007
  * Parse a chunk of HTML source.
@@ -1015,6 +1015,8 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1015
1015
  var stack = [root];
1016
1016
  var lastTextPos = -1;
1017
1017
  var match;
1018
+ // https://github.com/taoqf/node-html-parser/issues/38
1019
+ data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
1018
1020
  var _loop_1 = function () {
1019
1021
  if (lastTextPos > -1) {
1020
1022
  if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
@@ -1024,6 +1026,9 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1024
1026
  }
1025
1027
  }
1026
1028
  lastTextPos = kMarkupPattern.lastIndex;
1029
+ if (match[2] === frameflag) {
1030
+ return "continue";
1031
+ }
1027
1032
  if (match[0][1] === '!') {
1028
1033
  // this is a comment
1029
1034
  if (options.comment) {
@@ -1033,8 +1038,9 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1033
1038
  }
1034
1039
  return "continue";
1035
1040
  }
1036
- if (options.lowerCaseTagName)
1041
+ if (options.lowerCaseTagName) {
1037
1042
  match[2] = match[2].toLowerCase();
1043
+ }
1038
1044
  if (!match[1]) {
1039
1045
  // not </ tags
1040
1046
  var attrs = {};
@@ -1048,6 +1054,8 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1048
1054
  currentParent = back_1.default(stack);
1049
1055
  }
1050
1056
  }
1057
+ // ignore container tag we add above
1058
+ // https://github.com/taoqf/node-html-parser/issues/38
1051
1059
  currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
1052
1060
  stack.push(currentParent);
1053
1061
  if (kBlockTextElements[match[2]]) {
@@ -1083,8 +1091,7 @@ define("nodes/html", ["require", "exports", "he", "nodes/node", "nodes/type", "n
1083
1091
  }
1084
1092
  }
1085
1093
  }
1086
- if (match[1] || match[4] ||
1087
- kSelfClosingElements[match[2]]) {
1094
+ if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
1088
1095
  // </ or /> or <br> etc.
1089
1096
  while (true) {
1090
1097
  if (currentParent.tagName === match[2]) {
@@ -70,7 +70,7 @@ export default class HTMLElement extends Node {
70
70
  get structuredText(): string;
71
71
  toString(): string;
72
72
  get innerHTML(): string;
73
- set_content(content: string | Node | Node[]): void;
73
+ set_content(content: string | Node | Node[], options?: Options): void;
74
74
  get outerHTML(): string;
75
75
  /**
76
76
  * Trim element from right (in block) after seeing pattern in a TextNode.
@@ -176,15 +176,11 @@ var HTMLElement = /** @class */ (function (_super) {
176
176
  HTMLElement.prototype.toString = function () {
177
177
  var tag = this.tagName;
178
178
  if (tag) {
179
- var is_un_closed = /^meta$/i.test(tag);
180
- var is_self_closed = /^(img|br|hr|area|base|input|doctype|link)$/i.test(tag);
179
+ var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
181
180
  var attrs = this.rawAttrs ? ' ' + this.rawAttrs : '';
182
- if (is_un_closed) {
181
+ if (is_void) {
183
182
  return "<" + tag + attrs + ">";
184
183
  }
185
- else if (is_self_closed) {
186
- return "<" + tag + attrs + " />";
187
- }
188
184
  else {
189
185
  return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
190
186
  }
@@ -202,12 +198,13 @@ var HTMLElement = /** @class */ (function (_super) {
202
198
  enumerable: true,
203
199
  configurable: true
204
200
  });
205
- HTMLElement.prototype.set_content = function (content) {
201
+ HTMLElement.prototype.set_content = function (content, options) {
202
+ if (options === void 0) { options = {}; }
206
203
  if (content instanceof node_1.default) {
207
204
  content = [content];
208
205
  }
209
206
  else if (typeof content == 'string') {
210
- var r = parse(content);
207
+ var r = parse(content, options);
211
208
  content = r.childNodes.length ? r.childNodes : [new text_1.default(content)];
212
209
  }
213
210
  this.childNodes = content;
@@ -325,7 +322,7 @@ var HTMLElement = /** @class */ (function (_super) {
325
322
  return this.childNodes.reduce(function (res, cur) {
326
323
  stack.push([cur, 0, false]);
327
324
  while (stack.length) {
328
- var state = back_1.default(stack);
325
+ var state = back_1.default(stack); // get last element
329
326
  var el = state[0];
330
327
  if (state[1] === 0) {
331
328
  // Seen for first time.
@@ -333,10 +330,12 @@ var HTMLElement = /** @class */ (function (_super) {
333
330
  stack.pop();
334
331
  continue;
335
332
  }
336
- state[2] = matcher.advance(el);
333
+ var html_el = el;
334
+ state[2] = matcher.advance(html_el);
337
335
  if (state[2]) {
338
336
  if (matcher.matched) {
339
- res.push(el);
337
+ res.push(html_el);
338
+ res.push.apply(res, (html_el.querySelectorAll(selector)));
340
339
  // no need to go further.
341
340
  matcher.rewind();
342
341
  stack.pop();
@@ -634,6 +633,7 @@ var kBlockTextElements = {
634
633
  style: true,
635
634
  pre: true
636
635
  };
636
+ var frameflag = 'documentfragmentcontainer';
637
637
  /**
638
638
  * Parses HTML and returns a root element
639
639
  * Parse a chunk of HTML source.
@@ -647,6 +647,8 @@ function parse(data, options) {
647
647
  var stack = [root];
648
648
  var lastTextPos = -1;
649
649
  var match;
650
+ // https://github.com/taoqf/node-html-parser/issues/38
651
+ data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
650
652
  var _loop_1 = function () {
651
653
  if (lastTextPos > -1) {
652
654
  if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
@@ -656,6 +658,9 @@ function parse(data, options) {
656
658
  }
657
659
  }
658
660
  lastTextPos = kMarkupPattern.lastIndex;
661
+ if (match[2] === frameflag) {
662
+ return "continue";
663
+ }
659
664
  if (match[0][1] === '!') {
660
665
  // this is a comment
661
666
  if (options.comment) {
@@ -665,8 +670,9 @@ function parse(data, options) {
665
670
  }
666
671
  return "continue";
667
672
  }
668
- if (options.lowerCaseTagName)
673
+ if (options.lowerCaseTagName) {
669
674
  match[2] = match[2].toLowerCase();
675
+ }
670
676
  if (!match[1]) {
671
677
  // not </ tags
672
678
  var attrs = {};
@@ -680,6 +686,8 @@ function parse(data, options) {
680
686
  currentParent = back_1.default(stack);
681
687
  }
682
688
  }
689
+ // ignore container tag we add above
690
+ // https://github.com/taoqf/node-html-parser/issues/38
683
691
  currentParent = currentParent.appendChild(new HTMLElement(match[2], attrs, match[3]));
684
692
  stack.push(currentParent);
685
693
  if (kBlockTextElements[match[2]]) {
@@ -715,8 +723,7 @@ function parse(data, options) {
715
723
  }
716
724
  }
717
725
  }
718
- if (match[1] || match[4] ||
719
- kSelfClosingElements[match[2]]) {
726
+ if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
720
727
  // </ or /> or <br> etc.
721
728
  while (true) {
722
729
  if (currentParent.tagName === match[2]) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "1.2.12",
3
+ "version": "1.2.16",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",