node-html-parser 4.1.3 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -34,7 +34,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
34
34
  ar[i] = from[i];
35
35
  }
36
36
  }
37
- return to.concat(ar || from);
37
+ return to.concat(ar || Array.prototype.slice.call(from));
38
38
  };
39
39
  define("back", ["require", "exports"], function (require, exports) {
40
40
  "use strict";
@@ -293,89 +293,40 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
293
293
  matcher_1 = __importDefault(matcher_1);
294
294
  back_1 = __importDefault(back_1);
295
295
  comment_1 = __importDefault(comment_1);
296
- // const { decode } = he;
296
+ var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
297
297
  function decode(val) {
298
298
  // clone string
299
299
  return JSON.parse(JSON.stringify(he_2.default.decode(val)));
300
300
  }
301
301
  // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
302
+ var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
303
+ var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
304
+ var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
305
+ var tableTags = ['table', 'td', 'tr'];
306
+ var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
302
307
  var kBlockElements = new Set();
303
- kBlockElements.add('address');
304
- kBlockElements.add('ADDRESS');
305
- kBlockElements.add('article');
306
- kBlockElements.add('ARTICLE');
307
- kBlockElements.add('aside');
308
- kBlockElements.add('ASIDE');
309
- kBlockElements.add('blockquote');
310
- kBlockElements.add('BLOCKQUOTE');
311
- kBlockElements.add('br');
312
- kBlockElements.add('BR');
313
- kBlockElements.add('details');
314
- kBlockElements.add('DETAILS');
315
- kBlockElements.add('dialog');
316
- kBlockElements.add('DIALOG');
317
- kBlockElements.add('dd');
318
- kBlockElements.add('DD');
319
- kBlockElements.add('div');
320
- kBlockElements.add('DIV');
321
- kBlockElements.add('dl');
322
- kBlockElements.add('DL');
323
- kBlockElements.add('dt');
324
- kBlockElements.add('DT');
325
- kBlockElements.add('fieldset');
326
- kBlockElements.add('FIELDSET');
327
- kBlockElements.add('figcaption');
328
- kBlockElements.add('FIGCAPTION');
329
- kBlockElements.add('figure');
330
- kBlockElements.add('FIGURE');
331
- kBlockElements.add('footer');
332
- kBlockElements.add('FOOTER');
333
- kBlockElements.add('form');
334
- kBlockElements.add('FORM');
335
- kBlockElements.add('h1');
336
- kBlockElements.add('H1');
337
- kBlockElements.add('h2');
338
- kBlockElements.add('H2');
339
- kBlockElements.add('h3');
340
- kBlockElements.add('H3');
341
- kBlockElements.add('h4');
342
- kBlockElements.add('H4');
343
- kBlockElements.add('h5');
344
- kBlockElements.add('H5');
345
- kBlockElements.add('h6');
346
- kBlockElements.add('H6');
347
- kBlockElements.add('header');
348
- kBlockElements.add('HEADER');
349
- kBlockElements.add('hgroup');
350
- kBlockElements.add('HGROUP');
351
- kBlockElements.add('hr');
352
- kBlockElements.add('HR');
353
- kBlockElements.add('li');
354
- kBlockElements.add('LI');
355
- kBlockElements.add('main');
356
- kBlockElements.add('MAIN');
357
- kBlockElements.add('nav');
358
- kBlockElements.add('NAV');
359
- kBlockElements.add('ol');
360
- kBlockElements.add('OL');
361
- kBlockElements.add('p');
362
- kBlockElements.add('P');
363
- kBlockElements.add('pre');
364
- kBlockElements.add('PRE');
365
- kBlockElements.add('section');
366
- kBlockElements.add('SECTION');
367
- kBlockElements.add('table');
368
- kBlockElements.add('TABLE');
369
- kBlockElements.add('td');
370
- kBlockElements.add('TD');
371
- kBlockElements.add('tr');
372
- kBlockElements.add('TR');
373
- kBlockElements.add('ul');
374
- kBlockElements.add('UL');
308
+ function addToKBlockElement() {
309
+ var args = [];
310
+ for (var _i = 0; _i < arguments.length; _i++) {
311
+ args[_i] = arguments[_i];
312
+ }
313
+ var addToSet = function (array) {
314
+ for (var index = 0; index < array.length; index++) {
315
+ var element = array[index];
316
+ kBlockElements.add(element);
317
+ kBlockElements.add(element.toUpperCase());
318
+ }
319
+ };
320
+ for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
321
+ var arg = args_1[_a];
322
+ addToSet(arg);
323
+ }
324
+ }
325
+ addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
375
326
  var DOMTokenList = /** @class */ (function () {
376
327
  function DOMTokenList(valuesInit, afterUpdate) {
377
328
  if (valuesInit === void 0) { valuesInit = []; }
378
- if (afterUpdate === void 0) { afterUpdate = (function () { return null; }); }
329
+ if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
379
330
  this._set = new Set(valuesInit);
380
331
  this._afterUpdate = afterUpdate;
381
332
  }
@@ -396,8 +347,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
396
347
  this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
397
348
  };
398
349
  DOMTokenList.prototype.remove = function (c) {
399
- this._set.delete(c) &&
400
- this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
350
+ this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
401
351
  };
402
352
  DOMTokenList.prototype.toggle = function (c) {
403
353
  this._validate(c);
@@ -462,8 +412,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
462
412
  _this.rawAttrs = rawAttrs || '';
463
413
  _this.id = keyAttrs.id || '';
464
414
  _this.childNodes = [];
465
- _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return (_this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
466
- ); });
415
+ _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
416
+ );
467
417
  if (keyAttrs.id) {
468
418
  if (!rawAttrs) {
469
419
  _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
@@ -488,8 +438,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
488
438
  * @returns {string} quoted value
489
439
  */
490
440
  HTMLElement.prototype.quoteAttribute = function (attr) {
491
- if (attr === null) {
492
- return "null";
441
+ if (attr == null) {
442
+ return 'null';
493
443
  }
494
444
  return JSON.stringify(attr.replace(/"/g, '&quot;'));
495
445
  };
@@ -511,7 +461,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
511
461
  */
512
462
  HTMLElement.prototype.removeChild = function (node) {
513
463
  this.childNodes = this.childNodes.filter(function (child) {
514
- return (child !== node);
464
+ return child !== node;
515
465
  });
516
466
  };
517
467
  /**
@@ -542,6 +492,13 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
542
492
  enumerable: false,
543
493
  configurable: true
544
494
  });
495
+ Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
496
+ get: function () {
497
+ return voidTags.has(this.localName);
498
+ },
499
+ enumerable: false,
500
+ configurable: true
501
+ });
545
502
  Object.defineProperty(HTMLElement.prototype, "rawText", {
546
503
  /**
547
504
  * Get escpaed (as-it) text value of current node and its children.
@@ -589,11 +546,11 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
589
546
  if (node.nodeType === type_3.default.ELEMENT_NODE) {
590
547
  if (kBlockElements.has(node.rawTagName)) {
591
548
  if (currentBlock.length > 0) {
592
- blocks.push(currentBlock = []);
549
+ blocks.push((currentBlock = []));
593
550
  }
594
551
  node.childNodes.forEach(dfs);
595
552
  if (currentBlock.length > 0) {
596
- blocks.push(currentBlock = []);
553
+ blocks.push((currentBlock = []));
597
554
  }
598
555
  }
599
556
  else {
@@ -616,11 +573,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
616
573
  }
617
574
  }
618
575
  dfs(this);
619
- return blocks.map(function (block) {
620
- // Normalize each line's whitespace
621
- return block.join('').replace(/\s{2,}/g, ' ');
576
+ return blocks
577
+ .map(function (block) {
578
+ return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
622
579
  })
623
- .join('\n').replace(/\s+$/, ''); // trimRight;
580
+ .join('\n')
581
+ .replace(/\s+$/, ''); // trimRight;
624
582
  },
625
583
  enumerable: false,
626
584
  configurable: true
@@ -628,22 +586,18 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
628
586
  HTMLElement.prototype.toString = function () {
629
587
  var tag = this.rawTagName;
630
588
  if (tag) {
631
- // const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
632
- // const is_void = void_tags.has(tag);
633
- var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
634
589
  var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
635
- if (is_void) {
636
- return "<" + tag + attrs + ">";
637
- }
638
- return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
590
+ return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
639
591
  }
640
592
  return this.innerHTML;
641
593
  };
642
594
  Object.defineProperty(HTMLElement.prototype, "innerHTML", {
643
595
  get: function () {
644
- return this.childNodes.map(function (child) {
596
+ return this.childNodes
597
+ .map(function (child) {
645
598
  return child.toString();
646
- }).join('');
599
+ })
600
+ .join('');
647
601
  },
648
602
  set: function (content) {
649
603
  //const r = parse(content, global.options); // TODO global.options ?
@@ -670,7 +624,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
670
624
  for (var _i = 0; _i < arguments.length; _i++) {
671
625
  nodes[_i] = arguments[_i];
672
626
  }
673
- var content = nodes.map(function (node) {
627
+ var content = nodes
628
+ .map(function (node) {
674
629
  if (node instanceof node_2.default) {
675
630
  return [node];
676
631
  }
@@ -680,7 +635,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
680
635
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
681
636
  }
682
637
  return [];
683
- }).flat();
638
+ })
639
+ .flat();
684
640
  var idx = this.parentNode.childNodes.findIndex(function (child) {
685
641
  return child === _this;
686
642
  });
@@ -727,8 +683,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
727
683
  res.push(' '.repeat(indention) + str);
728
684
  }
729
685
  function dfs(node) {
730
- var idStr = node.id ? ("#" + node.id) : '';
731
- var classStr = node.classList.length ? ("." + node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
686
+ var idStr = node.id ? "#" + node.id : '';
687
+ var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
732
688
  write("" + node.rawTagName + idStr + classStr);
733
689
  indention++;
734
690
  node.childNodes.forEach(function (childNode) {
@@ -779,113 +735,57 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
779
735
  HTMLElement.prototype.querySelectorAll = function (selector) {
780
736
  return (0, css_select_1.selectAll)(selector, this, {
781
737
  xmlMode: true,
782
- adapter: matcher_1.default
738
+ adapter: matcher_1.default,
783
739
  });
784
- // let matcher: Matcher;
785
- // if (selector instanceof Matcher) {
786
- // matcher = selector;
787
- // matcher.reset();
788
- // } else {
789
- // if (selector.includes(',')) {
790
- // const selectors = selector.split(',');
791
- // return Array.from(selectors.reduce((pre, cur) => {
792
- // const result = this.querySelectorAll(cur.trim());
793
- // return result.reduce((p, c) => {
794
- // return p.add(c);
795
- // }, pre);
796
- // }, new Set<HTMLElement>()));
797
- // }
798
- // matcher = new Matcher(selector);
799
- // }
800
- // interface IStack {
801
- // 0: Node; // node
802
- // 1: number; // children
803
- // 2: boolean; // found flag
804
- // }
805
- // const stack = [] as IStack[];
806
- // return this.childNodes.reduce((res, cur) => {
807
- // stack.push([cur, 0, false]);
808
- // while (stack.length) {
809
- // const state = arr_back(stack); // get last element
810
- // const el = state[0];
811
- // if (state[1] === 0) {
812
- // // Seen for first time.
813
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
814
- // stack.pop();
815
- // continue;
816
- // }
817
- // const html_el = el as HTMLElement;
818
- // state[2] = matcher.advance(html_el);
819
- // if (state[2]) {
820
- // if (matcher.matched) {
821
- // res.push(html_el);
822
- // res.push(...(html_el.querySelectorAll(selector)));
823
- // // no need to go further.
824
- // matcher.rewind();
825
- // stack.pop();
826
- // continue;
827
- // }
828
- // }
829
- // }
830
- // if (state[1] < el.childNodes.length) {
831
- // stack.push([el.childNodes[state[1]++], 0, false]);
832
- // } else {
833
- // if (state[2]) {
834
- // matcher.rewind();
835
- // }
836
- // stack.pop();
837
- // }
838
- // }
839
- // return res;
840
- // }, [] as HTMLElement[]);
841
740
  };
842
741
  /**
843
742
  * Query CSS Selector to find matching node.
844
743
  * @param {string} selector Simplified CSS selector
845
- * @return {HTMLElement} matching node
744
+ * @return {(HTMLElement|null)} matching node
846
745
  */
847
746
  HTMLElement.prototype.querySelector = function (selector) {
848
747
  return (0, css_select_1.selectOne)(selector, this, {
849
748
  xmlMode: true,
850
- adapter: matcher_1.default
749
+ adapter: matcher_1.default,
851
750
  });
852
- // let matcher: Matcher;
853
- // if (selector instanceof Matcher) {
854
- // matcher = selector;
855
- // matcher.reset();
856
- // } else {
857
- // matcher = new Matcher(selector);
858
- // }
859
- // const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
860
- // for (const node of this.childNodes) {
861
- // stack.push([node, 0, false]);
862
- // while (stack.length) {
863
- // const state = arr_back(stack);
864
- // const el = state[0];
865
- // if (state[1] === 0) {
866
- // // Seen for first time.
867
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
868
- // stack.pop();
869
- // continue;
870
- // }
871
- // state[2] = matcher.advance(el as HTMLElement);
872
- // if (state[2]) {
873
- // if (matcher.matched) {
874
- // return el as HTMLElement;
875
- // }
876
- // }
877
- // }
878
- // if (state[1] < el.childNodes.length) {
879
- // stack.push([el.childNodes[state[1]++], 0, false]);
880
- // } else {
881
- // if (state[2]) {
882
- // matcher.rewind();
883
- // }
884
- // stack.pop();
885
- // }
886
- // }
887
- // }
888
- // return null;
751
+ };
752
+ /**
753
+ * find elements by their tagName
754
+ * @param {string} tagName the tagName of the elements to select
755
+ */
756
+ HTMLElement.prototype.getElementsByTagName = function (tagName) {
757
+ var upperCasedTagName = tagName.toUpperCase();
758
+ var re = [];
759
+ var stack = [];
760
+ var currentNodeReference = this;
761
+ var index = 0;
762
+ // index turns to undefined once the stack is empty and the first condition occurs
763
+ // which happens once all relevant children are searched through
764
+ while (index !== undefined) {
765
+ var child = void 0;
766
+ // make it work with sparse arrays
767
+ do {
768
+ child = currentNodeReference.childNodes[index++];
769
+ } while (index < currentNodeReference.childNodes.length && child === undefined);
770
+ // if the child does not exist we move on with the last provided index (which belongs to the parentNode)
771
+ if (child === undefined) {
772
+ currentNodeReference = currentNodeReference.parentNode;
773
+ index = stack.pop();
774
+ continue;
775
+ }
776
+ if (child.nodeType === type_3.default.ELEMENT_NODE) {
777
+ // https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
778
+ if (tagName === '*' || child.tagName === upperCasedTagName)
779
+ re.push(child);
780
+ // if children are existing push the current status to the stack and keep searching for elements in the level below
781
+ if (child.childNodes.length > 0) {
782
+ stack.push(index);
783
+ currentNodeReference = child;
784
+ index = 0;
785
+ }
786
+ }
787
+ }
788
+ return re;
889
789
  };
890
790
  /**
891
791
  * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
@@ -927,7 +827,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
927
827
  return [node];
928
828
  }, findOne: findOne, findAll: function () {
929
829
  return [];
930
- } })
830
+ } }),
931
831
  });
932
832
  if (e) {
933
833
  return e;
@@ -1005,7 +905,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1005
905
  });
1006
906
  Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
1007
907
  /**
1008
- * Get escaped (as-it) attributes
908
+ * Get escaped (as-is) attributes
1009
909
  * @return {Object} parsed attributes
1010
910
  */
1011
911
  get: function () {
@@ -1014,10 +914,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1014
914
  }
1015
915
  var attrs = {};
1016
916
  if (this.rawAttrs) {
1017
- var re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
917
+ var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1018
918
  var match = void 0;
1019
919
  while ((match = re.exec(this.rawAttrs))) {
1020
- attrs[match[1]] = match[2] || match[3] || match[4] || null;
920
+ var key = match[1];
921
+ var val = match[2] || null;
922
+ if (val && (val[0] === "'" || val[0] === "\""))
923
+ val = val.slice(1, val.length - 1);
924
+ attrs[key] = val;
1021
925
  }
1022
926
  }
1023
927
  this._rawAttrs = attrs;
@@ -1034,13 +938,15 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1034
938
  delete this._attrs[key];
1035
939
  }
1036
940
  // Update rawString
1037
- this.rawAttrs = Object.keys(attrs).map(function (name) {
941
+ this.rawAttrs = Object.keys(attrs)
942
+ .map(function (name) {
1038
943
  var val = JSON.stringify(attrs[name]);
1039
944
  if (val === undefined || val === 'null') {
1040
945
  return name;
1041
946
  }
1042
947
  return name + "=" + val;
1043
- }).join(' ');
948
+ })
949
+ .join(' ');
1044
950
  // Update this.id
1045
951
  if (key === 'id') {
1046
952
  this.id = '';
@@ -1064,7 +970,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1064
970
  HTMLElement.prototype.setAttribute = function (key, value) {
1065
971
  var _this = this;
1066
972
  if (arguments.length < 2) {
1067
- throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
973
+ throw new Error("Failed to execute 'setAttribute' on 'Element'");
1068
974
  }
1069
975
  var k2 = key.toLowerCase();
1070
976
  var attrs = this.rawAttributes;
@@ -1080,13 +986,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1080
986
  this._attrs[k2] = decode(attrs[key]);
1081
987
  }
1082
988
  // Update rawString
1083
- this.rawAttrs = Object.keys(attrs).map(function (name) {
989
+ this.rawAttrs = Object.keys(attrs)
990
+ .map(function (name) {
1084
991
  var val = _this.quoteAttribute(attrs[name]);
1085
- if (val === 'null' || val === '""') {
992
+ if (val === 'null' || val === '""')
1086
993
  return name;
1087
- }
1088
994
  return name + "=" + val;
1089
- }).join(' ');
995
+ })
996
+ .join(' ');
1090
997
  // Update this.id
1091
998
  if (key === 'id') {
1092
999
  this.id = value;
@@ -1107,13 +1014,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1107
1014
  delete this._rawAttrs;
1108
1015
  }
1109
1016
  // Update rawString
1110
- this.rawAttrs = Object.keys(attributes).map(function (name) {
1017
+ this.rawAttrs = Object.keys(attributes)
1018
+ .map(function (name) {
1111
1019
  var val = attributes[name];
1112
- if (val === 'null' || val === '""') {
1020
+ if (val === 'null' || val === '""')
1113
1021
  return name;
1114
- }
1115
1022
  return name + "=" + _this.quoteAttribute(String(val));
1116
- }).join(' ');
1023
+ })
1024
+ .join(' ');
1117
1025
  };
1118
1026
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
1119
1027
  var _a, _b, _c;
@@ -1166,9 +1074,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1166
1074
  var i = 0;
1167
1075
  while (i < children.length) {
1168
1076
  var child = children[i++];
1169
- if (this === child) {
1077
+ if (this === child)
1170
1078
  return children[i] || null;
1171
- }
1172
1079
  }
1173
1080
  return null;
1174
1081
  }
@@ -1210,12 +1117,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1210
1117
  }(node_2.default));
1211
1118
  exports.default = HTMLElement;
1212
1119
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
1213
- var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
1214
- // <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
1215
- // <([a-z][-.:0-9_a-z]*)\s*\/>
1216
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
1217
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
1218
- var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
1120
+ var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
1121
+ var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
1219
1122
  var kSelfClosingElements = {
1220
1123
  area: true,
1221
1124
  AREA: true,
@@ -1244,7 +1147,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1244
1147
  track: true,
1245
1148
  TRACK: true,
1246
1149
  wbr: true,
1247
- WBR: true
1150
+ WBR: true,
1248
1151
  };
1249
1152
  var kElementsClosedByOpening = {
1250
1153
  li: { li: true, LI: true },
@@ -1268,7 +1171,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1268
1171
  h5: { h5: true, H5: true },
1269
1172
  H5: { h5: true, H5: true },
1270
1173
  h6: { h6: true, H6: true },
1271
- H6: { h6: true, H6: true }
1174
+ H6: { h6: true, H6: true },
1272
1175
  };
1273
1176
  var kElementsClosedByClosing = {
1274
1177
  li: { ul: true, ol: true, UL: true, OL: true },
@@ -1284,7 +1187,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1284
1187
  td: { tr: true, table: true, TR: true, TABLE: true },
1285
1188
  TD: { tr: true, table: true, TR: true, TABLE: true },
1286
1189
  th: { tr: true, table: true, TR: true, TABLE: true },
1287
- TH: { tr: true, table: true, TR: true, TABLE: true }
1190
+ TH: { tr: true, table: true, TR: true, TABLE: true },
1288
1191
  };
1289
1192
  var frameflag = 'documentfragmentcontainer';
1290
1193
  /**
@@ -1299,45 +1202,39 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1299
1202
  script: true,
1300
1203
  noscript: true,
1301
1204
  style: true,
1302
- pre: true
1205
+ pre: true,
1303
1206
  };
1304
1207
  var element_names = Object.keys(elements);
1305
- var kBlockTextElements = element_names.map(function (it) {
1306
- return new RegExp(it, 'i');
1307
- });
1308
- var kIgnoreElements = element_names.filter(function (it) {
1309
- return elements[it];
1310
- }).map(function (it) {
1311
- return new RegExp(it, 'i');
1312
- });
1208
+ var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1209
+ var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1313
1210
  function element_should_be_ignore(tag) {
1314
- return kIgnoreElements.some(function (it) {
1315
- return it.test(tag);
1316
- });
1211
+ return kIgnoreElements.some(function (it) { return it.test(tag); });
1317
1212
  }
1318
1213
  function is_block_text_element(tag) {
1319
- return kBlockTextElements.some(function (it) {
1320
- return it.test(tag);
1321
- });
1214
+ return kBlockTextElements.some(function (it) { return it.test(tag); });
1322
1215
  }
1323
- var createRange = function (startPos, endPos) {
1324
- return [startPos - frameFlagOffset, endPos - frameFlagOffset];
1325
- };
1216
+ var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1326
1217
  var root = new HTMLElement(null, {}, '', null, [0, data.length]);
1327
1218
  var currentParent = root;
1328
1219
  var stack = [root];
1329
1220
  var lastTextPos = -1;
1221
+ var noNestedTagIndex = undefined;
1330
1222
  var match;
1331
1223
  // https://github.com/taoqf/node-html-parser/issues/38
1332
1224
  data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
1225
+ var lowerCaseTagName = options.lowerCaseTagName;
1333
1226
  var dataEndPos = data.length - (frameflag.length + 2);
1334
1227
  var frameFlagOffset = frameflag.length + 2;
1335
1228
  while ((match = kMarkupPattern.exec(data))) {
1336
- var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
1229
+ // Note: Object destructuring here consistently tests as higher performance than array destructuring
1230
+ // eslint-disable-next-line prefer-const
1231
+ var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
1232
+ var matchLength = matchText.length;
1233
+ var tagStartPos = kMarkupPattern.lastIndex - matchLength;
1337
1234
  var tagEndPos = kMarkupPattern.lastIndex;
1338
1235
  // Add TextNode if content
1339
1236
  if (lastTextPos > -1) {
1340
- if (lastTextPos + match[0].length < tagEndPos) {
1237
+ if (lastTextPos + matchLength < tagEndPos) {
1341
1238
  var text = data.substring(lastTextPos, tagStartPos);
1342
1239
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
1343
1240
  }
@@ -1345,10 +1242,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1345
1242
  lastTextPos = kMarkupPattern.lastIndex;
1346
1243
  // https://github.com/taoqf/node-html-parser/issues/38
1347
1244
  // Skip frameflag node
1348
- if (match[2] === frameflag)
1245
+ if (tagName === frameflag)
1349
1246
  continue;
1350
1247
  // Handle comments
1351
- if (match[0][1] === '!') {
1248
+ if (matchText[1] === '!') {
1352
1249
  if (options.comment) {
1353
1250
  // Only keep what is in between <!-- and -->
1354
1251
  var text = data.substring(tagStartPos + 4, tagEndPos - 3);
@@ -1358,36 +1255,46 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1358
1255
  }
1359
1256
  /* -- Handle tag matching -- */
1360
1257
  // Fix tag casing if necessary
1361
- if (options.lowerCaseTagName)
1362
- match[2] = match[2].toLowerCase();
1258
+ if (lowerCaseTagName)
1259
+ tagName = tagName.toLowerCase();
1363
1260
  // Handle opening tags (ie. <this> not </that>)
1364
- if (!match[1]) {
1261
+ if (!leadingSlash) {
1365
1262
  /* Populate attributes */
1366
1263
  var attrs = {};
1367
- for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
1368
- attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
1264
+ for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
1265
+ var key = attMatch[1], val = attMatch[2];
1266
+ var isQuoted = val[0] === "'" || val[0] === "\"";
1267
+ attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
1369
1268
  }
1370
- var tagName = currentParent.rawTagName;
1371
- if (!match[4] && kElementsClosedByOpening[tagName]) {
1372
- if (kElementsClosedByOpening[tagName][match[2]]) {
1269
+ var parentTagName = currentParent.rawTagName;
1270
+ if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
1271
+ if (kElementsClosedByOpening[parentTagName][tagName]) {
1373
1272
  stack.pop();
1374
1273
  currentParent = (0, back_1.default)(stack);
1375
1274
  }
1376
1275
  }
1276
+ // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1277
+ if (tagName === 'a' || tagName === 'A') {
1278
+ if (noNestedTagIndex !== undefined) {
1279
+ stack.splice(noNestedTagIndex);
1280
+ currentParent = (0, back_1.default)(stack);
1281
+ }
1282
+ noNestedTagIndex = stack.length;
1283
+ }
1377
1284
  var tagEndPos_1 = kMarkupPattern.lastIndex;
1378
- var tagStartPos_1 = tagEndPos_1 - match[0].length;
1285
+ var tagStartPos_1 = tagEndPos_1 - matchLength;
1379
1286
  currentParent = currentParent.appendChild(
1380
1287
  // Initialize range (end position updated later for closed tags)
1381
- new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
1288
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
1382
1289
  stack.push(currentParent);
1383
- if (is_block_text_element(match[2])) {
1290
+ if (is_block_text_element(tagName)) {
1384
1291
  // Find closing tag
1385
- var closeMarkup = "</" + match[2] + ">";
1386
- var closeIndex = options.lowerCaseTagName
1292
+ var closeMarkup = "</" + tagName + ">";
1293
+ var closeIndex = lowerCaseTagName
1387
1294
  ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
1388
1295
  : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
1389
1296
  var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
1390
- if (element_should_be_ignore(match[2])) {
1297
+ if (element_should_be_ignore(tagName)) {
1391
1298
  var text = data.substring(tagEndPos_1, textEndPos);
1392
1299
  if (text.length > 0 && /\S/.test(text)) {
1393
1300
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
@@ -1399,14 +1306,16 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1399
1306
  else {
1400
1307
  lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
1401
1308
  // Cause to be treated as self-closing, because no close found
1402
- match[1] = 'true';
1309
+ leadingSlash = '/';
1403
1310
  }
1404
1311
  }
1405
1312
  }
1406
1313
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1407
- if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
1314
+ if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1408
1315
  while (true) {
1409
- if (currentParent.rawTagName === match[2]) {
1316
+ if (tagName === 'a' || tagName === 'A')
1317
+ noNestedTagIndex = undefined;
1318
+ if (currentParent.rawTagName === tagName) {
1410
1319
  // Update range end for closed tag
1411
1320
  currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
1412
1321
  stack.pop();
@@ -1414,10 +1323,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1414
1323
  break;
1415
1324
  }
1416
1325
  else {
1417
- var tagName = currentParent.tagName;
1326
+ var parentTagName = currentParent.tagName;
1418
1327
  // Trying to close current tag, and move on
1419
- if (kElementsClosedByClosing[tagName]) {
1420
- if (kElementsClosedByClosing[tagName][match[2]]) {
1328
+ if (kElementsClosedByClosing[parentTagName]) {
1329
+ if (kElementsClosedByClosing[parentTagName][tagName]) {
1421
1330
  stack.pop();
1422
1331
  currentParent = (0, back_1.default)(stack);
1423
1332
  continue;