node-html-parser 4.1.4 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/main.js CHANGED
@@ -34,7 +34,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
34
34
  ar[i] = from[i];
35
35
  }
36
36
  }
37
- return to.concat(ar || from);
37
+ return to.concat(ar || Array.prototype.slice.call(from));
38
38
  };
39
39
  define("back", ["require", "exports"], function (require, exports) {
40
40
  "use strict";
@@ -293,89 +293,40 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
293
293
  matcher_1 = __importDefault(matcher_1);
294
294
  back_1 = __importDefault(back_1);
295
295
  comment_1 = __importDefault(comment_1);
296
- // const { decode } = he;
296
+ var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
297
297
  function decode(val) {
298
298
  // clone string
299
299
  return JSON.parse(JSON.stringify(he_2.default.decode(val)));
300
300
  }
301
301
  // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
302
+ var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
303
+ var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
304
+ var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
305
+ var tableTags = ['table', 'td', 'tr'];
306
+ var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
302
307
  var kBlockElements = new Set();
303
- kBlockElements.add('address');
304
- kBlockElements.add('ADDRESS');
305
- kBlockElements.add('article');
306
- kBlockElements.add('ARTICLE');
307
- kBlockElements.add('aside');
308
- kBlockElements.add('ASIDE');
309
- kBlockElements.add('blockquote');
310
- kBlockElements.add('BLOCKQUOTE');
311
- kBlockElements.add('br');
312
- kBlockElements.add('BR');
313
- kBlockElements.add('details');
314
- kBlockElements.add('DETAILS');
315
- kBlockElements.add('dialog');
316
- kBlockElements.add('DIALOG');
317
- kBlockElements.add('dd');
318
- kBlockElements.add('DD');
319
- kBlockElements.add('div');
320
- kBlockElements.add('DIV');
321
- kBlockElements.add('dl');
322
- kBlockElements.add('DL');
323
- kBlockElements.add('dt');
324
- kBlockElements.add('DT');
325
- kBlockElements.add('fieldset');
326
- kBlockElements.add('FIELDSET');
327
- kBlockElements.add('figcaption');
328
- kBlockElements.add('FIGCAPTION');
329
- kBlockElements.add('figure');
330
- kBlockElements.add('FIGURE');
331
- kBlockElements.add('footer');
332
- kBlockElements.add('FOOTER');
333
- kBlockElements.add('form');
334
- kBlockElements.add('FORM');
335
- kBlockElements.add('h1');
336
- kBlockElements.add('H1');
337
- kBlockElements.add('h2');
338
- kBlockElements.add('H2');
339
- kBlockElements.add('h3');
340
- kBlockElements.add('H3');
341
- kBlockElements.add('h4');
342
- kBlockElements.add('H4');
343
- kBlockElements.add('h5');
344
- kBlockElements.add('H5');
345
- kBlockElements.add('h6');
346
- kBlockElements.add('H6');
347
- kBlockElements.add('header');
348
- kBlockElements.add('HEADER');
349
- kBlockElements.add('hgroup');
350
- kBlockElements.add('HGROUP');
351
- kBlockElements.add('hr');
352
- kBlockElements.add('HR');
353
- kBlockElements.add('li');
354
- kBlockElements.add('LI');
355
- kBlockElements.add('main');
356
- kBlockElements.add('MAIN');
357
- kBlockElements.add('nav');
358
- kBlockElements.add('NAV');
359
- kBlockElements.add('ol');
360
- kBlockElements.add('OL');
361
- kBlockElements.add('p');
362
- kBlockElements.add('P');
363
- kBlockElements.add('pre');
364
- kBlockElements.add('PRE');
365
- kBlockElements.add('section');
366
- kBlockElements.add('SECTION');
367
- kBlockElements.add('table');
368
- kBlockElements.add('TABLE');
369
- kBlockElements.add('td');
370
- kBlockElements.add('TD');
371
- kBlockElements.add('tr');
372
- kBlockElements.add('TR');
373
- kBlockElements.add('ul');
374
- kBlockElements.add('UL');
308
+ function addToKBlockElement() {
309
+ var args = [];
310
+ for (var _i = 0; _i < arguments.length; _i++) {
311
+ args[_i] = arguments[_i];
312
+ }
313
+ var addToSet = function (array) {
314
+ for (var index = 0; index < array.length; index++) {
315
+ var element = array[index];
316
+ kBlockElements.add(element);
317
+ kBlockElements.add(element.toUpperCase());
318
+ }
319
+ };
320
+ for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
321
+ var arg = args_1[_a];
322
+ addToSet(arg);
323
+ }
324
+ }
325
+ addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
375
326
  var DOMTokenList = /** @class */ (function () {
376
327
  function DOMTokenList(valuesInit, afterUpdate) {
377
328
  if (valuesInit === void 0) { valuesInit = []; }
378
- if (afterUpdate === void 0) { afterUpdate = (function () { return null; }); }
329
+ if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
379
330
  this._set = new Set(valuesInit);
380
331
  this._afterUpdate = afterUpdate;
381
332
  }
@@ -396,8 +347,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
396
347
  this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
397
348
  };
398
349
  DOMTokenList.prototype.remove = function (c) {
399
- this._set.delete(c) &&
400
- this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
350
+ this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
401
351
  };
402
352
  DOMTokenList.prototype.toggle = function (c) {
403
353
  this._validate(c);
@@ -462,8 +412,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
462
412
  _this.rawAttrs = rawAttrs || '';
463
413
  _this.id = keyAttrs.id || '';
464
414
  _this.childNodes = [];
465
- _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return (_this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
466
- ); });
415
+ _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
416
+ );
467
417
  if (keyAttrs.id) {
468
418
  if (!rawAttrs) {
469
419
  _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
@@ -488,8 +438,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
488
438
  * @returns {string} quoted value
489
439
  */
490
440
  HTMLElement.prototype.quoteAttribute = function (attr) {
491
- if (attr === null) {
492
- return "null";
441
+ if (attr == null) {
442
+ return 'null';
493
443
  }
494
444
  return JSON.stringify(attr.replace(/"/g, '&quot;'));
495
445
  };
@@ -511,7 +461,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
511
461
  */
512
462
  HTMLElement.prototype.removeChild = function (node) {
513
463
  this.childNodes = this.childNodes.filter(function (child) {
514
- return (child !== node);
464
+ return child !== node;
515
465
  });
516
466
  };
517
467
  /**
@@ -532,6 +482,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
532
482
  get: function () {
533
483
  return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
534
484
  },
485
+ set: function (newname) {
486
+ this.rawTagName = newname.toLowerCase();
487
+ },
535
488
  enumerable: false,
536
489
  configurable: true
537
490
  });
@@ -542,6 +495,13 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
542
495
  enumerable: false,
543
496
  configurable: true
544
497
  });
498
+ Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
499
+ get: function () {
500
+ return voidTags.has(this.localName);
501
+ },
502
+ enumerable: false,
503
+ configurable: true
504
+ });
545
505
  Object.defineProperty(HTMLElement.prototype, "rawText", {
546
506
  /**
547
507
  * Get escpaed (as-it) text value of current node and its children.
@@ -589,11 +549,11 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
589
549
  if (node.nodeType === type_3.default.ELEMENT_NODE) {
590
550
  if (kBlockElements.has(node.rawTagName)) {
591
551
  if (currentBlock.length > 0) {
592
- blocks.push(currentBlock = []);
552
+ blocks.push((currentBlock = []));
593
553
  }
594
554
  node.childNodes.forEach(dfs);
595
555
  if (currentBlock.length > 0) {
596
- blocks.push(currentBlock = []);
556
+ blocks.push((currentBlock = []));
597
557
  }
598
558
  }
599
559
  else {
@@ -616,11 +576,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
616
576
  }
617
577
  }
618
578
  dfs(this);
619
- return blocks.map(function (block) {
620
- // Normalize each line's whitespace
621
- return block.join('').replace(/\s{2,}/g, ' ');
579
+ return blocks
580
+ .map(function (block) {
581
+ return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
622
582
  })
623
- .join('\n').replace(/\s+$/, ''); // trimRight;
583
+ .join('\n')
584
+ .replace(/\s+$/, ''); // trimRight;
624
585
  },
625
586
  enumerable: false,
626
587
  configurable: true
@@ -628,22 +589,18 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
628
589
  HTMLElement.prototype.toString = function () {
629
590
  var tag = this.rawTagName;
630
591
  if (tag) {
631
- // const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
632
- // const is_void = void_tags.has(tag);
633
- var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
634
592
  var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
635
- if (is_void) {
636
- return "<" + tag + attrs + ">";
637
- }
638
- return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
593
+ return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
639
594
  }
640
595
  return this.innerHTML;
641
596
  };
642
597
  Object.defineProperty(HTMLElement.prototype, "innerHTML", {
643
598
  get: function () {
644
- return this.childNodes.map(function (child) {
599
+ return this.childNodes
600
+ .map(function (child) {
645
601
  return child.toString();
646
- }).join('');
602
+ })
603
+ .join('');
647
604
  },
648
605
  set: function (content) {
649
606
  //const r = parse(content, global.options); // TODO global.options ?
@@ -670,7 +627,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
670
627
  for (var _i = 0; _i < arguments.length; _i++) {
671
628
  nodes[_i] = arguments[_i];
672
629
  }
673
- var content = nodes.map(function (node) {
630
+ var content = nodes
631
+ .map(function (node) {
674
632
  if (node instanceof node_2.default) {
675
633
  return [node];
676
634
  }
@@ -680,7 +638,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
680
638
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
681
639
  }
682
640
  return [];
683
- }).flat();
641
+ })
642
+ .flat();
684
643
  var idx = this.parentNode.childNodes.findIndex(function (child) {
685
644
  return child === _this;
686
645
  });
@@ -727,8 +686,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
727
686
  res.push(' '.repeat(indention) + str);
728
687
  }
729
688
  function dfs(node) {
730
- var idStr = node.id ? ("#" + node.id) : '';
731
- var classStr = node.classList.length ? ("." + node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
689
+ var idStr = node.id ? "#" + node.id : '';
690
+ var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
732
691
  write("" + node.rawTagName + idStr + classStr);
733
692
  indention++;
734
693
  node.childNodes.forEach(function (childNode) {
@@ -779,113 +738,57 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
779
738
  HTMLElement.prototype.querySelectorAll = function (selector) {
780
739
  return (0, css_select_1.selectAll)(selector, this, {
781
740
  xmlMode: true,
782
- adapter: matcher_1.default
741
+ adapter: matcher_1.default,
783
742
  });
784
- // let matcher: Matcher;
785
- // if (selector instanceof Matcher) {
786
- // matcher = selector;
787
- // matcher.reset();
788
- // } else {
789
- // if (selector.includes(',')) {
790
- // const selectors = selector.split(',');
791
- // return Array.from(selectors.reduce((pre, cur) => {
792
- // const result = this.querySelectorAll(cur.trim());
793
- // return result.reduce((p, c) => {
794
- // return p.add(c);
795
- // }, pre);
796
- // }, new Set<HTMLElement>()));
797
- // }
798
- // matcher = new Matcher(selector);
799
- // }
800
- // interface IStack {
801
- // 0: Node; // node
802
- // 1: number; // children
803
- // 2: boolean; // found flag
804
- // }
805
- // const stack = [] as IStack[];
806
- // return this.childNodes.reduce((res, cur) => {
807
- // stack.push([cur, 0, false]);
808
- // while (stack.length) {
809
- // const state = arr_back(stack); // get last element
810
- // const el = state[0];
811
- // if (state[1] === 0) {
812
- // // Seen for first time.
813
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
814
- // stack.pop();
815
- // continue;
816
- // }
817
- // const html_el = el as HTMLElement;
818
- // state[2] = matcher.advance(html_el);
819
- // if (state[2]) {
820
- // if (matcher.matched) {
821
- // res.push(html_el);
822
- // res.push(...(html_el.querySelectorAll(selector)));
823
- // // no need to go further.
824
- // matcher.rewind();
825
- // stack.pop();
826
- // continue;
827
- // }
828
- // }
829
- // }
830
- // if (state[1] < el.childNodes.length) {
831
- // stack.push([el.childNodes[state[1]++], 0, false]);
832
- // } else {
833
- // if (state[2]) {
834
- // matcher.rewind();
835
- // }
836
- // stack.pop();
837
- // }
838
- // }
839
- // return res;
840
- // }, [] as HTMLElement[]);
841
743
  };
842
744
  /**
843
745
  * Query CSS Selector to find matching node.
844
746
  * @param {string} selector Simplified CSS selector
845
- * @return {HTMLElement} matching node
747
+ * @return {(HTMLElement|null)} matching node
846
748
  */
847
749
  HTMLElement.prototype.querySelector = function (selector) {
848
750
  return (0, css_select_1.selectOne)(selector, this, {
849
751
  xmlMode: true,
850
- adapter: matcher_1.default
752
+ adapter: matcher_1.default,
851
753
  });
852
- // let matcher: Matcher;
853
- // if (selector instanceof Matcher) {
854
- // matcher = selector;
855
- // matcher.reset();
856
- // } else {
857
- // matcher = new Matcher(selector);
858
- // }
859
- // const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
860
- // for (const node of this.childNodes) {
861
- // stack.push([node, 0, false]);
862
- // while (stack.length) {
863
- // const state = arr_back(stack);
864
- // const el = state[0];
865
- // if (state[1] === 0) {
866
- // // Seen for first time.
867
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
868
- // stack.pop();
869
- // continue;
870
- // }
871
- // state[2] = matcher.advance(el as HTMLElement);
872
- // if (state[2]) {
873
- // if (matcher.matched) {
874
- // return el as HTMLElement;
875
- // }
876
- // }
877
- // }
878
- // if (state[1] < el.childNodes.length) {
879
- // stack.push([el.childNodes[state[1]++], 0, false]);
880
- // } else {
881
- // if (state[2]) {
882
- // matcher.rewind();
883
- // }
884
- // stack.pop();
885
- // }
886
- // }
887
- // }
888
- // return null;
754
+ };
755
+ /**
756
+ * find elements by their tagName
757
+ * @param {string} tagName the tagName of the elements to select
758
+ */
759
+ HTMLElement.prototype.getElementsByTagName = function (tagName) {
760
+ var upperCasedTagName = tagName.toUpperCase();
761
+ var re = [];
762
+ var stack = [];
763
+ var currentNodeReference = this;
764
+ var index = 0;
765
+ // index turns to undefined once the stack is empty and the first condition occurs
766
+ // which happens once all relevant children are searched through
767
+ while (index !== undefined) {
768
+ var child = void 0;
769
+ // make it work with sparse arrays
770
+ do {
771
+ child = currentNodeReference.childNodes[index++];
772
+ } while (index < currentNodeReference.childNodes.length && child === undefined);
773
+ // if the child does not exist we move on with the last provided index (which belongs to the parentNode)
774
+ if (child === undefined) {
775
+ currentNodeReference = currentNodeReference.parentNode;
776
+ index = stack.pop();
777
+ continue;
778
+ }
779
+ if (child.nodeType === type_3.default.ELEMENT_NODE) {
780
+ // https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
781
+ if (tagName === '*' || child.tagName === upperCasedTagName)
782
+ re.push(child);
783
+ // if children are existing push the current status to the stack and keep searching for elements in the level below
784
+ if (child.childNodes.length > 0) {
785
+ stack.push(index);
786
+ currentNodeReference = child;
787
+ index = 0;
788
+ }
789
+ }
790
+ }
791
+ return re;
889
792
  };
890
793
  /**
891
794
  * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
@@ -927,7 +830,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
927
830
  return [node];
928
831
  }, findOne: findOne, findAll: function () {
929
832
  return [];
930
- } })
833
+ } }),
931
834
  });
932
835
  if (e) {
933
836
  return e;
@@ -1005,7 +908,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1005
908
  });
1006
909
  Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
1007
910
  /**
1008
- * Get escaped (as-it) attributes
911
+ * Get escaped (as-is) attributes
1009
912
  * @return {Object} parsed attributes
1010
913
  */
1011
914
  get: function () {
@@ -1014,10 +917,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1014
917
  }
1015
918
  var attrs = {};
1016
919
  if (this.rawAttrs) {
1017
- var re = /([a-z()#][a-z0-9-_:()#]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
920
+ var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1018
921
  var match = void 0;
1019
922
  while ((match = re.exec(this.rawAttrs))) {
1020
- attrs[match[1]] = match[2] || match[3] || match[4] || null;
923
+ var key = match[1];
924
+ var val = match[2] || null;
925
+ if (val && (val[0] === "'" || val[0] === "\""))
926
+ val = val.slice(1, val.length - 1);
927
+ attrs[key] = val;
1021
928
  }
1022
929
  }
1023
930
  this._rawAttrs = attrs;
@@ -1034,13 +941,15 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1034
941
  delete this._attrs[key];
1035
942
  }
1036
943
  // Update rawString
1037
- this.rawAttrs = Object.keys(attrs).map(function (name) {
944
+ this.rawAttrs = Object.keys(attrs)
945
+ .map(function (name) {
1038
946
  var val = JSON.stringify(attrs[name]);
1039
947
  if (val === undefined || val === 'null') {
1040
948
  return name;
1041
949
  }
1042
950
  return name + "=" + val;
1043
- }).join(' ');
951
+ })
952
+ .join(' ');
1044
953
  // Update this.id
1045
954
  if (key === 'id') {
1046
955
  this.id = '';
@@ -1064,7 +973,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1064
973
  HTMLElement.prototype.setAttribute = function (key, value) {
1065
974
  var _this = this;
1066
975
  if (arguments.length < 2) {
1067
- throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
976
+ throw new Error("Failed to execute 'setAttribute' on 'Element'");
1068
977
  }
1069
978
  var k2 = key.toLowerCase();
1070
979
  var attrs = this.rawAttributes;
@@ -1080,13 +989,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1080
989
  this._attrs[k2] = decode(attrs[key]);
1081
990
  }
1082
991
  // Update rawString
1083
- this.rawAttrs = Object.keys(attrs).map(function (name) {
992
+ this.rawAttrs = Object.keys(attrs)
993
+ .map(function (name) {
1084
994
  var val = _this.quoteAttribute(attrs[name]);
1085
- if (val === 'null' || val === '""') {
995
+ if (val === 'null' || val === '""')
1086
996
  return name;
1087
- }
1088
997
  return name + "=" + val;
1089
- }).join(' ');
998
+ })
999
+ .join(' ');
1090
1000
  // Update this.id
1091
1001
  if (key === 'id') {
1092
1002
  this.id = value;
@@ -1107,13 +1017,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1107
1017
  delete this._rawAttrs;
1108
1018
  }
1109
1019
  // Update rawString
1110
- this.rawAttrs = Object.keys(attributes).map(function (name) {
1020
+ this.rawAttrs = Object.keys(attributes)
1021
+ .map(function (name) {
1111
1022
  var val = attributes[name];
1112
- if (val === 'null' || val === '""') {
1023
+ if (val === 'null' || val === '""')
1113
1024
  return name;
1114
- }
1115
1025
  return name + "=" + _this.quoteAttribute(String(val));
1116
- }).join(' ');
1026
+ })
1027
+ .join(' ');
1117
1028
  };
1118
1029
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
1119
1030
  var _a, _b, _c;
@@ -1166,9 +1077,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1166
1077
  var i = 0;
1167
1078
  while (i < children.length) {
1168
1079
  var child = children[i++];
1169
- if (this === child) {
1080
+ if (this === child)
1170
1081
  return children[i] || null;
1171
- }
1172
1082
  }
1173
1083
  return null;
1174
1084
  }
@@ -1210,12 +1120,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1210
1120
  }(node_2.default));
1211
1121
  exports.default = HTMLElement;
1212
1122
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
1213
- var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
1214
- // <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
1215
- // <([a-z][-.:0-9_a-z]*)\s*\/>
1216
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
1217
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
1218
- var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
1123
+ var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
1124
+ var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
1219
1125
  var kSelfClosingElements = {
1220
1126
  area: true,
1221
1127
  AREA: true,
@@ -1244,7 +1150,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1244
1150
  track: true,
1245
1151
  TRACK: true,
1246
1152
  wbr: true,
1247
- WBR: true
1153
+ WBR: true,
1248
1154
  };
1249
1155
  var kElementsClosedByOpening = {
1250
1156
  li: { li: true, LI: true },
@@ -1268,7 +1174,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1268
1174
  h5: { h5: true, H5: true },
1269
1175
  H5: { h5: true, H5: true },
1270
1176
  h6: { h6: true, H6: true },
1271
- H6: { h6: true, H6: true }
1177
+ H6: { h6: true, H6: true },
1272
1178
  };
1273
1179
  var kElementsClosedByClosing = {
1274
1180
  li: { ul: true, ol: true, UL: true, OL: true },
@@ -1284,7 +1190,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1284
1190
  td: { tr: true, table: true, TR: true, TABLE: true },
1285
1191
  TD: { tr: true, table: true, TR: true, TABLE: true },
1286
1192
  th: { tr: true, table: true, TR: true, TABLE: true },
1287
- TH: { tr: true, table: true, TR: true, TABLE: true }
1193
+ TH: { tr: true, table: true, TR: true, TABLE: true },
1288
1194
  };
1289
1195
  var frameflag = 'documentfragmentcontainer';
1290
1196
  /**
@@ -1299,45 +1205,39 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1299
1205
  script: true,
1300
1206
  noscript: true,
1301
1207
  style: true,
1302
- pre: true
1208
+ pre: true,
1303
1209
  };
1304
1210
  var element_names = Object.keys(elements);
1305
- var kBlockTextElements = element_names.map(function (it) {
1306
- return new RegExp(it, 'i');
1307
- });
1308
- var kIgnoreElements = element_names.filter(function (it) {
1309
- return elements[it];
1310
- }).map(function (it) {
1311
- return new RegExp(it, 'i');
1312
- });
1211
+ var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1212
+ var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1313
1213
  function element_should_be_ignore(tag) {
1314
- return kIgnoreElements.some(function (it) {
1315
- return it.test(tag);
1316
- });
1214
+ return kIgnoreElements.some(function (it) { return it.test(tag); });
1317
1215
  }
1318
1216
  function is_block_text_element(tag) {
1319
- return kBlockTextElements.some(function (it) {
1320
- return it.test(tag);
1321
- });
1217
+ return kBlockTextElements.some(function (it) { return it.test(tag); });
1322
1218
  }
1323
- var createRange = function (startPos, endPos) {
1324
- return [startPos - frameFlagOffset, endPos - frameFlagOffset];
1325
- };
1219
+ var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1326
1220
  var root = new HTMLElement(null, {}, '', null, [0, data.length]);
1327
1221
  var currentParent = root;
1328
1222
  var stack = [root];
1329
1223
  var lastTextPos = -1;
1224
+ var noNestedTagIndex = undefined;
1330
1225
  var match;
1331
1226
  // https://github.com/taoqf/node-html-parser/issues/38
1332
1227
  data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
1228
+ var lowerCaseTagName = options.lowerCaseTagName;
1333
1229
  var dataEndPos = data.length - (frameflag.length + 2);
1334
1230
  var frameFlagOffset = frameflag.length + 2;
1335
1231
  while ((match = kMarkupPattern.exec(data))) {
1336
- var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
1232
+ // Note: Object destructuring here consistently tests as higher performance than array destructuring
1233
+ // eslint-disable-next-line prefer-const
1234
+ var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
1235
+ var matchLength = matchText.length;
1236
+ var tagStartPos = kMarkupPattern.lastIndex - matchLength;
1337
1237
  var tagEndPos = kMarkupPattern.lastIndex;
1338
1238
  // Add TextNode if content
1339
1239
  if (lastTextPos > -1) {
1340
- if (lastTextPos + match[0].length < tagEndPos) {
1240
+ if (lastTextPos + matchLength < tagEndPos) {
1341
1241
  var text = data.substring(lastTextPos, tagStartPos);
1342
1242
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
1343
1243
  }
@@ -1345,10 +1245,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1345
1245
  lastTextPos = kMarkupPattern.lastIndex;
1346
1246
  // https://github.com/taoqf/node-html-parser/issues/38
1347
1247
  // Skip frameflag node
1348
- if (match[2] === frameflag)
1248
+ if (tagName === frameflag)
1349
1249
  continue;
1350
1250
  // Handle comments
1351
- if (match[0][1] === '!') {
1251
+ if (matchText[1] === '!') {
1352
1252
  if (options.comment) {
1353
1253
  // Only keep what is in between <!-- and -->
1354
1254
  var text = data.substring(tagStartPos + 4, tagEndPos - 3);
@@ -1358,42 +1258,46 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1358
1258
  }
1359
1259
  /* -- Handle tag matching -- */
1360
1260
  // Fix tag casing if necessary
1361
- if (options.lowerCaseTagName)
1362
- match[2] = match[2].toLowerCase();
1261
+ if (lowerCaseTagName)
1262
+ tagName = tagName.toLowerCase();
1363
1263
  // Handle opening tags (ie. <this> not </that>)
1364
- if (!match[1]) {
1264
+ if (!leadingSlash) {
1365
1265
  /* Populate attributes */
1366
1266
  var attrs = {};
1367
- for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
1368
- attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
1267
+ for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
1268
+ var key = attMatch[1], val = attMatch[2];
1269
+ var isQuoted = val[0] === "'" || val[0] === "\"";
1270
+ attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
1369
1271
  }
1370
- var tagName = currentParent.rawTagName;
1371
- if (!match[4] && kElementsClosedByOpening[tagName]) {
1372
- if (kElementsClosedByOpening[tagName][match[2]]) {
1272
+ var parentTagName = currentParent.rawTagName;
1273
+ if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
1274
+ if (kElementsClosedByOpening[parentTagName][tagName]) {
1373
1275
  stack.pop();
1374
1276
  currentParent = (0, back_1.default)(stack);
1375
1277
  }
1376
1278
  }
1377
- // console.error('111111111111111111', currentParent.rawTagName);
1378
- // console.error('22222222222222222222', match);
1379
- if (currentParent.rawTagName === 'a' && match[2] === 'a') {
1380
- stack.pop();
1381
- currentParent = (0, back_1.default)(stack);
1279
+ // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1280
+ if (tagName === 'a' || tagName === 'A') {
1281
+ if (noNestedTagIndex !== undefined) {
1282
+ stack.splice(noNestedTagIndex);
1283
+ currentParent = (0, back_1.default)(stack);
1284
+ }
1285
+ noNestedTagIndex = stack.length;
1382
1286
  }
1383
1287
  var tagEndPos_1 = kMarkupPattern.lastIndex;
1384
- var tagStartPos_1 = tagEndPos_1 - match[0].length;
1288
+ var tagStartPos_1 = tagEndPos_1 - matchLength;
1385
1289
  currentParent = currentParent.appendChild(
1386
1290
  // Initialize range (end position updated later for closed tags)
1387
- new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
1291
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
1388
1292
  stack.push(currentParent);
1389
- if (is_block_text_element(match[2])) {
1293
+ if (is_block_text_element(tagName)) {
1390
1294
  // Find closing tag
1391
- var closeMarkup = "</" + match[2] + ">";
1392
- var closeIndex = options.lowerCaseTagName
1295
+ var closeMarkup = "</" + tagName + ">";
1296
+ var closeIndex = lowerCaseTagName
1393
1297
  ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
1394
1298
  : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
1395
1299
  var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
1396
- if (element_should_be_ignore(match[2])) {
1300
+ if (element_should_be_ignore(tagName)) {
1397
1301
  var text = data.substring(tagEndPos_1, textEndPos);
1398
1302
  if (text.length > 0 && /\S/.test(text)) {
1399
1303
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
@@ -1405,14 +1309,16 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1405
1309
  else {
1406
1310
  lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
1407
1311
  // Cause to be treated as self-closing, because no close found
1408
- match[1] = 'true';
1312
+ leadingSlash = '/';
1409
1313
  }
1410
1314
  }
1411
1315
  }
1412
1316
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1413
- if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
1317
+ if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1414
1318
  while (true) {
1415
- if (currentParent.rawTagName === match[2]) {
1319
+ if (tagName === 'a' || tagName === 'A')
1320
+ noNestedTagIndex = undefined;
1321
+ if (currentParent.rawTagName === tagName) {
1416
1322
  // Update range end for closed tag
1417
1323
  currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
1418
1324
  stack.pop();
@@ -1420,10 +1326,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1420
1326
  break;
1421
1327
  }
1422
1328
  else {
1423
- var tagName = currentParent.tagName;
1329
+ var parentTagName = currentParent.tagName;
1424
1330
  // Trying to close current tag, and move on
1425
- if (kElementsClosedByClosing[tagName]) {
1426
- if (kElementsClosedByClosing[tagName][match[2]]) {
1331
+ if (kElementsClosedByClosing[parentTagName]) {
1332
+ if (kElementsClosedByClosing[parentTagName][tagName]) {
1427
1333
  stack.pop();
1428
1334
  currentParent = (0, back_1.default)(stack);
1429
1335
  continue;