node-html-parser 4.1.3 → 5.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,7 +32,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
32
32
  ar[i] = from[i];
33
33
  }
34
34
  }
35
- return to.concat(ar || from);
35
+ return to.concat(ar || Array.prototype.slice.call(from));
36
36
  };
37
37
  var __importDefault = (this && this.__importDefault) || function (mod) {
38
38
  return (mod && mod.__esModule) ? mod : { "default": mod };
@@ -47,89 +47,40 @@ var text_1 = __importDefault(require("./text"));
47
47
  var matcher_1 = __importDefault(require("../matcher"));
48
48
  var back_1 = __importDefault(require("../back"));
49
49
  var comment_1 = __importDefault(require("./comment"));
50
- // const { decode } = he;
50
+ var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
51
51
  function decode(val) {
52
52
  // clone string
53
53
  return JSON.parse(JSON.stringify(he_1.default.decode(val)));
54
54
  }
55
55
  // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
56
+ var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
57
+ var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
58
+ var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
59
+ var tableTags = ['table', 'td', 'tr'];
60
+ var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
56
61
  var kBlockElements = new Set();
57
- kBlockElements.add('address');
58
- kBlockElements.add('ADDRESS');
59
- kBlockElements.add('article');
60
- kBlockElements.add('ARTICLE');
61
- kBlockElements.add('aside');
62
- kBlockElements.add('ASIDE');
63
- kBlockElements.add('blockquote');
64
- kBlockElements.add('BLOCKQUOTE');
65
- kBlockElements.add('br');
66
- kBlockElements.add('BR');
67
- kBlockElements.add('details');
68
- kBlockElements.add('DETAILS');
69
- kBlockElements.add('dialog');
70
- kBlockElements.add('DIALOG');
71
- kBlockElements.add('dd');
72
- kBlockElements.add('DD');
73
- kBlockElements.add('div');
74
- kBlockElements.add('DIV');
75
- kBlockElements.add('dl');
76
- kBlockElements.add('DL');
77
- kBlockElements.add('dt');
78
- kBlockElements.add('DT');
79
- kBlockElements.add('fieldset');
80
- kBlockElements.add('FIELDSET');
81
- kBlockElements.add('figcaption');
82
- kBlockElements.add('FIGCAPTION');
83
- kBlockElements.add('figure');
84
- kBlockElements.add('FIGURE');
85
- kBlockElements.add('footer');
86
- kBlockElements.add('FOOTER');
87
- kBlockElements.add('form');
88
- kBlockElements.add('FORM');
89
- kBlockElements.add('h1');
90
- kBlockElements.add('H1');
91
- kBlockElements.add('h2');
92
- kBlockElements.add('H2');
93
- kBlockElements.add('h3');
94
- kBlockElements.add('H3');
95
- kBlockElements.add('h4');
96
- kBlockElements.add('H4');
97
- kBlockElements.add('h5');
98
- kBlockElements.add('H5');
99
- kBlockElements.add('h6');
100
- kBlockElements.add('H6');
101
- kBlockElements.add('header');
102
- kBlockElements.add('HEADER');
103
- kBlockElements.add('hgroup');
104
- kBlockElements.add('HGROUP');
105
- kBlockElements.add('hr');
106
- kBlockElements.add('HR');
107
- kBlockElements.add('li');
108
- kBlockElements.add('LI');
109
- kBlockElements.add('main');
110
- kBlockElements.add('MAIN');
111
- kBlockElements.add('nav');
112
- kBlockElements.add('NAV');
113
- kBlockElements.add('ol');
114
- kBlockElements.add('OL');
115
- kBlockElements.add('p');
116
- kBlockElements.add('P');
117
- kBlockElements.add('pre');
118
- kBlockElements.add('PRE');
119
- kBlockElements.add('section');
120
- kBlockElements.add('SECTION');
121
- kBlockElements.add('table');
122
- kBlockElements.add('TABLE');
123
- kBlockElements.add('td');
124
- kBlockElements.add('TD');
125
- kBlockElements.add('tr');
126
- kBlockElements.add('TR');
127
- kBlockElements.add('ul');
128
- kBlockElements.add('UL');
62
+ function addToKBlockElement() {
63
+ var args = [];
64
+ for (var _i = 0; _i < arguments.length; _i++) {
65
+ args[_i] = arguments[_i];
66
+ }
67
+ var addToSet = function (array) {
68
+ for (var index = 0; index < array.length; index++) {
69
+ var element = array[index];
70
+ kBlockElements.add(element);
71
+ kBlockElements.add(element.toUpperCase());
72
+ }
73
+ };
74
+ for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
75
+ var arg = args_1[_a];
76
+ addToSet(arg);
77
+ }
78
+ }
79
+ addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
129
80
  var DOMTokenList = /** @class */ (function () {
130
81
  function DOMTokenList(valuesInit, afterUpdate) {
131
82
  if (valuesInit === void 0) { valuesInit = []; }
132
- if (afterUpdate === void 0) { afterUpdate = (function () { return null; }); }
83
+ if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
133
84
  this._set = new Set(valuesInit);
134
85
  this._afterUpdate = afterUpdate;
135
86
  }
@@ -150,8 +101,7 @@ var DOMTokenList = /** @class */ (function () {
150
101
  this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
151
102
  };
152
103
  DOMTokenList.prototype.remove = function (c) {
153
- this._set.delete(c) &&
154
- this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
104
+ this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
155
105
  };
156
106
  DOMTokenList.prototype.toggle = function (c) {
157
107
  this._validate(c);
@@ -216,8 +166,8 @@ var HTMLElement = /** @class */ (function (_super) {
216
166
  _this.rawAttrs = rawAttrs || '';
217
167
  _this.id = keyAttrs.id || '';
218
168
  _this.childNodes = [];
219
- _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return (_this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
220
- ); });
169
+ _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
170
+ );
221
171
  if (keyAttrs.id) {
222
172
  if (!rawAttrs) {
223
173
  _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
@@ -242,8 +192,8 @@ var HTMLElement = /** @class */ (function (_super) {
242
192
  * @returns {string} quoted value
243
193
  */
244
194
  HTMLElement.prototype.quoteAttribute = function (attr) {
245
- if (attr === null) {
246
- return "null";
195
+ if (attr == null) {
196
+ return 'null';
247
197
  }
248
198
  return JSON.stringify(attr.replace(/"/g, '&quot;'));
249
199
  };
@@ -265,7 +215,7 @@ var HTMLElement = /** @class */ (function (_super) {
265
215
  */
266
216
  HTMLElement.prototype.removeChild = function (node) {
267
217
  this.childNodes = this.childNodes.filter(function (child) {
268
- return (child !== node);
218
+ return child !== node;
269
219
  });
270
220
  };
271
221
  /**
@@ -296,6 +246,13 @@ var HTMLElement = /** @class */ (function (_super) {
296
246
  enumerable: false,
297
247
  configurable: true
298
248
  });
249
+ Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
250
+ get: function () {
251
+ return voidTags.has(this.localName);
252
+ },
253
+ enumerable: false,
254
+ configurable: true
255
+ });
299
256
  Object.defineProperty(HTMLElement.prototype, "rawText", {
300
257
  /**
301
258
  * Get escaped (as-is) text value of current node and its children.
@@ -343,11 +300,11 @@ var HTMLElement = /** @class */ (function (_super) {
343
300
  if (node.nodeType === type_1.default.ELEMENT_NODE) {
344
301
  if (kBlockElements.has(node.rawTagName)) {
345
302
  if (currentBlock.length > 0) {
346
- blocks.push(currentBlock = []);
303
+ blocks.push((currentBlock = []));
347
304
  }
348
305
  node.childNodes.forEach(dfs);
349
306
  if (currentBlock.length > 0) {
350
- blocks.push(currentBlock = []);
307
+ blocks.push((currentBlock = []));
351
308
  }
352
309
  }
353
310
  else {
@@ -370,11 +327,12 @@ var HTMLElement = /** @class */ (function (_super) {
370
327
  }
371
328
  }
372
329
  dfs(this);
373
- return blocks.map(function (block) {
374
- // Normalize each line's whitespace
375
- return block.join('').replace(/\s{2,}/g, ' ');
330
+ return blocks
331
+ .map(function (block) {
332
+ return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
376
333
  })
377
- .join('\n').replace(/\s+$/, ''); // trimRight;
334
+ .join('\n')
335
+ .replace(/\s+$/, ''); // trimRight;
378
336
  },
379
337
  enumerable: false,
380
338
  configurable: true
@@ -382,22 +340,18 @@ var HTMLElement = /** @class */ (function (_super) {
382
340
  HTMLElement.prototype.toString = function () {
383
341
  var tag = this.rawTagName;
384
342
  if (tag) {
385
- // const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
386
- // const is_void = void_tags.has(tag);
387
- var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
388
343
  var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
389
- if (is_void) {
390
- return "<" + tag + attrs + ">";
391
- }
392
- return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
344
+ return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
393
345
  }
394
346
  return this.innerHTML;
395
347
  };
396
348
  Object.defineProperty(HTMLElement.prototype, "innerHTML", {
397
349
  get: function () {
398
- return this.childNodes.map(function (child) {
350
+ return this.childNodes
351
+ .map(function (child) {
399
352
  return child.toString();
400
- }).join('');
353
+ })
354
+ .join('');
401
355
  },
402
356
  set: function (content) {
403
357
  //const r = parse(content, global.options); // TODO global.options ?
@@ -424,7 +378,8 @@ var HTMLElement = /** @class */ (function (_super) {
424
378
  for (var _i = 0; _i < arguments.length; _i++) {
425
379
  nodes[_i] = arguments[_i];
426
380
  }
427
- var content = nodes.map(function (node) {
381
+ var content = nodes
382
+ .map(function (node) {
428
383
  if (node instanceof node_1.default) {
429
384
  return [node];
430
385
  }
@@ -434,7 +389,8 @@ var HTMLElement = /** @class */ (function (_super) {
434
389
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
435
390
  }
436
391
  return [];
437
- }).flat();
392
+ })
393
+ .flat();
438
394
  var idx = this.parentNode.childNodes.findIndex(function (child) {
439
395
  return child === _this;
440
396
  });
@@ -481,8 +437,8 @@ var HTMLElement = /** @class */ (function (_super) {
481
437
  res.push(' '.repeat(indention) + str);
482
438
  }
483
439
  function dfs(node) {
484
- var idStr = node.id ? ("#" + node.id) : '';
485
- var classStr = node.classList.length ? ("." + node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
440
+ var idStr = node.id ? "#" + node.id : '';
441
+ var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
486
442
  write("" + node.rawTagName + idStr + classStr);
487
443
  indention++;
488
444
  node.childNodes.forEach(function (childNode) {
@@ -533,113 +489,57 @@ var HTMLElement = /** @class */ (function (_super) {
533
489
  HTMLElement.prototype.querySelectorAll = function (selector) {
534
490
  return (0, css_select_1.selectAll)(selector, this, {
535
491
  xmlMode: true,
536
- adapter: matcher_1.default
492
+ adapter: matcher_1.default,
537
493
  });
538
- // let matcher: Matcher;
539
- // if (selector instanceof Matcher) {
540
- // matcher = selector;
541
- // matcher.reset();
542
- // } else {
543
- // if (selector.includes(',')) {
544
- // const selectors = selector.split(',');
545
- // return Array.from(selectors.reduce((pre, cur) => {
546
- // const result = this.querySelectorAll(cur.trim());
547
- // return result.reduce((p, c) => {
548
- // return p.add(c);
549
- // }, pre);
550
- // }, new Set<HTMLElement>()));
551
- // }
552
- // matcher = new Matcher(selector);
553
- // }
554
- // interface IStack {
555
- // 0: Node; // node
556
- // 1: number; // children
557
- // 2: boolean; // found flag
558
- // }
559
- // const stack = [] as IStack[];
560
- // return this.childNodes.reduce((res, cur) => {
561
- // stack.push([cur, 0, false]);
562
- // while (stack.length) {
563
- // const state = arr_back(stack); // get last element
564
- // const el = state[0];
565
- // if (state[1] === 0) {
566
- // // Seen for first time.
567
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
568
- // stack.pop();
569
- // continue;
570
- // }
571
- // const html_el = el as HTMLElement;
572
- // state[2] = matcher.advance(html_el);
573
- // if (state[2]) {
574
- // if (matcher.matched) {
575
- // res.push(html_el);
576
- // res.push(...(html_el.querySelectorAll(selector)));
577
- // // no need to go further.
578
- // matcher.rewind();
579
- // stack.pop();
580
- // continue;
581
- // }
582
- // }
583
- // }
584
- // if (state[1] < el.childNodes.length) {
585
- // stack.push([el.childNodes[state[1]++], 0, false]);
586
- // } else {
587
- // if (state[2]) {
588
- // matcher.rewind();
589
- // }
590
- // stack.pop();
591
- // }
592
- // }
593
- // return res;
594
- // }, [] as HTMLElement[]);
595
494
  };
596
495
  /**
597
496
  * Query CSS Selector to find matching node.
598
497
  * @param {string} selector Simplified CSS selector
599
- * @return {HTMLElement} matching node
498
+ * @return {(HTMLElement|null)} matching node
600
499
  */
601
500
  HTMLElement.prototype.querySelector = function (selector) {
602
501
  return (0, css_select_1.selectOne)(selector, this, {
603
502
  xmlMode: true,
604
- adapter: matcher_1.default
503
+ adapter: matcher_1.default,
605
504
  });
606
- // let matcher: Matcher;
607
- // if (selector instanceof Matcher) {
608
- // matcher = selector;
609
- // matcher.reset();
610
- // } else {
611
- // matcher = new Matcher(selector);
612
- // }
613
- // const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
614
- // for (const node of this.childNodes) {
615
- // stack.push([node, 0, false]);
616
- // while (stack.length) {
617
- // const state = arr_back(stack);
618
- // const el = state[0];
619
- // if (state[1] === 0) {
620
- // // Seen for first time.
621
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
622
- // stack.pop();
623
- // continue;
624
- // }
625
- // state[2] = matcher.advance(el as HTMLElement);
626
- // if (state[2]) {
627
- // if (matcher.matched) {
628
- // return el as HTMLElement;
629
- // }
630
- // }
631
- // }
632
- // if (state[1] < el.childNodes.length) {
633
- // stack.push([el.childNodes[state[1]++], 0, false]);
634
- // } else {
635
- // if (state[2]) {
636
- // matcher.rewind();
637
- // }
638
- // stack.pop();
639
- // }
640
- // }
641
- // }
642
- // return null;
505
+ };
506
+ /**
507
+ * find elements by their tagName
508
+ * @param {string} tagName the tagName of the elements to select
509
+ */
510
+ HTMLElement.prototype.getElementsByTagName = function (tagName) {
511
+ var upperCasedTagName = tagName.toUpperCase();
512
+ var re = [];
513
+ var stack = [];
514
+ var currentNodeReference = this;
515
+ var index = 0;
516
+ // index becomes undefined when stack.pop() is called on an empty stack,
517
+ // which happens once all descendants of this element have been visited
518
+ while (index !== undefined) {
519
+ var child = void 0;
520
+ // make it work with sparse arrays
521
+ do {
522
+ child = currentNodeReference.childNodes[index++];
523
+ } while (index < currentNodeReference.childNodes.length && child === undefined);
524
+ // if no child is left at this level, go back up to the parent node and resume from the index saved on the stack
525
+ if (child === undefined) {
526
+ currentNodeReference = currentNodeReference.parentNode;
527
+ index = stack.pop();
528
+ continue;
529
+ }
530
+ if (child.nodeType === type_1.default.ELEMENT_NODE) {
531
+ // https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
532
+ if (tagName === '*' || child.tagName === upperCasedTagName)
533
+ re.push(child);
534
+ // if the child has children of its own, push the current index onto the stack and keep searching one level below
535
+ if (child.childNodes.length > 0) {
536
+ stack.push(index);
537
+ currentNodeReference = child;
538
+ index = 0;
539
+ }
540
+ }
541
+ }
542
+ return re;
643
543
  };
644
544
  /**
645
545
  * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
@@ -681,7 +581,7 @@ var HTMLElement = /** @class */ (function (_super) {
681
581
  return [node];
682
582
  }, findOne: findOne, findAll: function () {
683
583
  return [];
684
- } })
584
+ } }),
685
585
  });
686
586
  if (e) {
687
587
  return e;
@@ -759,7 +659,7 @@ var HTMLElement = /** @class */ (function (_super) {
759
659
  });
760
660
  Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
761
661
  /**
762
- * Get escaped (as-it) attributes
662
+ * Get escaped (as-is) attributes
763
663
  * @return {Object} parsed attributes
764
664
  */
765
665
  get: function () {
@@ -768,10 +668,14 @@ var HTMLElement = /** @class */ (function (_super) {
768
668
  }
769
669
  var attrs = {};
770
670
  if (this.rawAttrs) {
771
- var re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
671
+ var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
772
672
  var match = void 0;
773
673
  while ((match = re.exec(this.rawAttrs))) {
774
- attrs[match[1]] = match[2] || match[3] || match[4] || null;
674
+ var key = match[1];
675
+ var val = match[2] || null;
676
+ if (val && (val[0] === "'" || val[0] === "\""))
677
+ val = val.slice(1, val.length - 1);
678
+ attrs[key] = val;
775
679
  }
776
680
  }
777
681
  this._rawAttrs = attrs;
@@ -788,13 +692,15 @@ var HTMLElement = /** @class */ (function (_super) {
788
692
  delete this._attrs[key];
789
693
  }
790
694
  // Update rawString
791
- this.rawAttrs = Object.keys(attrs).map(function (name) {
695
+ this.rawAttrs = Object.keys(attrs)
696
+ .map(function (name) {
792
697
  var val = JSON.stringify(attrs[name]);
793
698
  if (val === undefined || val === 'null') {
794
699
  return name;
795
700
  }
796
701
  return name + "=" + val;
797
- }).join(' ');
702
+ })
703
+ .join(' ');
798
704
  // Update this.id
799
705
  if (key === 'id') {
800
706
  this.id = '';
@@ -818,7 +724,7 @@ var HTMLElement = /** @class */ (function (_super) {
818
724
  HTMLElement.prototype.setAttribute = function (key, value) {
819
725
  var _this = this;
820
726
  if (arguments.length < 2) {
821
- throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
727
+ throw new Error("Failed to execute 'setAttribute' on 'Element'");
822
728
  }
823
729
  var k2 = key.toLowerCase();
824
730
  var attrs = this.rawAttributes;
@@ -834,13 +740,14 @@ var HTMLElement = /** @class */ (function (_super) {
834
740
  this._attrs[k2] = decode(attrs[key]);
835
741
  }
836
742
  // Update rawString
837
- this.rawAttrs = Object.keys(attrs).map(function (name) {
743
+ this.rawAttrs = Object.keys(attrs)
744
+ .map(function (name) {
838
745
  var val = _this.quoteAttribute(attrs[name]);
839
- if (val === 'null' || val === '""') {
746
+ if (val === 'null' || val === '""')
840
747
  return name;
841
- }
842
748
  return name + "=" + val;
843
- }).join(' ');
749
+ })
750
+ .join(' ');
844
751
  // Update this.id
845
752
  if (key === 'id') {
846
753
  this.id = value;
@@ -861,13 +768,14 @@ var HTMLElement = /** @class */ (function (_super) {
861
768
  delete this._rawAttrs;
862
769
  }
863
770
  // Update rawString
864
- this.rawAttrs = Object.keys(attributes).map(function (name) {
771
+ this.rawAttrs = Object.keys(attributes)
772
+ .map(function (name) {
865
773
  var val = attributes[name];
866
- if (val === 'null' || val === '""') {
774
+ if (val === 'null' || val === '""')
867
775
  return name;
868
- }
869
776
  return name + "=" + _this.quoteAttribute(String(val));
870
- }).join(' ');
777
+ })
778
+ .join(' ');
871
779
  };
872
780
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
873
781
  var _a, _b, _c;
@@ -920,9 +828,8 @@ var HTMLElement = /** @class */ (function (_super) {
920
828
  var i = 0;
921
829
  while (i < children.length) {
922
830
  var child = children[i++];
923
- if (this === child) {
831
+ if (this === child)
924
832
  return children[i] || null;
925
- }
926
833
  }
927
834
  return null;
928
835
  }
@@ -964,12 +871,8 @@ var HTMLElement = /** @class */ (function (_super) {
964
871
  }(node_1.default));
965
872
  exports.default = HTMLElement;
966
873
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
967
- var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
968
- // <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
969
- // <([a-z][-.:0-9_a-z]*)\s*\/>
970
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
971
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
972
- var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
874
+ var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
875
+ var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
973
876
  var kSelfClosingElements = {
974
877
  area: true,
975
878
  AREA: true,
@@ -998,7 +901,7 @@ var kSelfClosingElements = {
998
901
  track: true,
999
902
  TRACK: true,
1000
903
  wbr: true,
1001
- WBR: true
904
+ WBR: true,
1002
905
  };
1003
906
  var kElementsClosedByOpening = {
1004
907
  li: { li: true, LI: true },
@@ -1022,7 +925,7 @@ var kElementsClosedByOpening = {
1022
925
  h5: { h5: true, H5: true },
1023
926
  H5: { h5: true, H5: true },
1024
927
  h6: { h6: true, H6: true },
1025
- H6: { h6: true, H6: true }
928
+ H6: { h6: true, H6: true },
1026
929
  };
1027
930
  var kElementsClosedByClosing = {
1028
931
  li: { ul: true, ol: true, UL: true, OL: true },
@@ -1038,7 +941,7 @@ var kElementsClosedByClosing = {
1038
941
  td: { tr: true, table: true, TR: true, TABLE: true },
1039
942
  TD: { tr: true, table: true, TR: true, TABLE: true },
1040
943
  th: { tr: true, table: true, TR: true, TABLE: true },
1041
- TH: { tr: true, table: true, TR: true, TABLE: true }
944
+ TH: { tr: true, table: true, TR: true, TABLE: true },
1042
945
  };
1043
946
  var frameflag = 'documentfragmentcontainer';
1044
947
  /**
@@ -1053,45 +956,39 @@ function base_parse(data, options) {
1053
956
  script: true,
1054
957
  noscript: true,
1055
958
  style: true,
1056
- pre: true
959
+ pre: true,
1057
960
  };
1058
961
  var element_names = Object.keys(elements);
1059
- var kBlockTextElements = element_names.map(function (it) {
1060
- return new RegExp(it, 'i');
1061
- });
1062
- var kIgnoreElements = element_names.filter(function (it) {
1063
- return elements[it];
1064
- }).map(function (it) {
1065
- return new RegExp(it, 'i');
1066
- });
962
+ var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
963
+ var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1067
964
  function element_should_be_ignore(tag) {
1068
- return kIgnoreElements.some(function (it) {
1069
- return it.test(tag);
1070
- });
965
+ return kIgnoreElements.some(function (it) { return it.test(tag); });
1071
966
  }
1072
967
  function is_block_text_element(tag) {
1073
- return kBlockTextElements.some(function (it) {
1074
- return it.test(tag);
1075
- });
968
+ return kBlockTextElements.some(function (it) { return it.test(tag); });
1076
969
  }
1077
- var createRange = function (startPos, endPos) {
1078
- return [startPos - frameFlagOffset, endPos - frameFlagOffset];
1079
- };
970
+ var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1080
971
  var root = new HTMLElement(null, {}, '', null, [0, data.length]);
1081
972
  var currentParent = root;
1082
973
  var stack = [root];
1083
974
  var lastTextPos = -1;
975
+ var noNestedTagIndex = undefined;
1084
976
  var match;
1085
977
  // https://github.com/taoqf/node-html-parser/issues/38
1086
978
  data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
979
+ var lowerCaseTagName = options.lowerCaseTagName;
1087
980
  var dataEndPos = data.length - (frameflag.length + 2);
1088
981
  var frameFlagOffset = frameflag.length + 2;
1089
982
  while ((match = kMarkupPattern.exec(data))) {
1090
- var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
983
+ // Note: Object destructuring here consistently tests as higher performance than array destructuring
984
+ // eslint-disable-next-line prefer-const
985
+ var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
986
+ var matchLength = matchText.length;
987
+ var tagStartPos = kMarkupPattern.lastIndex - matchLength;
1091
988
  var tagEndPos = kMarkupPattern.lastIndex;
1092
989
  // Add TextNode if content
1093
990
  if (lastTextPos > -1) {
1094
- if (lastTextPos + match[0].length < tagEndPos) {
991
+ if (lastTextPos + matchLength < tagEndPos) {
1095
992
  var text = data.substring(lastTextPos, tagStartPos);
1096
993
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
1097
994
  }
@@ -1099,10 +996,10 @@ function base_parse(data, options) {
1099
996
  lastTextPos = kMarkupPattern.lastIndex;
1100
997
  // https://github.com/taoqf/node-html-parser/issues/38
1101
998
  // Skip frameflag node
1102
- if (match[2] === frameflag)
999
+ if (tagName === frameflag)
1103
1000
  continue;
1104
1001
  // Handle comments
1105
- if (match[0][1] === '!') {
1002
+ if (matchText[1] === '!') {
1106
1003
  if (options.comment) {
1107
1004
  // Only keep what is in between <!-- and -->
1108
1005
  var text = data.substring(tagStartPos + 4, tagEndPos - 3);
@@ -1112,36 +1009,46 @@ function base_parse(data, options) {
1112
1009
  }
1113
1010
  /* -- Handle tag matching -- */
1114
1011
  // Fix tag casing if necessary
1115
- if (options.lowerCaseTagName)
1116
- match[2] = match[2].toLowerCase();
1012
+ if (lowerCaseTagName)
1013
+ tagName = tagName.toLowerCase();
1117
1014
  // Handle opening tags (ie. <this> not </that>)
1118
- if (!match[1]) {
1015
+ if (!leadingSlash) {
1119
1016
  /* Populate attributes */
1120
1017
  var attrs = {};
1121
- for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
1122
- attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
1018
+ for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
1019
+ var key = attMatch[1], val = attMatch[2];
1020
+ var isQuoted = val[0] === "'" || val[0] === "\"";
1021
+ attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
1123
1022
  }
1124
- var tagName = currentParent.rawTagName;
1125
- if (!match[4] && kElementsClosedByOpening[tagName]) {
1126
- if (kElementsClosedByOpening[tagName][match[2]]) {
1023
+ var parentTagName = currentParent.rawTagName;
1024
+ if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
1025
+ if (kElementsClosedByOpening[parentTagName][tagName]) {
1127
1026
  stack.pop();
1128
1027
  currentParent = (0, back_1.default)(stack);
1129
1028
  }
1130
1029
  }
1030
+ // Prevent nested A tags by terminating the last A and starting a new one; see issue #144
1031
+ if (tagName === 'a' || tagName === 'A') {
1032
+ if (noNestedTagIndex !== undefined) {
1033
+ stack.splice(noNestedTagIndex);
1034
+ currentParent = (0, back_1.default)(stack);
1035
+ }
1036
+ noNestedTagIndex = stack.length;
1037
+ }
1131
1038
  var tagEndPos_1 = kMarkupPattern.lastIndex;
1132
- var tagStartPos_1 = tagEndPos_1 - match[0].length;
1039
+ var tagStartPos_1 = tagEndPos_1 - matchLength;
1133
1040
  currentParent = currentParent.appendChild(
1134
1041
  // Initialize range (end position updated later for closed tags)
1135
- new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
1042
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
1136
1043
  stack.push(currentParent);
1137
- if (is_block_text_element(match[2])) {
1044
+ if (is_block_text_element(tagName)) {
1138
1045
  // Find closing tag
1139
- var closeMarkup = "</" + match[2] + ">";
1140
- var closeIndex = options.lowerCaseTagName
1046
+ var closeMarkup = "</" + tagName + ">";
1047
+ var closeIndex = lowerCaseTagName
1141
1048
  ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
1142
1049
  : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
1143
1050
  var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
1144
- if (element_should_be_ignore(match[2])) {
1051
+ if (element_should_be_ignore(tagName)) {
1145
1052
  var text = data.substring(tagEndPos_1, textEndPos);
1146
1053
  if (text.length > 0 && /\S/.test(text)) {
1147
1054
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
@@ -1153,14 +1060,16 @@ function base_parse(data, options) {
1153
1060
  else {
1154
1061
  lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
1155
1062
  // Cause to be treated as self-closing, because no close found
1156
- match[1] = 'true';
1063
+ leadingSlash = '/';
1157
1064
  }
1158
1065
  }
1159
1066
  }
1160
1067
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1161
- if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
1068
+ if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1162
1069
  while (true) {
1163
- if (currentParent.rawTagName === match[2]) {
1070
+ if (tagName === 'a' || tagName === 'A')
1071
+ noNestedTagIndex = undefined;
1072
+ if (currentParent.rawTagName === tagName) {
1164
1073
  // Update range end for closed tag
1165
1074
  currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
1166
1075
  stack.pop();
@@ -1168,10 +1077,10 @@ function base_parse(data, options) {
1168
1077
  break;
1169
1078
  }
1170
1079
  else {
1171
- var tagName = currentParent.tagName;
1080
+ var parentTagName = currentParent.tagName;
1172
1081
  // Trying to close current tag, and move on
1173
- if (kElementsClosedByClosing[tagName]) {
1174
- if (kElementsClosedByClosing[tagName][match[2]]) {
1082
+ if (kElementsClosedByClosing[parentTagName]) {
1083
+ if (kElementsClosedByClosing[parentTagName][tagName]) {
1175
1084
  stack.pop();
1176
1085
  currentParent = (0, back_1.default)(stack);
1177
1086
  continue;