node-html-parser 4.1.4 → 5.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,7 +32,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
32
32
  ar[i] = from[i];
33
33
  }
34
34
  }
35
- return to.concat(ar || from);
35
+ return to.concat(ar || Array.prototype.slice.call(from));
36
36
  };
37
37
  var __importDefault = (this && this.__importDefault) || function (mod) {
38
38
  return (mod && mod.__esModule) ? mod : { "default": mod };
@@ -47,89 +47,40 @@ var text_1 = __importDefault(require("./text"));
47
47
  var matcher_1 = __importDefault(require("../matcher"));
48
48
  var back_1 = __importDefault(require("../back"));
49
49
  var comment_1 = __importDefault(require("./comment"));
50
- // const { decode } = he;
50
+ var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
51
51
  function decode(val) {
52
52
  // clone string
53
53
  return JSON.parse(JSON.stringify(he_1.default.decode(val)));
54
54
  }
55
55
  // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
56
+ var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
57
+ var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
58
+ var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
59
+ var tableTags = ['table', 'td', 'tr'];
60
+ var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
56
61
  var kBlockElements = new Set();
57
- kBlockElements.add('address');
58
- kBlockElements.add('ADDRESS');
59
- kBlockElements.add('article');
60
- kBlockElements.add('ARTICLE');
61
- kBlockElements.add('aside');
62
- kBlockElements.add('ASIDE');
63
- kBlockElements.add('blockquote');
64
- kBlockElements.add('BLOCKQUOTE');
65
- kBlockElements.add('br');
66
- kBlockElements.add('BR');
67
- kBlockElements.add('details');
68
- kBlockElements.add('DETAILS');
69
- kBlockElements.add('dialog');
70
- kBlockElements.add('DIALOG');
71
- kBlockElements.add('dd');
72
- kBlockElements.add('DD');
73
- kBlockElements.add('div');
74
- kBlockElements.add('DIV');
75
- kBlockElements.add('dl');
76
- kBlockElements.add('DL');
77
- kBlockElements.add('dt');
78
- kBlockElements.add('DT');
79
- kBlockElements.add('fieldset');
80
- kBlockElements.add('FIELDSET');
81
- kBlockElements.add('figcaption');
82
- kBlockElements.add('FIGCAPTION');
83
- kBlockElements.add('figure');
84
- kBlockElements.add('FIGURE');
85
- kBlockElements.add('footer');
86
- kBlockElements.add('FOOTER');
87
- kBlockElements.add('form');
88
- kBlockElements.add('FORM');
89
- kBlockElements.add('h1');
90
- kBlockElements.add('H1');
91
- kBlockElements.add('h2');
92
- kBlockElements.add('H2');
93
- kBlockElements.add('h3');
94
- kBlockElements.add('H3');
95
- kBlockElements.add('h4');
96
- kBlockElements.add('H4');
97
- kBlockElements.add('h5');
98
- kBlockElements.add('H5');
99
- kBlockElements.add('h6');
100
- kBlockElements.add('H6');
101
- kBlockElements.add('header');
102
- kBlockElements.add('HEADER');
103
- kBlockElements.add('hgroup');
104
- kBlockElements.add('HGROUP');
105
- kBlockElements.add('hr');
106
- kBlockElements.add('HR');
107
- kBlockElements.add('li');
108
- kBlockElements.add('LI');
109
- kBlockElements.add('main');
110
- kBlockElements.add('MAIN');
111
- kBlockElements.add('nav');
112
- kBlockElements.add('NAV');
113
- kBlockElements.add('ol');
114
- kBlockElements.add('OL');
115
- kBlockElements.add('p');
116
- kBlockElements.add('P');
117
- kBlockElements.add('pre');
118
- kBlockElements.add('PRE');
119
- kBlockElements.add('section');
120
- kBlockElements.add('SECTION');
121
- kBlockElements.add('table');
122
- kBlockElements.add('TABLE');
123
- kBlockElements.add('td');
124
- kBlockElements.add('TD');
125
- kBlockElements.add('tr');
126
- kBlockElements.add('TR');
127
- kBlockElements.add('ul');
128
- kBlockElements.add('UL');
62
+ function addToKBlockElement() { // variadic: every argument is an array of tag names to register in kBlockElements
63
+ var args = [];
64
+ for (var _i = 0; _i < arguments.length; _i++) { // copy `arguments` into a real array (compiled rest-parameter pattern)
65
+ args[_i] = arguments[_i];
66
+ }
67
+ var addToSet = function (array) { // registers each name of one array twice: as given and upper-cased
68
+ for (var index = 0; index < array.length; index++) {
69
+ var element = array[index];
70
+ kBlockElements.add(element);
71
+ kBlockElements.add(element.toUpperCase()); // the upper-cased variant is stored too, so lookups need no case normalisation
72
+ }
73
+ };
74
+ for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
75
+ var arg = args_1[_a];
76
+ addToSet(arg);
77
+ }
78
+ }
79
+ addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
129
80
  var DOMTokenList = /** @class */ (function () {
130
81
  function DOMTokenList(valuesInit, afterUpdate) {
131
82
  if (valuesInit === void 0) { valuesInit = []; }
132
- if (afterUpdate === void 0) { afterUpdate = (function () { return null; }); }
83
+ if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
133
84
  this._set = new Set(valuesInit);
134
85
  this._afterUpdate = afterUpdate;
135
86
  }
@@ -150,8 +101,7 @@ var DOMTokenList = /** @class */ (function () {
150
101
  this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
151
102
  };
152
103
  DOMTokenList.prototype.remove = function (c) {
153
- this._set.delete(c) &&
154
- this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
104
+ this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
155
105
  };
156
106
  DOMTokenList.prototype.toggle = function (c) {
157
107
  this._validate(c);
@@ -216,8 +166,8 @@ var HTMLElement = /** @class */ (function (_super) {
216
166
  _this.rawAttrs = rawAttrs || '';
217
167
  _this.id = keyAttrs.id || '';
218
168
  _this.childNodes = [];
219
- _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return (_this.setAttribute('class', classList.toString()) // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
220
- ); });
169
+ _this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
170
+ );
221
171
  if (keyAttrs.id) {
222
172
  if (!rawAttrs) {
223
173
  _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
@@ -242,8 +192,8 @@ var HTMLElement = /** @class */ (function (_super) {
242
192
  * @returns {string} quoted value
243
193
  */
244
194
  HTMLElement.prototype.quoteAttribute = function (attr) {
245
- if (attr === null) {
246
- return "null";
195
+ if (attr == null) {
196
+ return 'null';
247
197
  }
248
198
  return JSON.stringify(attr.replace(/"/g, '&quot;'));
249
199
  };
@@ -265,7 +215,7 @@ var HTMLElement = /** @class */ (function (_super) {
265
215
  */
266
216
  HTMLElement.prototype.removeChild = function (node) {
267
217
  this.childNodes = this.childNodes.filter(function (child) {
268
- return (child !== node);
218
+ return child !== node;
269
219
  });
270
220
  };
271
221
  /**
@@ -286,6 +236,9 @@ var HTMLElement = /** @class */ (function (_super) {
286
236
  get: function () {
287
237
  return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
288
238
  },
239
+ set: function (newname) {
240
+ this.rawTagName = newname.toLowerCase();
241
+ },
289
242
  enumerable: false,
290
243
  configurable: true
291
244
  });
@@ -296,6 +249,13 @@ var HTMLElement = /** @class */ (function (_super) {
296
249
  enumerable: false,
297
250
  configurable: true
298
251
  });
252
+ Object.defineProperty(HTMLElement.prototype, "isVoidElement", { // read-only accessor: only a getter is defined
253
+ get: function () {
254
+ return voidTags.has(this.localName); // true when this element's localName is a member of the voidTags set
255
+ },
256
+ enumerable: false,
257
+ configurable: true
258
+ });
299
259
  Object.defineProperty(HTMLElement.prototype, "rawText", {
300
260
  /**
301
261
  * Get escaped (as-is) text value of current node and its children.
@@ -343,11 +303,11 @@ var HTMLElement = /** @class */ (function (_super) {
343
303
  if (node.nodeType === type_1.default.ELEMENT_NODE) {
344
304
  if (kBlockElements.has(node.rawTagName)) {
345
305
  if (currentBlock.length > 0) {
346
- blocks.push(currentBlock = []);
306
+ blocks.push((currentBlock = []));
347
307
  }
348
308
  node.childNodes.forEach(dfs);
349
309
  if (currentBlock.length > 0) {
350
- blocks.push(currentBlock = []);
310
+ blocks.push((currentBlock = []));
351
311
  }
352
312
  }
353
313
  else {
@@ -370,11 +330,12 @@ var HTMLElement = /** @class */ (function (_super) {
370
330
  }
371
331
  }
372
332
  dfs(this);
373
- return blocks.map(function (block) {
374
- // Normalize each line's whitespace
375
- return block.join('').replace(/\s{2,}/g, ' ');
333
+ return blocks
334
+ .map(function (block) {
335
+ return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
376
336
  })
377
- .join('\n').replace(/\s+$/, ''); // trimRight;
337
+ .join('\n')
338
+ .replace(/\s+$/, ''); // trimRight;
378
339
  },
379
340
  enumerable: false,
380
341
  configurable: true
@@ -382,22 +343,18 @@ var HTMLElement = /** @class */ (function (_super) {
382
343
  HTMLElement.prototype.toString = function () {
383
344
  var tag = this.rawTagName;
384
345
  if (tag) {
385
- // const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
386
- // const is_void = void_tags.has(tag);
387
- var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
388
346
  var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
389
- if (is_void) {
390
- return "<" + tag + attrs + ">";
391
- }
392
- return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
347
+ return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
393
348
  }
394
349
  return this.innerHTML;
395
350
  };
396
351
  Object.defineProperty(HTMLElement.prototype, "innerHTML", {
397
352
  get: function () {
398
- return this.childNodes.map(function (child) {
353
+ return this.childNodes
354
+ .map(function (child) {
399
355
  return child.toString();
400
- }).join('');
356
+ })
357
+ .join('');
401
358
  },
402
359
  set: function (content) {
403
360
  //const r = parse(content, global.options); // TODO global.options ?
@@ -424,7 +381,8 @@ var HTMLElement = /** @class */ (function (_super) {
424
381
  for (var _i = 0; _i < arguments.length; _i++) {
425
382
  nodes[_i] = arguments[_i];
426
383
  }
427
- var content = nodes.map(function (node) {
384
+ var content = nodes
385
+ .map(function (node) {
428
386
  if (node instanceof node_1.default) {
429
387
  return [node];
430
388
  }
@@ -434,7 +392,8 @@ var HTMLElement = /** @class */ (function (_super) {
434
392
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
435
393
  }
436
394
  return [];
437
- }).flat();
395
+ })
396
+ .flat();
438
397
  var idx = this.parentNode.childNodes.findIndex(function (child) {
439
398
  return child === _this;
440
399
  });
@@ -481,8 +440,8 @@ var HTMLElement = /** @class */ (function (_super) {
481
440
  res.push(' '.repeat(indention) + str);
482
441
  }
483
442
  function dfs(node) {
484
- var idStr = node.id ? ("#" + node.id) : '';
485
- var classStr = node.classList.length ? ("." + node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
443
+ var idStr = node.id ? "#" + node.id : '';
444
+ var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
486
445
  write("" + node.rawTagName + idStr + classStr);
487
446
  indention++;
488
447
  node.childNodes.forEach(function (childNode) {
@@ -533,113 +492,57 @@ var HTMLElement = /** @class */ (function (_super) {
533
492
  HTMLElement.prototype.querySelectorAll = function (selector) {
534
493
  return (0, css_select_1.selectAll)(selector, this, {
535
494
  xmlMode: true,
536
- adapter: matcher_1.default
495
+ adapter: matcher_1.default,
537
496
  });
538
- // let matcher: Matcher;
539
- // if (selector instanceof Matcher) {
540
- // matcher = selector;
541
- // matcher.reset();
542
- // } else {
543
- // if (selector.includes(',')) {
544
- // const selectors = selector.split(',');
545
- // return Array.from(selectors.reduce((pre, cur) => {
546
- // const result = this.querySelectorAll(cur.trim());
547
- // return result.reduce((p, c) => {
548
- // return p.add(c);
549
- // }, pre);
550
- // }, new Set<HTMLElement>()));
551
- // }
552
- // matcher = new Matcher(selector);
553
- // }
554
- // interface IStack {
555
- // 0: Node; // node
556
- // 1: number; // children
557
- // 2: boolean; // found flag
558
- // }
559
- // const stack = [] as IStack[];
560
- // return this.childNodes.reduce((res, cur) => {
561
- // stack.push([cur, 0, false]);
562
- // while (stack.length) {
563
- // const state = arr_back(stack); // get last element
564
- // const el = state[0];
565
- // if (state[1] === 0) {
566
- // // Seen for first time.
567
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
568
- // stack.pop();
569
- // continue;
570
- // }
571
- // const html_el = el as HTMLElement;
572
- // state[2] = matcher.advance(html_el);
573
- // if (state[2]) {
574
- // if (matcher.matched) {
575
- // res.push(html_el);
576
- // res.push(...(html_el.querySelectorAll(selector)));
577
- // // no need to go further.
578
- // matcher.rewind();
579
- // stack.pop();
580
- // continue;
581
- // }
582
- // }
583
- // }
584
- // if (state[1] < el.childNodes.length) {
585
- // stack.push([el.childNodes[state[1]++], 0, false]);
586
- // } else {
587
- // if (state[2]) {
588
- // matcher.rewind();
589
- // }
590
- // stack.pop();
591
- // }
592
- // }
593
- // return res;
594
- // }, [] as HTMLElement[]);
595
497
  };
596
498
  /**
597
499
  * Query CSS Selector to find matching node.
598
500
  * @param {string} selector Simplified CSS selector
599
- * @return {HTMLElement} matching node
501
+ * @return {(HTMLElement|null)} matching node
600
502
  */
601
503
  HTMLElement.prototype.querySelector = function (selector) {
602
504
  return (0, css_select_1.selectOne)(selector, this, {
603
505
  xmlMode: true,
604
- adapter: matcher_1.default
506
+ adapter: matcher_1.default,
605
507
  });
606
- // let matcher: Matcher;
607
- // if (selector instanceof Matcher) {
608
- // matcher = selector;
609
- // matcher.reset();
610
- // } else {
611
- // matcher = new Matcher(selector);
612
- // }
613
- // const stack = [] as { 0: Node; 1: 0 | 1; 2: boolean }[];
614
- // for (const node of this.childNodes) {
615
- // stack.push([node, 0, false]);
616
- // while (stack.length) {
617
- // const state = arr_back(stack);
618
- // const el = state[0];
619
- // if (state[1] === 0) {
620
- // // Seen for first time.
621
- // if (el.nodeType !== NodeType.ELEMENT_NODE) {
622
- // stack.pop();
623
- // continue;
624
- // }
625
- // state[2] = matcher.advance(el as HTMLElement);
626
- // if (state[2]) {
627
- // if (matcher.matched) {
628
- // return el as HTMLElement;
629
- // }
630
- // }
631
- // }
632
- // if (state[1] < el.childNodes.length) {
633
- // stack.push([el.childNodes[state[1]++], 0, false]);
634
- // } else {
635
- // if (state[2]) {
636
- // matcher.rewind();
637
- // }
638
- // stack.pop();
639
- // }
640
- // }
641
- // }
642
- // return null;
508
+ };
509
+ /**
510
+ * Collect every descendant element whose tagName matches, walking the subtree iteratively in depth-first order, and return them as an array.
511
+ * @param {string} tagName the tagName of the elements to select; it is upper-cased before being compared with each element's tagName, and '*' matches every element
512
+ */
513
+ HTMLElement.prototype.getElementsByTagName = function (tagName) {
514
+ var upperCasedTagName = tagName.toUpperCase();
515
+ var re = []; // matching elements, in the order they are encountered
516
+ var stack = []; // for each ancestor being descended through: the child index at which to resume afterwards
517
+ var currentNodeReference = this; // node whose childNodes are currently being scanned
518
+ var index = 0; // position of the next child to inspect in currentNodeReference.childNodes
519
+ // `index` becomes undefined when stack.pop() is called on an empty stack,
520
+ // i.e. after the children of the starting node are exhausted; that is the loop's exit condition
521
+ while (index !== undefined) {
522
+ var child = void 0;
523
+ // make it work with sparse arrays: skip over undefined slots until a child is found or the end is reached
524
+ do {
525
+ child = currentNodeReference.childNodes[index++];
526
+ } while (index < currentNodeReference.childNodes.length && child === undefined);
527
+ // no child left at this level: go back up to the parent node and resume at the index saved for it
528
+ if (child === undefined) {
529
+ currentNodeReference = currentNodeReference.parentNode;
530
+ index = stack.pop();
531
+ continue;
532
+ }
533
+ if (child.nodeType === type_1.default.ELEMENT_NODE) { // only element nodes are matched or descended into
534
+ // https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
535
+ if (tagName === '*' || child.tagName === upperCasedTagName)
536
+ re.push(child);
537
+ // if the child has children, save the current position on the stack and continue the search one level below
538
+ if (child.childNodes.length > 0) {
539
+ stack.push(index);
540
+ currentNodeReference = child;
541
+ index = 0;
542
+ }
543
+ }
544
+ }
545
+ return re;
643
546
  };
644
547
  /**
645
548
  * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
@@ -681,7 +584,7 @@ var HTMLElement = /** @class */ (function (_super) {
681
584
  return [node];
682
585
  }, findOne: findOne, findAll: function () {
683
586
  return [];
684
- } })
587
+ } }),
685
588
  });
686
589
  if (e) {
687
590
  return e;
@@ -759,7 +662,7 @@ var HTMLElement = /** @class */ (function (_super) {
759
662
  });
760
663
  Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
761
664
  /**
762
- * Get escaped (as-it) attributes
665
+ * Get escaped (as-is) attributes
763
666
  * @return {Object} parsed attributes
764
667
  */
765
668
  get: function () {
@@ -768,10 +671,14 @@ var HTMLElement = /** @class */ (function (_super) {
768
671
  }
769
672
  var attrs = {};
770
673
  if (this.rawAttrs) {
771
- var re = /([a-z()#][a-z0-9-_:()#]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
674
+ var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
772
675
  var match = void 0;
773
676
  while ((match = re.exec(this.rawAttrs))) {
774
- attrs[match[1]] = match[2] || match[3] || match[4] || null;
677
+ var key = match[1];
678
+ var val = match[2] || null;
679
+ if (val && (val[0] === "'" || val[0] === "\""))
680
+ val = val.slice(1, val.length - 1);
681
+ attrs[key] = val;
775
682
  }
776
683
  }
777
684
  this._rawAttrs = attrs;
@@ -788,13 +695,15 @@ var HTMLElement = /** @class */ (function (_super) {
788
695
  delete this._attrs[key];
789
696
  }
790
697
  // Update rawString
791
- this.rawAttrs = Object.keys(attrs).map(function (name) {
698
+ this.rawAttrs = Object.keys(attrs)
699
+ .map(function (name) {
792
700
  var val = JSON.stringify(attrs[name]);
793
701
  if (val === undefined || val === 'null') {
794
702
  return name;
795
703
  }
796
704
  return name + "=" + val;
797
- }).join(' ');
705
+ })
706
+ .join(' ');
798
707
  // Update this.id
799
708
  if (key === 'id') {
800
709
  this.id = '';
@@ -818,7 +727,7 @@ var HTMLElement = /** @class */ (function (_super) {
818
727
  HTMLElement.prototype.setAttribute = function (key, value) {
819
728
  var _this = this;
820
729
  if (arguments.length < 2) {
821
- throw new Error('Failed to execute \'setAttribute\' on \'Element\'');
730
+ throw new Error("Failed to execute 'setAttribute' on 'Element'");
822
731
  }
823
732
  var k2 = key.toLowerCase();
824
733
  var attrs = this.rawAttributes;
@@ -834,13 +743,14 @@ var HTMLElement = /** @class */ (function (_super) {
834
743
  this._attrs[k2] = decode(attrs[key]);
835
744
  }
836
745
  // Update rawString
837
- this.rawAttrs = Object.keys(attrs).map(function (name) {
746
+ this.rawAttrs = Object.keys(attrs)
747
+ .map(function (name) {
838
748
  var val = _this.quoteAttribute(attrs[name]);
839
- if (val === 'null' || val === '""') {
749
+ if (val === 'null' || val === '""')
840
750
  return name;
841
- }
842
751
  return name + "=" + val;
843
- }).join(' ');
752
+ })
753
+ .join(' ');
844
754
  // Update this.id
845
755
  if (key === 'id') {
846
756
  this.id = value;
@@ -861,13 +771,14 @@ var HTMLElement = /** @class */ (function (_super) {
861
771
  delete this._rawAttrs;
862
772
  }
863
773
  // Update rawString
864
- this.rawAttrs = Object.keys(attributes).map(function (name) {
774
+ this.rawAttrs = Object.keys(attributes)
775
+ .map(function (name) {
865
776
  var val = attributes[name];
866
- if (val === 'null' || val === '""') {
777
+ if (val === 'null' || val === '""')
867
778
  return name;
868
- }
869
779
  return name + "=" + _this.quoteAttribute(String(val));
870
- }).join(' ');
780
+ })
781
+ .join(' ');
871
782
  };
872
783
  HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
873
784
  var _a, _b, _c;
@@ -920,9 +831,8 @@ var HTMLElement = /** @class */ (function (_super) {
920
831
  var i = 0;
921
832
  while (i < children.length) {
922
833
  var child = children[i++];
923
- if (this === child) {
834
+ if (this === child)
924
835
  return children[i] || null;
925
- }
926
836
  }
927
837
  return null;
928
838
  }
@@ -964,12 +874,8 @@ var HTMLElement = /** @class */ (function (_super) {
964
874
  }(node_1.default));
965
875
  exports.default = HTMLElement;
966
876
  // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
967
- var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*([^>]*?)(\/?)>/ig;
968
- // <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
969
- // <([a-z][-.:0-9_a-z]*)\s*\/>
970
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
971
- // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
972
- var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
877
+ var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
878
+ var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
973
879
  var kSelfClosingElements = {
974
880
  area: true,
975
881
  AREA: true,
@@ -998,7 +904,7 @@ var kSelfClosingElements = {
998
904
  track: true,
999
905
  TRACK: true,
1000
906
  wbr: true,
1001
- WBR: true
907
+ WBR: true,
1002
908
  };
1003
909
  var kElementsClosedByOpening = {
1004
910
  li: { li: true, LI: true },
@@ -1022,7 +928,7 @@ var kElementsClosedByOpening = {
1022
928
  h5: { h5: true, H5: true },
1023
929
  H5: { h5: true, H5: true },
1024
930
  h6: { h6: true, H6: true },
1025
- H6: { h6: true, H6: true }
931
+ H6: { h6: true, H6: true },
1026
932
  };
1027
933
  var kElementsClosedByClosing = {
1028
934
  li: { ul: true, ol: true, UL: true, OL: true },
@@ -1038,7 +944,7 @@ var kElementsClosedByClosing = {
1038
944
  td: { tr: true, table: true, TR: true, TABLE: true },
1039
945
  TD: { tr: true, table: true, TR: true, TABLE: true },
1040
946
  th: { tr: true, table: true, TR: true, TABLE: true },
1041
- TH: { tr: true, table: true, TR: true, TABLE: true }
947
+ TH: { tr: true, table: true, TR: true, TABLE: true },
1042
948
  };
1043
949
  var frameflag = 'documentfragmentcontainer';
1044
950
  /**
@@ -1053,45 +959,39 @@ function base_parse(data, options) {
1053
959
  script: true,
1054
960
  noscript: true,
1055
961
  style: true,
1056
- pre: true
962
+ pre: true,
1057
963
  };
1058
964
  var element_names = Object.keys(elements);
1059
- var kBlockTextElements = element_names.map(function (it) {
1060
- return new RegExp(it, 'i');
1061
- });
1062
- var kIgnoreElements = element_names.filter(function (it) {
1063
- return elements[it];
1064
- }).map(function (it) {
1065
- return new RegExp(it, 'i');
1066
- });
965
+ var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
966
+ var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1067
967
  function element_should_be_ignore(tag) {
1068
- return kIgnoreElements.some(function (it) {
1069
- return it.test(tag);
1070
- });
968
+ return kIgnoreElements.some(function (it) { return it.test(tag); });
1071
969
  }
1072
970
  function is_block_text_element(tag) {
1073
- return kBlockTextElements.some(function (it) {
1074
- return it.test(tag);
1075
- });
971
+ return kBlockTextElements.some(function (it) { return it.test(tag); });
1076
972
  }
1077
- var createRange = function (startPos, endPos) {
1078
- return [startPos - frameFlagOffset, endPos - frameFlagOffset];
1079
- };
973
+ var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
1080
974
  var root = new HTMLElement(null, {}, '', null, [0, data.length]);
1081
975
  var currentParent = root;
1082
976
  var stack = [root];
1083
977
  var lastTextPos = -1;
978
+ var noNestedTagIndex = undefined;
1084
979
  var match;
1085
980
  // https://github.com/taoqf/node-html-parser/issues/38
1086
981
  data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
982
+ var lowerCaseTagName = options.lowerCaseTagName;
1087
983
  var dataEndPos = data.length - (frameflag.length + 2);
1088
984
  var frameFlagOffset = frameflag.length + 2;
1089
985
  while ((match = kMarkupPattern.exec(data))) {
1090
- var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
986
+ // Note: Object destructuring here consistently tests as higher performance than array destructuring
987
+ // eslint-disable-next-line prefer-const
988
+ var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
989
+ var matchLength = matchText.length;
990
+ var tagStartPos = kMarkupPattern.lastIndex - matchLength;
1091
991
  var tagEndPos = kMarkupPattern.lastIndex;
1092
992
  // Add TextNode if content
1093
993
  if (lastTextPos > -1) {
1094
- if (lastTextPos + match[0].length < tagEndPos) {
994
+ if (lastTextPos + matchLength < tagEndPos) {
1095
995
  var text = data.substring(lastTextPos, tagStartPos);
1096
996
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
1097
997
  }
@@ -1099,10 +999,10 @@ function base_parse(data, options) {
1099
999
  lastTextPos = kMarkupPattern.lastIndex;
1100
1000
  // https://github.com/taoqf/node-html-parser/issues/38
1101
1001
  // Skip frameflag node
1102
- if (match[2] === frameflag)
1002
+ if (tagName === frameflag)
1103
1003
  continue;
1104
1004
  // Handle comments
1105
- if (match[0][1] === '!') {
1005
+ if (matchText[1] === '!') {
1106
1006
  if (options.comment) {
1107
1007
  // Only keep what is in between <!-- and -->
1108
1008
  var text = data.substring(tagStartPos + 4, tagEndPos - 3);
@@ -1112,42 +1012,46 @@ function base_parse(data, options) {
1112
1012
  }
1113
1013
  /* -- Handle tag matching -- */
1114
1014
  // Fix tag casing if necessary
1115
- if (options.lowerCaseTagName)
1116
- match[2] = match[2].toLowerCase();
1015
+ if (lowerCaseTagName)
1016
+ tagName = tagName.toLowerCase();
1117
1017
  // Handle opening tags (ie. <this> not </that>)
1118
- if (!match[1]) {
1018
+ if (!leadingSlash) {
1119
1019
  /* Populate attributes */
1120
1020
  var attrs = {};
1121
- for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
1122
- attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
1021
+ for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
1022
+ var key = attMatch[1], val = attMatch[2];
1023
+ var isQuoted = val[0] === "'" || val[0] === "\"";
1024
+ attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
1123
1025
  }
1124
- var tagName = currentParent.rawTagName;
1125
- if (!match[4] && kElementsClosedByOpening[tagName]) {
1126
- if (kElementsClosedByOpening[tagName][match[2]]) {
1026
+ var parentTagName = currentParent.rawTagName;
1027
+ if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
1028
+ if (kElementsClosedByOpening[parentTagName][tagName]) {
1127
1029
  stack.pop();
1128
1030
  currentParent = (0, back_1.default)(stack);
1129
1031
  }
1130
1032
  }
1131
- // console.error('111111111111111111', currentParent.rawTagName);
1132
- // console.error('22222222222222222222', match);
1133
- if (currentParent.rawTagName === 'a' && match[2] === 'a') {
1134
- stack.pop();
1135
- currentParent = (0, back_1.default)(stack);
1033
+ // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1034
+ if (tagName === 'a' || tagName === 'A') {
1035
+ if (noNestedTagIndex !== undefined) {
1036
+ stack.splice(noNestedTagIndex);
1037
+ currentParent = (0, back_1.default)(stack);
1038
+ }
1039
+ noNestedTagIndex = stack.length;
1136
1040
  }
1137
1041
  var tagEndPos_1 = kMarkupPattern.lastIndex;
1138
- var tagStartPos_1 = tagEndPos_1 - match[0].length;
1042
+ var tagStartPos_1 = tagEndPos_1 - matchLength;
1139
1043
  currentParent = currentParent.appendChild(
1140
1044
  // Initialize range (end position updated later for closed tags)
1141
- new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
1045
+ new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
1142
1046
  stack.push(currentParent);
1143
- if (is_block_text_element(match[2])) {
1047
+ if (is_block_text_element(tagName)) {
1144
1048
  // Find closing tag
1145
- var closeMarkup = "</" + match[2] + ">";
1146
- var closeIndex = options.lowerCaseTagName
1049
+ var closeMarkup = "</" + tagName + ">";
1050
+ var closeIndex = lowerCaseTagName
1147
1051
  ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
1148
1052
  : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
1149
1053
  var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
1150
- if (element_should_be_ignore(match[2])) {
1054
+ if (element_should_be_ignore(tagName)) {
1151
1055
  var text = data.substring(tagEndPos_1, textEndPos);
1152
1056
  if (text.length > 0 && /\S/.test(text)) {
1153
1057
  currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
@@ -1159,14 +1063,16 @@ function base_parse(data, options) {
1159
1063
  else {
1160
1064
  lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
1161
1065
  // Cause to be treated as self-closing, because no close found
1162
- match[1] = 'true';
1066
+ leadingSlash = '/';
1163
1067
  }
1164
1068
  }
1165
1069
  }
1166
1070
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1167
- if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
1071
+ if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1168
1072
  while (true) {
1169
- if (currentParent.rawTagName === match[2]) {
1073
+ if (tagName === 'a' || tagName === 'A')
1074
+ noNestedTagIndex = undefined;
1075
+ if (currentParent.rawTagName === tagName) {
1170
1076
  // Update range end for closed tag
1171
1077
  currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
1172
1078
  stack.pop();
@@ -1174,10 +1080,10 @@ function base_parse(data, options) {
1174
1080
  break;
1175
1081
  }
1176
1082
  else {
1177
- var tagName = currentParent.tagName;
1083
+ var parentTagName = currentParent.tagName;
1178
1084
  // Trying to close current tag, and move on
1179
- if (kElementsClosedByClosing[tagName]) {
1180
- if (kElementsClosedByClosing[tagName][match[2]]) {
1085
+ if (kElementsClosedByClosing[parentTagName]) {
1086
+ if (kElementsClosedByClosing[parentTagName][tagName]) {
1181
1087
  stack.pop();
1182
1088
  currentParent = (0, back_1.default)(stack);
1183
1089
  continue;