node-html-parser 4.1.3 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +10 -4
- package/dist/main.js +175 -266
- package/dist/nodes/html.d.ts +11 -5
- package/dist/nodes/html.js +175 -266
- package/esm/index.js +11 -0
- package/esm/package.json +3 -0
- package/package.json +39 -17
- package/.eslintignore +0 -3
- package/.eslintrc.json +0 -226
- package/.mocharc.yaml +0 -1
- package/dist/esm/back.js +0 -3
- package/dist/esm/index.js +0 -7
- package/dist/esm/matcher.js +0 -101
- package/dist/esm/nodes/comment.js +0 -23
- package/dist/esm/nodes/html.js +0 -1096
- package/dist/esm/nodes/node.js +0 -25
- package/dist/esm/nodes/text.js +0 -95
- package/dist/esm/nodes/type.js +0 -7
- package/dist/esm/parse.js +0 -1
- package/dist/esm/valid.js +0 -9
package/dist/main.js
CHANGED
|
@@ -34,7 +34,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
|
34
34
|
ar[i] = from[i];
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
|
-
return to.concat(ar || from);
|
|
37
|
+
return to.concat(ar || Array.prototype.slice.call(from));
|
|
38
38
|
};
|
|
39
39
|
define("back", ["require", "exports"], function (require, exports) {
|
|
40
40
|
"use strict";
|
|
@@ -293,89 +293,40 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
293
293
|
matcher_1 = __importDefault(matcher_1);
|
|
294
294
|
back_1 = __importDefault(back_1);
|
|
295
295
|
comment_1 = __importDefault(comment_1);
|
|
296
|
-
|
|
296
|
+
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
297
297
|
function decode(val) {
|
|
298
298
|
// clone string
|
|
299
299
|
return JSON.parse(JSON.stringify(he_2.default.decode(val)));
|
|
300
300
|
}
|
|
301
301
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
302
|
+
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
|
|
303
|
+
var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
|
|
304
|
+
var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
|
|
305
|
+
var tableTags = ['table', 'td', 'tr'];
|
|
306
|
+
var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
|
|
302
307
|
var kBlockElements = new Set();
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
kBlockElements.add('dl');
|
|
322
|
-
kBlockElements.add('DL');
|
|
323
|
-
kBlockElements.add('dt');
|
|
324
|
-
kBlockElements.add('DT');
|
|
325
|
-
kBlockElements.add('fieldset');
|
|
326
|
-
kBlockElements.add('FIELDSET');
|
|
327
|
-
kBlockElements.add('figcaption');
|
|
328
|
-
kBlockElements.add('FIGCAPTION');
|
|
329
|
-
kBlockElements.add('figure');
|
|
330
|
-
kBlockElements.add('FIGURE');
|
|
331
|
-
kBlockElements.add('footer');
|
|
332
|
-
kBlockElements.add('FOOTER');
|
|
333
|
-
kBlockElements.add('form');
|
|
334
|
-
kBlockElements.add('FORM');
|
|
335
|
-
kBlockElements.add('h1');
|
|
336
|
-
kBlockElements.add('H1');
|
|
337
|
-
kBlockElements.add('h2');
|
|
338
|
-
kBlockElements.add('H2');
|
|
339
|
-
kBlockElements.add('h3');
|
|
340
|
-
kBlockElements.add('H3');
|
|
341
|
-
kBlockElements.add('h4');
|
|
342
|
-
kBlockElements.add('H4');
|
|
343
|
-
kBlockElements.add('h5');
|
|
344
|
-
kBlockElements.add('H5');
|
|
345
|
-
kBlockElements.add('h6');
|
|
346
|
-
kBlockElements.add('H6');
|
|
347
|
-
kBlockElements.add('header');
|
|
348
|
-
kBlockElements.add('HEADER');
|
|
349
|
-
kBlockElements.add('hgroup');
|
|
350
|
-
kBlockElements.add('HGROUP');
|
|
351
|
-
kBlockElements.add('hr');
|
|
352
|
-
kBlockElements.add('HR');
|
|
353
|
-
kBlockElements.add('li');
|
|
354
|
-
kBlockElements.add('LI');
|
|
355
|
-
kBlockElements.add('main');
|
|
356
|
-
kBlockElements.add('MAIN');
|
|
357
|
-
kBlockElements.add('nav');
|
|
358
|
-
kBlockElements.add('NAV');
|
|
359
|
-
kBlockElements.add('ol');
|
|
360
|
-
kBlockElements.add('OL');
|
|
361
|
-
kBlockElements.add('p');
|
|
362
|
-
kBlockElements.add('P');
|
|
363
|
-
kBlockElements.add('pre');
|
|
364
|
-
kBlockElements.add('PRE');
|
|
365
|
-
kBlockElements.add('section');
|
|
366
|
-
kBlockElements.add('SECTION');
|
|
367
|
-
kBlockElements.add('table');
|
|
368
|
-
kBlockElements.add('TABLE');
|
|
369
|
-
kBlockElements.add('td');
|
|
370
|
-
kBlockElements.add('TD');
|
|
371
|
-
kBlockElements.add('tr');
|
|
372
|
-
kBlockElements.add('TR');
|
|
373
|
-
kBlockElements.add('ul');
|
|
374
|
-
kBlockElements.add('UL');
|
|
308
|
+
function addToKBlockElement() {
|
|
309
|
+
var args = [];
|
|
310
|
+
for (var _i = 0; _i < arguments.length; _i++) {
|
|
311
|
+
args[_i] = arguments[_i];
|
|
312
|
+
}
|
|
313
|
+
var addToSet = function (array) {
|
|
314
|
+
for (var index = 0; index < array.length; index++) {
|
|
315
|
+
var element = array[index];
|
|
316
|
+
kBlockElements.add(element);
|
|
317
|
+
kBlockElements.add(element.toUpperCase());
|
|
318
|
+
}
|
|
319
|
+
};
|
|
320
|
+
for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
|
|
321
|
+
var arg = args_1[_a];
|
|
322
|
+
addToSet(arg);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
|
|
375
326
|
var DOMTokenList = /** @class */ (function () {
|
|
376
327
|
function DOMTokenList(valuesInit, afterUpdate) {
|
|
377
328
|
if (valuesInit === void 0) { valuesInit = []; }
|
|
378
|
-
if (afterUpdate === void 0) { afterUpdate =
|
|
329
|
+
if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
|
|
379
330
|
this._set = new Set(valuesInit);
|
|
380
331
|
this._afterUpdate = afterUpdate;
|
|
381
332
|
}
|
|
@@ -396,8 +347,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
396
347
|
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
397
348
|
};
|
|
398
349
|
DOMTokenList.prototype.remove = function (c) {
|
|
399
|
-
this._set.delete(c) &&
|
|
400
|
-
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
350
|
+
this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
401
351
|
};
|
|
402
352
|
DOMTokenList.prototype.toggle = function (c) {
|
|
403
353
|
this._validate(c);
|
|
@@ -462,8 +412,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
462
412
|
_this.rawAttrs = rawAttrs || '';
|
|
463
413
|
_this.id = keyAttrs.id || '';
|
|
464
414
|
_this.childNodes = [];
|
|
465
|
-
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return
|
|
466
|
-
);
|
|
415
|
+
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
416
|
+
);
|
|
467
417
|
if (keyAttrs.id) {
|
|
468
418
|
if (!rawAttrs) {
|
|
469
419
|
_this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
|
|
@@ -488,8 +438,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
488
438
|
* @returns {string} quoted value
|
|
489
439
|
*/
|
|
490
440
|
HTMLElement.prototype.quoteAttribute = function (attr) {
|
|
491
|
-
if (attr
|
|
492
|
-
return
|
|
441
|
+
if (attr == null) {
|
|
442
|
+
return 'null';
|
|
493
443
|
}
|
|
494
444
|
return JSON.stringify(attr.replace(/"/g, '&quot;'));
|
|
495
445
|
};
|
|
@@ -511,7 +461,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
511
461
|
*/
|
|
512
462
|
HTMLElement.prototype.removeChild = function (node) {
|
|
513
463
|
this.childNodes = this.childNodes.filter(function (child) {
|
|
514
|
-
return
|
|
464
|
+
return child !== node;
|
|
515
465
|
});
|
|
516
466
|
};
|
|
517
467
|
/**
|
|
@@ -542,6 +492,13 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
542
492
|
enumerable: false,
|
|
543
493
|
configurable: true
|
|
544
494
|
});
|
|
495
|
+
Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
|
|
496
|
+
get: function () {
|
|
497
|
+
return voidTags.has(this.localName);
|
|
498
|
+
},
|
|
499
|
+
enumerable: false,
|
|
500
|
+
configurable: true
|
|
501
|
+
});
|
|
545
502
|
Object.defineProperty(HTMLElement.prototype, "rawText", {
|
|
546
503
|
/**
|
|
547
504
|
* Get escaped (as-is) text value of current node and its children.
|
|
@@ -589,11 +546,11 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
589
546
|
if (node.nodeType === type_3.default.ELEMENT_NODE) {
|
|
590
547
|
if (kBlockElements.has(node.rawTagName)) {
|
|
591
548
|
if (currentBlock.length > 0) {
|
|
592
|
-
blocks.push(currentBlock = []);
|
|
549
|
+
blocks.push((currentBlock = []));
|
|
593
550
|
}
|
|
594
551
|
node.childNodes.forEach(dfs);
|
|
595
552
|
if (currentBlock.length > 0) {
|
|
596
|
-
blocks.push(currentBlock = []);
|
|
553
|
+
blocks.push((currentBlock = []));
|
|
597
554
|
}
|
|
598
555
|
}
|
|
599
556
|
else {
|
|
@@ -616,11 +573,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
616
573
|
}
|
|
617
574
|
}
|
|
618
575
|
dfs(this);
|
|
619
|
-
return blocks
|
|
620
|
-
|
|
621
|
-
return block.join('').replace(/\s{2,}/g, ' ');
|
|
576
|
+
return blocks
|
|
577
|
+
.map(function (block) {
|
|
578
|
+
return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
|
|
622
579
|
})
|
|
623
|
-
.join('\n')
|
|
580
|
+
.join('\n')
|
|
581
|
+
.replace(/\s+$/, ''); // trimRight;
|
|
624
582
|
},
|
|
625
583
|
enumerable: false,
|
|
626
584
|
configurable: true
|
|
@@ -628,22 +586,18 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
628
586
|
HTMLElement.prototype.toString = function () {
|
|
629
587
|
var tag = this.rawTagName;
|
|
630
588
|
if (tag) {
|
|
631
|
-
// const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
|
|
632
|
-
// const is_void = void_tags.has(tag);
|
|
633
|
-
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
|
|
634
589
|
var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
|
|
635
|
-
|
|
636
|
-
return "<" + tag + attrs + ">";
|
|
637
|
-
}
|
|
638
|
-
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
590
|
+
return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
639
591
|
}
|
|
640
592
|
return this.innerHTML;
|
|
641
593
|
};
|
|
642
594
|
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
|
|
643
595
|
get: function () {
|
|
644
|
-
return this.childNodes
|
|
596
|
+
return this.childNodes
|
|
597
|
+
.map(function (child) {
|
|
645
598
|
return child.toString();
|
|
646
|
-
})
|
|
599
|
+
})
|
|
600
|
+
.join('');
|
|
647
601
|
},
|
|
648
602
|
set: function (content) {
|
|
649
603
|
//const r = parse(content, global.options); // TODO global.options ?
|
|
@@ -670,7 +624,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
670
624
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
671
625
|
nodes[_i] = arguments[_i];
|
|
672
626
|
}
|
|
673
|
-
var content = nodes
|
|
627
|
+
var content = nodes
|
|
628
|
+
.map(function (node) {
|
|
674
629
|
if (node instanceof node_2.default) {
|
|
675
630
|
return [node];
|
|
676
631
|
}
|
|
@@ -680,7 +635,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
680
635
|
return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
|
|
681
636
|
}
|
|
682
637
|
return [];
|
|
683
|
-
})
|
|
638
|
+
})
|
|
639
|
+
.flat();
|
|
684
640
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
685
641
|
return child === _this;
|
|
686
642
|
});
|
|
@@ -727,8 +683,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
727
683
|
res.push(' '.repeat(indention) + str);
|
|
728
684
|
}
|
|
729
685
|
function dfs(node) {
|
|
730
|
-
var idStr = node.id ?
|
|
731
|
-
var classStr = node.classList.length ?
|
|
686
|
+
var idStr = node.id ? "#" + node.id : '';
|
|
687
|
+
var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
732
688
|
write("" + node.rawTagName + idStr + classStr);
|
|
733
689
|
indention++;
|
|
734
690
|
node.childNodes.forEach(function (childNode) {
|
|
@@ -779,113 +735,57 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
779
735
|
HTMLElement.prototype.querySelectorAll = function (selector) {
|
|
780
736
|
return (0, css_select_1.selectAll)(selector, this, {
|
|
781
737
|
xmlMode: true,
|
|
782
|
-
adapter: matcher_1.default
|
|
738
|
+
adapter: matcher_1.default,
|
|
783
739
|
});
|
|
784
|
-
// let matcher: Matcher;
|
|
785
|
-
// if (selector instanceof Matcher) {
|
|
786
|
-
// matcher = selector;
|
|
787
|
-
// matcher.reset();
|
|
788
|
-
// } else {
|
|
789
|
-
// if (selector.includes(',')) {
|
|
790
|
-
// const selectors = selector.split(',');
|
|
791
|
-
// return Array.from(selectors.reduce((pre, cur) => {
|
|
792
|
-
// const result = this.querySelectorAll(cur.trim());
|
|
793
|
-
// return result.reduce((p, c) => {
|
|
794
|
-
// return p.add(c);
|
|
795
|
-
// }, pre);
|
|
796
|
-
// }, new Set<HTMLElement>()));
|
|
797
|
-
// }
|
|
798
|
-
// matcher = new Matcher(selector);
|
|
799
|
-
// }
|
|
800
|
-
// interface IStack {
|
|
801
|
-
// 0: Node; // node
|
|
802
|
-
// 1: number; // children
|
|
803
|
-
// 2: boolean; // found flag
|
|
804
|
-
// }
|
|
805
|
-
// const stack = [] as IStack[];
|
|
806
|
-
// return this.childNodes.reduce((res, cur) => {
|
|
807
|
-
// stack.push([cur, 0, false]);
|
|
808
|
-
// while (stack.length) {
|
|
809
|
-
// const state = arr_back(stack); // get last element
|
|
810
|
-
// const el = state[0];
|
|
811
|
-
// if (state[1] === 0) {
|
|
812
|
-
// // Seen for first time.
|
|
813
|
-
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
|
|
814
|
-
// stack.pop();
|
|
815
|
-
// continue;
|
|
816
|
-
// }
|
|
817
|
-
// const html_el = el as HTMLElement;
|
|
818
|
-
// state[2] = matcher.advance(html_el);
|
|
819
|
-
// if (state[2]) {
|
|
820
|
-
// if (matcher.matched) {
|
|
821
|
-
// res.push(html_el);
|
|
822
|
-
// res.push(...(html_el.querySelectorAll(selector)));
|
|
823
|
-
// // no need to go further.
|
|
824
|
-
// matcher.rewind();
|
|
825
|
-
// stack.pop();
|
|
826
|
-
// continue;
|
|
827
|
-
// }
|
|
828
|
-
// }
|
|
829
|
-
// }
|
|
830
|
-
// if (state[1] < el.childNodes.length) {
|
|
831
|
-
// stack.push([el.childNodes[state[1]++], 0, false]);
|
|
832
|
-
// } else {
|
|
833
|
-
// if (state[2]) {
|
|
834
|
-
// matcher.rewind();
|
|
835
|
-
// }
|
|
836
|
-
// stack.pop();
|
|
837
|
-
// }
|
|
838
|
-
// }
|
|
839
|
-
// return res;
|
|
840
|
-
// }, [] as HTMLElement[]);
|
|
841
740
|
};
|
|
842
741
|
/**
|
|
843
742
|
* Query CSS Selector to find matching node.
|
|
844
743
|
* @param {string} selector Simplified CSS selector
|
|
845
|
-
* @return {HTMLElement} matching node
|
|
744
|
+
* @return {(HTMLElement|null)} matching node
|
|
846
745
|
*/
|
|
847
746
|
HTMLElement.prototype.querySelector = function (selector) {
|
|
848
747
|
return (0, css_select_1.selectOne)(selector, this, {
|
|
849
748
|
xmlMode: true,
|
|
850
|
-
adapter: matcher_1.default
|
|
749
|
+
adapter: matcher_1.default,
|
|
851
750
|
});
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
//
|
|
864
|
-
//
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
751
|
+
};
|
|
752
|
+
/**
|
|
753
|
+
* find elements by their tagName
|
|
754
|
+
* @param {string} tagName the tagName of the elements to select
|
|
755
|
+
*/
|
|
756
|
+
HTMLElement.prototype.getElementsByTagName = function (tagName) {
|
|
757
|
+
var upperCasedTagName = tagName.toUpperCase();
|
|
758
|
+
var re = [];
|
|
759
|
+
var stack = [];
|
|
760
|
+
var currentNodeReference = this;
|
|
761
|
+
var index = 0;
|
|
762
|
+
// index turns to undefined once the stack is empty and the first condition occurs
|
|
763
|
+
// which happens once all relevant children are searched through
|
|
764
|
+
while (index !== undefined) {
|
|
765
|
+
var child = void 0;
|
|
766
|
+
// make it work with sparse arrays
|
|
767
|
+
do {
|
|
768
|
+
child = currentNodeReference.childNodes[index++];
|
|
769
|
+
} while (index < currentNodeReference.childNodes.length && child === undefined);
|
|
770
|
+
// if the child does not exist we move on with the last provided index (which belongs to the parentNode)
|
|
771
|
+
if (child === undefined) {
|
|
772
|
+
currentNodeReference = currentNodeReference.parentNode;
|
|
773
|
+
index = stack.pop();
|
|
774
|
+
continue;
|
|
775
|
+
}
|
|
776
|
+
if (child.nodeType === type_3.default.ELEMENT_NODE) {
|
|
777
|
+
// https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
|
|
778
|
+
if (tagName === '*' || child.tagName === upperCasedTagName)
|
|
779
|
+
re.push(child);
|
|
780
|
+
// if the child has children of its own, push the current index onto the stack and continue searching one level down
|
|
781
|
+
if (child.childNodes.length > 0) {
|
|
782
|
+
stack.push(index);
|
|
783
|
+
currentNodeReference = child;
|
|
784
|
+
index = 0;
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
}
|
|
788
|
+
return re;
|
|
889
789
|
};
|
|
890
790
|
/**
|
|
891
791
|
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
|
|
@@ -927,7 +827,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
927
827
|
return [node];
|
|
928
828
|
}, findOne: findOne, findAll: function () {
|
|
929
829
|
return [];
|
|
930
|
-
} })
|
|
830
|
+
} }),
|
|
931
831
|
});
|
|
932
832
|
if (e) {
|
|
933
833
|
return e;
|
|
@@ -1005,7 +905,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1005
905
|
});
|
|
1006
906
|
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
|
|
1007
907
|
/**
|
|
1008
|
-
* Get escaped (as-
|
|
908
|
+
* Get escaped (as-is) attributes
|
|
1009
909
|
* @return {Object} parsed attributes
|
|
1010
910
|
*/
|
|
1011
911
|
get: function () {
|
|
@@ -1014,10 +914,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1014
914
|
}
|
|
1015
915
|
var attrs = {};
|
|
1016
916
|
if (this.rawAttrs) {
|
|
1017
|
-
var re =
|
|
917
|
+
var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
1018
918
|
var match = void 0;
|
|
1019
919
|
while ((match = re.exec(this.rawAttrs))) {
|
|
1020
|
-
|
|
920
|
+
var key = match[1];
|
|
921
|
+
var val = match[2] || null;
|
|
922
|
+
if (val && (val[0] === "'" || val[0] === "\""))
|
|
923
|
+
val = val.slice(1, val.length - 1);
|
|
924
|
+
attrs[key] = val;
|
|
1021
925
|
}
|
|
1022
926
|
}
|
|
1023
927
|
this._rawAttrs = attrs;
|
|
@@ -1034,13 +938,15 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1034
938
|
delete this._attrs[key];
|
|
1035
939
|
}
|
|
1036
940
|
// Update rawString
|
|
1037
|
-
this.rawAttrs = Object.keys(attrs)
|
|
941
|
+
this.rawAttrs = Object.keys(attrs)
|
|
942
|
+
.map(function (name) {
|
|
1038
943
|
var val = JSON.stringify(attrs[name]);
|
|
1039
944
|
if (val === undefined || val === 'null') {
|
|
1040
945
|
return name;
|
|
1041
946
|
}
|
|
1042
947
|
return name + "=" + val;
|
|
1043
|
-
})
|
|
948
|
+
})
|
|
949
|
+
.join(' ');
|
|
1044
950
|
// Update this.id
|
|
1045
951
|
if (key === 'id') {
|
|
1046
952
|
this.id = '';
|
|
@@ -1064,7 +970,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1064
970
|
HTMLElement.prototype.setAttribute = function (key, value) {
|
|
1065
971
|
var _this = this;
|
|
1066
972
|
if (arguments.length < 2) {
|
|
1067
|
-
throw new Error(
|
|
973
|
+
throw new Error("Failed to execute 'setAttribute' on 'Element'");
|
|
1068
974
|
}
|
|
1069
975
|
var k2 = key.toLowerCase();
|
|
1070
976
|
var attrs = this.rawAttributes;
|
|
@@ -1080,13 +986,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1080
986
|
this._attrs[k2] = decode(attrs[key]);
|
|
1081
987
|
}
|
|
1082
988
|
// Update rawString
|
|
1083
|
-
this.rawAttrs = Object.keys(attrs)
|
|
989
|
+
this.rawAttrs = Object.keys(attrs)
|
|
990
|
+
.map(function (name) {
|
|
1084
991
|
var val = _this.quoteAttribute(attrs[name]);
|
|
1085
|
-
if (val === 'null' || val === '""')
|
|
992
|
+
if (val === 'null' || val === '""')
|
|
1086
993
|
return name;
|
|
1087
|
-
}
|
|
1088
994
|
return name + "=" + val;
|
|
1089
|
-
})
|
|
995
|
+
})
|
|
996
|
+
.join(' ');
|
|
1090
997
|
// Update this.id
|
|
1091
998
|
if (key === 'id') {
|
|
1092
999
|
this.id = value;
|
|
@@ -1107,13 +1014,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1107
1014
|
delete this._rawAttrs;
|
|
1108
1015
|
}
|
|
1109
1016
|
// Update rawString
|
|
1110
|
-
this.rawAttrs = Object.keys(attributes)
|
|
1017
|
+
this.rawAttrs = Object.keys(attributes)
|
|
1018
|
+
.map(function (name) {
|
|
1111
1019
|
var val = attributes[name];
|
|
1112
|
-
if (val === 'null' || val === '""')
|
|
1020
|
+
if (val === 'null' || val === '""')
|
|
1113
1021
|
return name;
|
|
1114
|
-
}
|
|
1115
1022
|
return name + "=" + _this.quoteAttribute(String(val));
|
|
1116
|
-
})
|
|
1023
|
+
})
|
|
1024
|
+
.join(' ');
|
|
1117
1025
|
};
|
|
1118
1026
|
HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
|
|
1119
1027
|
var _a, _b, _c;
|
|
@@ -1166,9 +1074,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1166
1074
|
var i = 0;
|
|
1167
1075
|
while (i < children.length) {
|
|
1168
1076
|
var child = children[i++];
|
|
1169
|
-
if (this === child)
|
|
1077
|
+
if (this === child)
|
|
1170
1078
|
return children[i] || null;
|
|
1171
|
-
}
|
|
1172
1079
|
}
|
|
1173
1080
|
return null;
|
|
1174
1081
|
}
|
|
@@ -1210,12 +1117,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1210
1117
|
}(node_2.default));
|
|
1211
1118
|
exports.default = HTMLElement;
|
|
1212
1119
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
|
1213
|
-
var kMarkupPattern = /<!--[
|
|
1214
|
-
|
|
1215
|
-
// <([a-z][-.:0-9_a-z]*)\s*\/>
|
|
1216
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
|
|
1217
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
|
|
1218
|
-
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
|
|
1120
|
+
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
|
|
1121
|
+
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
|
|
1219
1122
|
var kSelfClosingElements = {
|
|
1220
1123
|
area: true,
|
|
1221
1124
|
AREA: true,
|
|
@@ -1244,7 +1147,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1244
1147
|
track: true,
|
|
1245
1148
|
TRACK: true,
|
|
1246
1149
|
wbr: true,
|
|
1247
|
-
WBR: true
|
|
1150
|
+
WBR: true,
|
|
1248
1151
|
};
|
|
1249
1152
|
var kElementsClosedByOpening = {
|
|
1250
1153
|
li: { li: true, LI: true },
|
|
@@ -1268,7 +1171,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1268
1171
|
h5: { h5: true, H5: true },
|
|
1269
1172
|
H5: { h5: true, H5: true },
|
|
1270
1173
|
h6: { h6: true, H6: true },
|
|
1271
|
-
H6: { h6: true, H6: true }
|
|
1174
|
+
H6: { h6: true, H6: true },
|
|
1272
1175
|
};
|
|
1273
1176
|
var kElementsClosedByClosing = {
|
|
1274
1177
|
li: { ul: true, ol: true, UL: true, OL: true },
|
|
@@ -1284,7 +1187,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1284
1187
|
td: { tr: true, table: true, TR: true, TABLE: true },
|
|
1285
1188
|
TD: { tr: true, table: true, TR: true, TABLE: true },
|
|
1286
1189
|
th: { tr: true, table: true, TR: true, TABLE: true },
|
|
1287
|
-
TH: { tr: true, table: true, TR: true, TABLE: true }
|
|
1190
|
+
TH: { tr: true, table: true, TR: true, TABLE: true },
|
|
1288
1191
|
};
|
|
1289
1192
|
var frameflag = 'documentfragmentcontainer';
|
|
1290
1193
|
/**
|
|
@@ -1299,45 +1202,39 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1299
1202
|
script: true,
|
|
1300
1203
|
noscript: true,
|
|
1301
1204
|
style: true,
|
|
1302
|
-
pre: true
|
|
1205
|
+
pre: true,
|
|
1303
1206
|
};
|
|
1304
1207
|
var element_names = Object.keys(elements);
|
|
1305
|
-
var kBlockTextElements = element_names.map(function (it) {
|
|
1306
|
-
|
|
1307
|
-
});
|
|
1308
|
-
var kIgnoreElements = element_names.filter(function (it) {
|
|
1309
|
-
return elements[it];
|
|
1310
|
-
}).map(function (it) {
|
|
1311
|
-
return new RegExp(it, 'i');
|
|
1312
|
-
});
|
|
1208
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
1209
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
1313
1210
|
function element_should_be_ignore(tag) {
|
|
1314
|
-
return kIgnoreElements.some(function (it) {
|
|
1315
|
-
return it.test(tag);
|
|
1316
|
-
});
|
|
1211
|
+
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
1317
1212
|
}
|
|
1318
1213
|
function is_block_text_element(tag) {
|
|
1319
|
-
return kBlockTextElements.some(function (it) {
|
|
1320
|
-
return it.test(tag);
|
|
1321
|
-
});
|
|
1214
|
+
return kBlockTextElements.some(function (it) { return it.test(tag); });
|
|
1322
1215
|
}
|
|
1323
|
-
var createRange = function (startPos, endPos) {
|
|
1324
|
-
return [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
1325
|
-
};
|
|
1216
|
+
var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
|
|
1326
1217
|
var root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
1327
1218
|
var currentParent = root;
|
|
1328
1219
|
var stack = [root];
|
|
1329
1220
|
var lastTextPos = -1;
|
|
1221
|
+
var noNestedTagIndex = undefined;
|
|
1330
1222
|
var match;
|
|
1331
1223
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1332
1224
|
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
1225
|
+
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1333
1226
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1334
1227
|
var frameFlagOffset = frameflag.length + 2;
|
|
1335
1228
|
while ((match = kMarkupPattern.exec(data))) {
|
|
1336
|
-
|
|
1229
|
+
// Note: Object destructuring here consistently tests as higher performance than array destructuring
|
|
1230
|
+
// eslint-disable-next-line prefer-const
|
|
1231
|
+
var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
|
|
1232
|
+
var matchLength = matchText.length;
|
|
1233
|
+
var tagStartPos = kMarkupPattern.lastIndex - matchLength;
|
|
1337
1234
|
var tagEndPos = kMarkupPattern.lastIndex;
|
|
1338
1235
|
// Add TextNode if content
|
|
1339
1236
|
if (lastTextPos > -1) {
|
|
1340
|
-
if (lastTextPos +
|
|
1237
|
+
if (lastTextPos + matchLength < tagEndPos) {
|
|
1341
1238
|
var text = data.substring(lastTextPos, tagStartPos);
|
|
1342
1239
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
1343
1240
|
}
|
|
@@ -1345,10 +1242,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1345
1242
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1346
1243
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1347
1244
|
// Skip frameflag node
|
|
1348
|
-
if (
|
|
1245
|
+
if (tagName === frameflag)
|
|
1349
1246
|
continue;
|
|
1350
1247
|
// Handle comments
|
|
1351
|
-
if (
|
|
1248
|
+
if (matchText[1] === '!') {
|
|
1352
1249
|
if (options.comment) {
|
|
1353
1250
|
// Only keep what is in between <!-- and -->
|
|
1354
1251
|
var text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
@@ -1358,36 +1255,46 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1358
1255
|
}
|
|
1359
1256
|
/* -- Handle tag matching -- */
|
|
1360
1257
|
// Fix tag casing if necessary
|
|
1361
|
-
if (
|
|
1362
|
-
|
|
1258
|
+
if (lowerCaseTagName)
|
|
1259
|
+
tagName = tagName.toLowerCase();
|
|
1363
1260
|
// Handle opening tags (ie. <this> not </that>)
|
|
1364
|
-
if (!
|
|
1261
|
+
if (!leadingSlash) {
|
|
1365
1262
|
/* Populate attributes */
|
|
1366
1263
|
var attrs = {};
|
|
1367
|
-
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(
|
|
1368
|
-
|
|
1264
|
+
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
|
|
1265
|
+
var key = attMatch[1], val = attMatch[2];
|
|
1266
|
+
var isQuoted = val[0] === "'" || val[0] === "\"";
|
|
1267
|
+
attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
|
|
1369
1268
|
}
|
|
1370
|
-
var
|
|
1371
|
-
if (!
|
|
1372
|
-
if (kElementsClosedByOpening[
|
|
1269
|
+
var parentTagName = currentParent.rawTagName;
|
|
1270
|
+
if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
|
|
1271
|
+
if (kElementsClosedByOpening[parentTagName][tagName]) {
|
|
1373
1272
|
stack.pop();
|
|
1374
1273
|
currentParent = (0, back_1.default)(stack);
|
|
1375
1274
|
}
|
|
1376
1275
|
}
|
|
1276
|
+
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
|
|
1277
|
+
if (tagName === 'a' || tagName === 'A') {
|
|
1278
|
+
if (noNestedTagIndex !== undefined) {
|
|
1279
|
+
stack.splice(noNestedTagIndex);
|
|
1280
|
+
currentParent = (0, back_1.default)(stack);
|
|
1281
|
+
}
|
|
1282
|
+
noNestedTagIndex = stack.length;
|
|
1283
|
+
}
|
|
1377
1284
|
var tagEndPos_1 = kMarkupPattern.lastIndex;
|
|
1378
|
-
var tagStartPos_1 = tagEndPos_1 -
|
|
1285
|
+
var tagStartPos_1 = tagEndPos_1 - matchLength;
|
|
1379
1286
|
currentParent = currentParent.appendChild(
|
|
1380
1287
|
// Initialize range (end position updated later for closed tags)
|
|
1381
|
-
new HTMLElement(
|
|
1288
|
+
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
|
|
1382
1289
|
stack.push(currentParent);
|
|
1383
|
-
if (is_block_text_element(
|
|
1290
|
+
if (is_block_text_element(tagName)) {
|
|
1384
1291
|
// Find closing tag
|
|
1385
|
-
var closeMarkup = "</" +
|
|
1386
|
-
var closeIndex =
|
|
1292
|
+
var closeMarkup = "</" + tagName + ">";
|
|
1293
|
+
var closeIndex = lowerCaseTagName
|
|
1387
1294
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1388
1295
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1389
1296
|
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1390
|
-
if (element_should_be_ignore(
|
|
1297
|
+
if (element_should_be_ignore(tagName)) {
|
|
1391
1298
|
var text = data.substring(tagEndPos_1, textEndPos);
|
|
1392
1299
|
if (text.length > 0 && /\S/.test(text)) {
|
|
1393
1300
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
|
|
@@ -1399,14 +1306,16 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1399
1306
|
else {
|
|
1400
1307
|
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1401
1308
|
// Cause to be treated as self-closing, because no close found
|
|
1402
|
-
|
|
1309
|
+
leadingSlash = '/';
|
|
1403
1310
|
}
|
|
1404
1311
|
}
|
|
1405
1312
|
}
|
|
1406
1313
|
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1407
|
-
if (
|
|
1314
|
+
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
|
|
1408
1315
|
while (true) {
|
|
1409
|
-
if (
|
|
1316
|
+
if (tagName === 'a' || tagName === 'A')
|
|
1317
|
+
noNestedTagIndex = undefined;
|
|
1318
|
+
if (currentParent.rawTagName === tagName) {
|
|
1410
1319
|
// Update range end for closed tag
|
|
1411
1320
|
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1412
1321
|
stack.pop();
|
|
@@ -1414,10 +1323,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1414
1323
|
break;
|
|
1415
1324
|
}
|
|
1416
1325
|
else {
|
|
1417
|
-
var
|
|
1326
|
+
var parentTagName = currentParent.tagName;
|
|
1418
1327
|
// Trying to close current tag, and move on
|
|
1419
|
-
if (kElementsClosedByClosing[
|
|
1420
|
-
if (kElementsClosedByClosing[
|
|
1328
|
+
if (kElementsClosedByClosing[parentTagName]) {
|
|
1329
|
+
if (kElementsClosedByClosing[parentTagName][tagName]) {
|
|
1421
1330
|
stack.pop();
|
|
1422
1331
|
currentParent = (0, back_1.default)(stack);
|
|
1423
1332
|
continue;
|