node-html-parser 4.1.4 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +34 -25
- package/dist/main.js +177 -271
- package/dist/nodes/html.d.ts +12 -5
- package/dist/nodes/html.js +177 -271
- package/esm/index.js +11 -0
- package/esm/package.json +3 -0
- package/package.json +46 -17
- package/.eslintignore +0 -3
- package/.eslintrc.json +0 -226
- package/.mocharc.yaml +0 -1
- package/dist/esm/back.js +0 -3
- package/dist/esm/index.js +0 -7
- package/dist/esm/matcher.js +0 -101
- package/dist/esm/nodes/comment.js +0 -23
- package/dist/esm/nodes/html.js +0 -1102
- package/dist/esm/nodes/node.js +0 -25
- package/dist/esm/nodes/text.js +0 -95
- package/dist/esm/nodes/type.js +0 -7
- package/dist/esm/parse.js +0 -1
- package/dist/esm/valid.js +0 -9
package/dist/main.js
CHANGED
|
@@ -34,7 +34,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
|
34
34
|
ar[i] = from[i];
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
|
-
return to.concat(ar || from);
|
|
37
|
+
return to.concat(ar || Array.prototype.slice.call(from));
|
|
38
38
|
};
|
|
39
39
|
define("back", ["require", "exports"], function (require, exports) {
|
|
40
40
|
"use strict";
|
|
@@ -293,89 +293,40 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
293
293
|
matcher_1 = __importDefault(matcher_1);
|
|
294
294
|
back_1 = __importDefault(back_1);
|
|
295
295
|
comment_1 = __importDefault(comment_1);
|
|
296
|
-
|
|
296
|
+
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
297
297
|
function decode(val) {
|
|
298
298
|
// clone string
|
|
299
299
|
return JSON.parse(JSON.stringify(he_2.default.decode(val)));
|
|
300
300
|
}
|
|
301
301
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
302
|
+
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
|
|
303
|
+
var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
|
|
304
|
+
var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
|
|
305
|
+
var tableTags = ['table', 'td', 'tr'];
|
|
306
|
+
var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
|
|
302
307
|
var kBlockElements = new Set();
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
kBlockElements.add('dl');
|
|
322
|
-
kBlockElements.add('DL');
|
|
323
|
-
kBlockElements.add('dt');
|
|
324
|
-
kBlockElements.add('DT');
|
|
325
|
-
kBlockElements.add('fieldset');
|
|
326
|
-
kBlockElements.add('FIELDSET');
|
|
327
|
-
kBlockElements.add('figcaption');
|
|
328
|
-
kBlockElements.add('FIGCAPTION');
|
|
329
|
-
kBlockElements.add('figure');
|
|
330
|
-
kBlockElements.add('FIGURE');
|
|
331
|
-
kBlockElements.add('footer');
|
|
332
|
-
kBlockElements.add('FOOTER');
|
|
333
|
-
kBlockElements.add('form');
|
|
334
|
-
kBlockElements.add('FORM');
|
|
335
|
-
kBlockElements.add('h1');
|
|
336
|
-
kBlockElements.add('H1');
|
|
337
|
-
kBlockElements.add('h2');
|
|
338
|
-
kBlockElements.add('H2');
|
|
339
|
-
kBlockElements.add('h3');
|
|
340
|
-
kBlockElements.add('H3');
|
|
341
|
-
kBlockElements.add('h4');
|
|
342
|
-
kBlockElements.add('H4');
|
|
343
|
-
kBlockElements.add('h5');
|
|
344
|
-
kBlockElements.add('H5');
|
|
345
|
-
kBlockElements.add('h6');
|
|
346
|
-
kBlockElements.add('H6');
|
|
347
|
-
kBlockElements.add('header');
|
|
348
|
-
kBlockElements.add('HEADER');
|
|
349
|
-
kBlockElements.add('hgroup');
|
|
350
|
-
kBlockElements.add('HGROUP');
|
|
351
|
-
kBlockElements.add('hr');
|
|
352
|
-
kBlockElements.add('HR');
|
|
353
|
-
kBlockElements.add('li');
|
|
354
|
-
kBlockElements.add('LI');
|
|
355
|
-
kBlockElements.add('main');
|
|
356
|
-
kBlockElements.add('MAIN');
|
|
357
|
-
kBlockElements.add('nav');
|
|
358
|
-
kBlockElements.add('NAV');
|
|
359
|
-
kBlockElements.add('ol');
|
|
360
|
-
kBlockElements.add('OL');
|
|
361
|
-
kBlockElements.add('p');
|
|
362
|
-
kBlockElements.add('P');
|
|
363
|
-
kBlockElements.add('pre');
|
|
364
|
-
kBlockElements.add('PRE');
|
|
365
|
-
kBlockElements.add('section');
|
|
366
|
-
kBlockElements.add('SECTION');
|
|
367
|
-
kBlockElements.add('table');
|
|
368
|
-
kBlockElements.add('TABLE');
|
|
369
|
-
kBlockElements.add('td');
|
|
370
|
-
kBlockElements.add('TD');
|
|
371
|
-
kBlockElements.add('tr');
|
|
372
|
-
kBlockElements.add('TR');
|
|
373
|
-
kBlockElements.add('ul');
|
|
374
|
-
kBlockElements.add('UL');
|
|
308
|
+
function addToKBlockElement() {
|
|
309
|
+
var args = [];
|
|
310
|
+
for (var _i = 0; _i < arguments.length; _i++) {
|
|
311
|
+
args[_i] = arguments[_i];
|
|
312
|
+
}
|
|
313
|
+
var addToSet = function (array) {
|
|
314
|
+
for (var index = 0; index < array.length; index++) {
|
|
315
|
+
var element = array[index];
|
|
316
|
+
kBlockElements.add(element);
|
|
317
|
+
kBlockElements.add(element.toUpperCase());
|
|
318
|
+
}
|
|
319
|
+
};
|
|
320
|
+
for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
|
|
321
|
+
var arg = args_1[_a];
|
|
322
|
+
addToSet(arg);
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
|
|
375
326
|
var DOMTokenList = /** @class */ (function () {
|
|
376
327
|
function DOMTokenList(valuesInit, afterUpdate) {
|
|
377
328
|
if (valuesInit === void 0) { valuesInit = []; }
|
|
378
|
-
if (afterUpdate === void 0) { afterUpdate =
|
|
329
|
+
if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
|
|
379
330
|
this._set = new Set(valuesInit);
|
|
380
331
|
this._afterUpdate = afterUpdate;
|
|
381
332
|
}
|
|
@@ -396,8 +347,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
396
347
|
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
397
348
|
};
|
|
398
349
|
DOMTokenList.prototype.remove = function (c) {
|
|
399
|
-
this._set.delete(c) &&
|
|
400
|
-
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
350
|
+
this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
401
351
|
};
|
|
402
352
|
DOMTokenList.prototype.toggle = function (c) {
|
|
403
353
|
this._validate(c);
|
|
@@ -462,8 +412,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
462
412
|
_this.rawAttrs = rawAttrs || '';
|
|
463
413
|
_this.id = keyAttrs.id || '';
|
|
464
414
|
_this.childNodes = [];
|
|
465
|
-
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return
|
|
466
|
-
);
|
|
415
|
+
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
416
|
+
);
|
|
467
417
|
if (keyAttrs.id) {
|
|
468
418
|
if (!rawAttrs) {
|
|
469
419
|
_this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
|
|
@@ -488,8 +438,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
488
438
|
* @returns {string} quoted value
|
|
489
439
|
*/
|
|
490
440
|
HTMLElement.prototype.quoteAttribute = function (attr) {
|
|
491
|
-
if (attr
|
|
492
|
-
return
|
|
441
|
+
if (attr == null) {
|
|
442
|
+
return 'null';
|
|
493
443
|
}
|
|
494
444
|
return JSON.stringify(attr.replace(/"/g, '"'));
|
|
495
445
|
};
|
|
@@ -511,7 +461,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
511
461
|
*/
|
|
512
462
|
HTMLElement.prototype.removeChild = function (node) {
|
|
513
463
|
this.childNodes = this.childNodes.filter(function (child) {
|
|
514
|
-
return
|
|
464
|
+
return child !== node;
|
|
515
465
|
});
|
|
516
466
|
};
|
|
517
467
|
/**
|
|
@@ -532,6 +482,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
532
482
|
get: function () {
|
|
533
483
|
return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
|
|
534
484
|
},
|
|
485
|
+
set: function (newname) {
|
|
486
|
+
this.rawTagName = newname.toLowerCase();
|
|
487
|
+
},
|
|
535
488
|
enumerable: false,
|
|
536
489
|
configurable: true
|
|
537
490
|
});
|
|
@@ -542,6 +495,13 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
542
495
|
enumerable: false,
|
|
543
496
|
configurable: true
|
|
544
497
|
});
|
|
498
|
+
Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
|
|
499
|
+
get: function () {
|
|
500
|
+
return voidTags.has(this.localName);
|
|
501
|
+
},
|
|
502
|
+
enumerable: false,
|
|
503
|
+
configurable: true
|
|
504
|
+
});
|
|
545
505
|
Object.defineProperty(HTMLElement.prototype, "rawText", {
|
|
546
506
|
/**
|
|
547
507
|
* Get escpaed (as-it) text value of current node and its children.
|
|
@@ -589,11 +549,11 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
589
549
|
if (node.nodeType === type_3.default.ELEMENT_NODE) {
|
|
590
550
|
if (kBlockElements.has(node.rawTagName)) {
|
|
591
551
|
if (currentBlock.length > 0) {
|
|
592
|
-
blocks.push(currentBlock = []);
|
|
552
|
+
blocks.push((currentBlock = []));
|
|
593
553
|
}
|
|
594
554
|
node.childNodes.forEach(dfs);
|
|
595
555
|
if (currentBlock.length > 0) {
|
|
596
|
-
blocks.push(currentBlock = []);
|
|
556
|
+
blocks.push((currentBlock = []));
|
|
597
557
|
}
|
|
598
558
|
}
|
|
599
559
|
else {
|
|
@@ -616,11 +576,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
616
576
|
}
|
|
617
577
|
}
|
|
618
578
|
dfs(this);
|
|
619
|
-
return blocks
|
|
620
|
-
|
|
621
|
-
return block.join('').replace(/\s{2,}/g, ' ');
|
|
579
|
+
return blocks
|
|
580
|
+
.map(function (block) {
|
|
581
|
+
return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
|
|
622
582
|
})
|
|
623
|
-
.join('\n')
|
|
583
|
+
.join('\n')
|
|
584
|
+
.replace(/\s+$/, ''); // trimRight;
|
|
624
585
|
},
|
|
625
586
|
enumerable: false,
|
|
626
587
|
configurable: true
|
|
@@ -628,22 +589,18 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
628
589
|
HTMLElement.prototype.toString = function () {
|
|
629
590
|
var tag = this.rawTagName;
|
|
630
591
|
if (tag) {
|
|
631
|
-
// const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
|
|
632
|
-
// const is_void = void_tags.has(tag);
|
|
633
|
-
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
|
|
634
592
|
var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
|
|
635
|
-
|
|
636
|
-
return "<" + tag + attrs + ">";
|
|
637
|
-
}
|
|
638
|
-
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
593
|
+
return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
639
594
|
}
|
|
640
595
|
return this.innerHTML;
|
|
641
596
|
};
|
|
642
597
|
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
|
|
643
598
|
get: function () {
|
|
644
|
-
return this.childNodes
|
|
599
|
+
return this.childNodes
|
|
600
|
+
.map(function (child) {
|
|
645
601
|
return child.toString();
|
|
646
|
-
})
|
|
602
|
+
})
|
|
603
|
+
.join('');
|
|
647
604
|
},
|
|
648
605
|
set: function (content) {
|
|
649
606
|
//const r = parse(content, global.options); // TODO global.options ?
|
|
@@ -670,7 +627,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
670
627
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
671
628
|
nodes[_i] = arguments[_i];
|
|
672
629
|
}
|
|
673
|
-
var content = nodes
|
|
630
|
+
var content = nodes
|
|
631
|
+
.map(function (node) {
|
|
674
632
|
if (node instanceof node_2.default) {
|
|
675
633
|
return [node];
|
|
676
634
|
}
|
|
@@ -680,7 +638,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
680
638
|
return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
|
|
681
639
|
}
|
|
682
640
|
return [];
|
|
683
|
-
})
|
|
641
|
+
})
|
|
642
|
+
.flat();
|
|
684
643
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
685
644
|
return child === _this;
|
|
686
645
|
});
|
|
@@ -727,8 +686,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
727
686
|
res.push(' '.repeat(indention) + str);
|
|
728
687
|
}
|
|
729
688
|
function dfs(node) {
|
|
730
|
-
var idStr = node.id ?
|
|
731
|
-
var classStr = node.classList.length ?
|
|
689
|
+
var idStr = node.id ? "#" + node.id : '';
|
|
690
|
+
var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
732
691
|
write("" + node.rawTagName + idStr + classStr);
|
|
733
692
|
indention++;
|
|
734
693
|
node.childNodes.forEach(function (childNode) {
|
|
@@ -779,113 +738,57 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
779
738
|
HTMLElement.prototype.querySelectorAll = function (selector) {
|
|
780
739
|
return (0, css_select_1.selectAll)(selector, this, {
|
|
781
740
|
xmlMode: true,
|
|
782
|
-
adapter: matcher_1.default
|
|
741
|
+
adapter: matcher_1.default,
|
|
783
742
|
});
|
|
784
|
-
// let matcher: Matcher;
|
|
785
|
-
// if (selector instanceof Matcher) {
|
|
786
|
-
// matcher = selector;
|
|
787
|
-
// matcher.reset();
|
|
788
|
-
// } else {
|
|
789
|
-
// if (selector.includes(',')) {
|
|
790
|
-
// const selectors = selector.split(',');
|
|
791
|
-
// return Array.from(selectors.reduce((pre, cur) => {
|
|
792
|
-
// const result = this.querySelectorAll(cur.trim());
|
|
793
|
-
// return result.reduce((p, c) => {
|
|
794
|
-
// return p.add(c);
|
|
795
|
-
// }, pre);
|
|
796
|
-
// }, new Set<HTMLElement>()));
|
|
797
|
-
// }
|
|
798
|
-
// matcher = new Matcher(selector);
|
|
799
|
-
// }
|
|
800
|
-
// interface IStack {
|
|
801
|
-
// 0: Node; // node
|
|
802
|
-
// 1: number; // children
|
|
803
|
-
// 2: boolean; // found flag
|
|
804
|
-
// }
|
|
805
|
-
// const stack = [] as IStack[];
|
|
806
|
-
// return this.childNodes.reduce((res, cur) => {
|
|
807
|
-
// stack.push([cur, 0, false]);
|
|
808
|
-
// while (stack.length) {
|
|
809
|
-
// const state = arr_back(stack); // get last element
|
|
810
|
-
// const el = state[0];
|
|
811
|
-
// if (state[1] === 0) {
|
|
812
|
-
// // Seen for first time.
|
|
813
|
-
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
|
|
814
|
-
// stack.pop();
|
|
815
|
-
// continue;
|
|
816
|
-
// }
|
|
817
|
-
// const html_el = el as HTMLElement;
|
|
818
|
-
// state[2] = matcher.advance(html_el);
|
|
819
|
-
// if (state[2]) {
|
|
820
|
-
// if (matcher.matched) {
|
|
821
|
-
// res.push(html_el);
|
|
822
|
-
// res.push(...(html_el.querySelectorAll(selector)));
|
|
823
|
-
// // no need to go further.
|
|
824
|
-
// matcher.rewind();
|
|
825
|
-
// stack.pop();
|
|
826
|
-
// continue;
|
|
827
|
-
// }
|
|
828
|
-
// }
|
|
829
|
-
// }
|
|
830
|
-
// if (state[1] < el.childNodes.length) {
|
|
831
|
-
// stack.push([el.childNodes[state[1]++], 0, false]);
|
|
832
|
-
// } else {
|
|
833
|
-
// if (state[2]) {
|
|
834
|
-
// matcher.rewind();
|
|
835
|
-
// }
|
|
836
|
-
// stack.pop();
|
|
837
|
-
// }
|
|
838
|
-
// }
|
|
839
|
-
// return res;
|
|
840
|
-
// }, [] as HTMLElement[]);
|
|
841
743
|
};
|
|
842
744
|
/**
|
|
843
745
|
* Query CSS Selector to find matching node.
|
|
844
746
|
* @param {string} selector Simplified CSS selector
|
|
845
|
-
* @return {HTMLElement} matching node
|
|
747
|
+
* @return {(HTMLElement|null)} matching node
|
|
846
748
|
*/
|
|
847
749
|
HTMLElement.prototype.querySelector = function (selector) {
|
|
848
750
|
return (0, css_select_1.selectOne)(selector, this, {
|
|
849
751
|
xmlMode: true,
|
|
850
|
-
adapter: matcher_1.default
|
|
752
|
+
adapter: matcher_1.default,
|
|
851
753
|
});
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
//
|
|
864
|
-
//
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
754
|
+
};
|
|
755
|
+
/**
|
|
756
|
+
* find elements by their tagName
|
|
757
|
+
* @param {string} tagName the tagName of the elements to select
|
|
758
|
+
*/
|
|
759
|
+
HTMLElement.prototype.getElementsByTagName = function (tagName) {
|
|
760
|
+
var upperCasedTagName = tagName.toUpperCase();
|
|
761
|
+
var re = [];
|
|
762
|
+
var stack = [];
|
|
763
|
+
var currentNodeReference = this;
|
|
764
|
+
var index = 0;
|
|
765
|
+
// index turns to undefined once the stack is empty and the first condition occurs
|
|
766
|
+
// which happens once all relevant children are searched through
|
|
767
|
+
while (index !== undefined) {
|
|
768
|
+
var child = void 0;
|
|
769
|
+
// make it work with sparse arrays
|
|
770
|
+
do {
|
|
771
|
+
child = currentNodeReference.childNodes[index++];
|
|
772
|
+
} while (index < currentNodeReference.childNodes.length && child === undefined);
|
|
773
|
+
// if the child does not exist we move on with the last provided index (which belongs to the parentNode)
|
|
774
|
+
if (child === undefined) {
|
|
775
|
+
currentNodeReference = currentNodeReference.parentNode;
|
|
776
|
+
index = stack.pop();
|
|
777
|
+
continue;
|
|
778
|
+
}
|
|
779
|
+
if (child.nodeType === type_3.default.ELEMENT_NODE) {
|
|
780
|
+
// https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
|
|
781
|
+
if (tagName === '*' || child.tagName === upperCasedTagName)
|
|
782
|
+
re.push(child);
|
|
783
|
+
// if children are existing push the current status to the stack and keep searching for elements in the level below
|
|
784
|
+
if (child.childNodes.length > 0) {
|
|
785
|
+
stack.push(index);
|
|
786
|
+
currentNodeReference = child;
|
|
787
|
+
index = 0;
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
return re;
|
|
889
792
|
};
|
|
890
793
|
/**
|
|
891
794
|
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
|
|
@@ -927,7 +830,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
927
830
|
return [node];
|
|
928
831
|
}, findOne: findOne, findAll: function () {
|
|
929
832
|
return [];
|
|
930
|
-
} })
|
|
833
|
+
} }),
|
|
931
834
|
});
|
|
932
835
|
if (e) {
|
|
933
836
|
return e;
|
|
@@ -1005,7 +908,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1005
908
|
});
|
|
1006
909
|
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
|
|
1007
910
|
/**
|
|
1008
|
-
* Get escaped (as-
|
|
911
|
+
* Get escaped (as-is) attributes
|
|
1009
912
|
* @return {Object} parsed attributes
|
|
1010
913
|
*/
|
|
1011
914
|
get: function () {
|
|
@@ -1014,10 +917,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1014
917
|
}
|
|
1015
918
|
var attrs = {};
|
|
1016
919
|
if (this.rawAttrs) {
|
|
1017
|
-
var re = /([a-
|
|
920
|
+
var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
1018
921
|
var match = void 0;
|
|
1019
922
|
while ((match = re.exec(this.rawAttrs))) {
|
|
1020
|
-
|
|
923
|
+
var key = match[1];
|
|
924
|
+
var val = match[2] || null;
|
|
925
|
+
if (val && (val[0] === "'" || val[0] === "\""))
|
|
926
|
+
val = val.slice(1, val.length - 1);
|
|
927
|
+
attrs[key] = val;
|
|
1021
928
|
}
|
|
1022
929
|
}
|
|
1023
930
|
this._rawAttrs = attrs;
|
|
@@ -1034,13 +941,15 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1034
941
|
delete this._attrs[key];
|
|
1035
942
|
}
|
|
1036
943
|
// Update rawString
|
|
1037
|
-
this.rawAttrs = Object.keys(attrs)
|
|
944
|
+
this.rawAttrs = Object.keys(attrs)
|
|
945
|
+
.map(function (name) {
|
|
1038
946
|
var val = JSON.stringify(attrs[name]);
|
|
1039
947
|
if (val === undefined || val === 'null') {
|
|
1040
948
|
return name;
|
|
1041
949
|
}
|
|
1042
950
|
return name + "=" + val;
|
|
1043
|
-
})
|
|
951
|
+
})
|
|
952
|
+
.join(' ');
|
|
1044
953
|
// Update this.id
|
|
1045
954
|
if (key === 'id') {
|
|
1046
955
|
this.id = '';
|
|
@@ -1064,7 +973,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1064
973
|
HTMLElement.prototype.setAttribute = function (key, value) {
|
|
1065
974
|
var _this = this;
|
|
1066
975
|
if (arguments.length < 2) {
|
|
1067
|
-
throw new Error(
|
|
976
|
+
throw new Error("Failed to execute 'setAttribute' on 'Element'");
|
|
1068
977
|
}
|
|
1069
978
|
var k2 = key.toLowerCase();
|
|
1070
979
|
var attrs = this.rawAttributes;
|
|
@@ -1080,13 +989,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1080
989
|
this._attrs[k2] = decode(attrs[key]);
|
|
1081
990
|
}
|
|
1082
991
|
// Update rawString
|
|
1083
|
-
this.rawAttrs = Object.keys(attrs)
|
|
992
|
+
this.rawAttrs = Object.keys(attrs)
|
|
993
|
+
.map(function (name) {
|
|
1084
994
|
var val = _this.quoteAttribute(attrs[name]);
|
|
1085
|
-
if (val === 'null' || val === '""')
|
|
995
|
+
if (val === 'null' || val === '""')
|
|
1086
996
|
return name;
|
|
1087
|
-
}
|
|
1088
997
|
return name + "=" + val;
|
|
1089
|
-
})
|
|
998
|
+
})
|
|
999
|
+
.join(' ');
|
|
1090
1000
|
// Update this.id
|
|
1091
1001
|
if (key === 'id') {
|
|
1092
1002
|
this.id = value;
|
|
@@ -1107,13 +1017,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1107
1017
|
delete this._rawAttrs;
|
|
1108
1018
|
}
|
|
1109
1019
|
// Update rawString
|
|
1110
|
-
this.rawAttrs = Object.keys(attributes)
|
|
1020
|
+
this.rawAttrs = Object.keys(attributes)
|
|
1021
|
+
.map(function (name) {
|
|
1111
1022
|
var val = attributes[name];
|
|
1112
|
-
if (val === 'null' || val === '""')
|
|
1023
|
+
if (val === 'null' || val === '""')
|
|
1113
1024
|
return name;
|
|
1114
|
-
}
|
|
1115
1025
|
return name + "=" + _this.quoteAttribute(String(val));
|
|
1116
|
-
})
|
|
1026
|
+
})
|
|
1027
|
+
.join(' ');
|
|
1117
1028
|
};
|
|
1118
1029
|
HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
|
|
1119
1030
|
var _a, _b, _c;
|
|
@@ -1166,9 +1077,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1166
1077
|
var i = 0;
|
|
1167
1078
|
while (i < children.length) {
|
|
1168
1079
|
var child = children[i++];
|
|
1169
|
-
if (this === child)
|
|
1080
|
+
if (this === child)
|
|
1170
1081
|
return children[i] || null;
|
|
1171
|
-
}
|
|
1172
1082
|
}
|
|
1173
1083
|
return null;
|
|
1174
1084
|
}
|
|
@@ -1210,12 +1120,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1210
1120
|
}(node_2.default));
|
|
1211
1121
|
exports.default = HTMLElement;
|
|
1212
1122
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
|
1213
|
-
var kMarkupPattern = /<!--[
|
|
1214
|
-
|
|
1215
|
-
// <([a-z][-.:0-9_a-z]*)\s*\/>
|
|
1216
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
|
|
1217
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
|
|
1218
|
-
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
|
|
1123
|
+
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
|
|
1124
|
+
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
|
|
1219
1125
|
var kSelfClosingElements = {
|
|
1220
1126
|
area: true,
|
|
1221
1127
|
AREA: true,
|
|
@@ -1244,7 +1150,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1244
1150
|
track: true,
|
|
1245
1151
|
TRACK: true,
|
|
1246
1152
|
wbr: true,
|
|
1247
|
-
WBR: true
|
|
1153
|
+
WBR: true,
|
|
1248
1154
|
};
|
|
1249
1155
|
var kElementsClosedByOpening = {
|
|
1250
1156
|
li: { li: true, LI: true },
|
|
@@ -1268,7 +1174,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1268
1174
|
h5: { h5: true, H5: true },
|
|
1269
1175
|
H5: { h5: true, H5: true },
|
|
1270
1176
|
h6: { h6: true, H6: true },
|
|
1271
|
-
H6: { h6: true, H6: true }
|
|
1177
|
+
H6: { h6: true, H6: true },
|
|
1272
1178
|
};
|
|
1273
1179
|
var kElementsClosedByClosing = {
|
|
1274
1180
|
li: { ul: true, ol: true, UL: true, OL: true },
|
|
@@ -1284,7 +1190,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1284
1190
|
td: { tr: true, table: true, TR: true, TABLE: true },
|
|
1285
1191
|
TD: { tr: true, table: true, TR: true, TABLE: true },
|
|
1286
1192
|
th: { tr: true, table: true, TR: true, TABLE: true },
|
|
1287
|
-
TH: { tr: true, table: true, TR: true, TABLE: true }
|
|
1193
|
+
TH: { tr: true, table: true, TR: true, TABLE: true },
|
|
1288
1194
|
};
|
|
1289
1195
|
var frameflag = 'documentfragmentcontainer';
|
|
1290
1196
|
/**
|
|
@@ -1299,45 +1205,39 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1299
1205
|
script: true,
|
|
1300
1206
|
noscript: true,
|
|
1301
1207
|
style: true,
|
|
1302
|
-
pre: true
|
|
1208
|
+
pre: true,
|
|
1303
1209
|
};
|
|
1304
1210
|
var element_names = Object.keys(elements);
|
|
1305
|
-
var kBlockTextElements = element_names.map(function (it) {
|
|
1306
|
-
|
|
1307
|
-
});
|
|
1308
|
-
var kIgnoreElements = element_names.filter(function (it) {
|
|
1309
|
-
return elements[it];
|
|
1310
|
-
}).map(function (it) {
|
|
1311
|
-
return new RegExp(it, 'i');
|
|
1312
|
-
});
|
|
1211
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
1212
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
1313
1213
|
function element_should_be_ignore(tag) {
|
|
1314
|
-
return kIgnoreElements.some(function (it) {
|
|
1315
|
-
return it.test(tag);
|
|
1316
|
-
});
|
|
1214
|
+
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
1317
1215
|
}
|
|
1318
1216
|
function is_block_text_element(tag) {
|
|
1319
|
-
return kBlockTextElements.some(function (it) {
|
|
1320
|
-
return it.test(tag);
|
|
1321
|
-
});
|
|
1217
|
+
return kBlockTextElements.some(function (it) { return it.test(tag); });
|
|
1322
1218
|
}
|
|
1323
|
-
var createRange = function (startPos, endPos) {
|
|
1324
|
-
return [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
1325
|
-
};
|
|
1219
|
+
var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
|
|
1326
1220
|
var root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
1327
1221
|
var currentParent = root;
|
|
1328
1222
|
var stack = [root];
|
|
1329
1223
|
var lastTextPos = -1;
|
|
1224
|
+
var noNestedTagIndex = undefined;
|
|
1330
1225
|
var match;
|
|
1331
1226
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1332
1227
|
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
1228
|
+
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1333
1229
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1334
1230
|
var frameFlagOffset = frameflag.length + 2;
|
|
1335
1231
|
while ((match = kMarkupPattern.exec(data))) {
|
|
1336
|
-
|
|
1232
|
+
// Note: Object destructuring here consistently tests as higher performance than array destructuring
|
|
1233
|
+
// eslint-disable-next-line prefer-const
|
|
1234
|
+
var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
|
|
1235
|
+
var matchLength = matchText.length;
|
|
1236
|
+
var tagStartPos = kMarkupPattern.lastIndex - matchLength;
|
|
1337
1237
|
var tagEndPos = kMarkupPattern.lastIndex;
|
|
1338
1238
|
// Add TextNode if content
|
|
1339
1239
|
if (lastTextPos > -1) {
|
|
1340
|
-
if (lastTextPos +
|
|
1240
|
+
if (lastTextPos + matchLength < tagEndPos) {
|
|
1341
1241
|
var text = data.substring(lastTextPos, tagStartPos);
|
|
1342
1242
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
1343
1243
|
}
|
|
@@ -1345,10 +1245,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1345
1245
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1346
1246
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1347
1247
|
// Skip frameflag node
|
|
1348
|
-
if (
|
|
1248
|
+
if (tagName === frameflag)
|
|
1349
1249
|
continue;
|
|
1350
1250
|
// Handle comments
|
|
1351
|
-
if (
|
|
1251
|
+
if (matchText[1] === '!') {
|
|
1352
1252
|
if (options.comment) {
|
|
1353
1253
|
// Only keep what is in between <!-- and -->
|
|
1354
1254
|
var text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
@@ -1358,42 +1258,46 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1358
1258
|
}
|
|
1359
1259
|
/* -- Handle tag matching -- */
|
|
1360
1260
|
// Fix tag casing if necessary
|
|
1361
|
-
if (
|
|
1362
|
-
|
|
1261
|
+
if (lowerCaseTagName)
|
|
1262
|
+
tagName = tagName.toLowerCase();
|
|
1363
1263
|
// Handle opening tags (ie. <this> not </that>)
|
|
1364
|
-
if (!
|
|
1264
|
+
if (!leadingSlash) {
|
|
1365
1265
|
/* Populate attributes */
|
|
1366
1266
|
var attrs = {};
|
|
1367
|
-
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(
|
|
1368
|
-
|
|
1267
|
+
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
|
|
1268
|
+
var key = attMatch[1], val = attMatch[2];
|
|
1269
|
+
var isQuoted = val[0] === "'" || val[0] === "\"";
|
|
1270
|
+
attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
|
|
1369
1271
|
}
|
|
1370
|
-
var
|
|
1371
|
-
if (!
|
|
1372
|
-
if (kElementsClosedByOpening[
|
|
1272
|
+
var parentTagName = currentParent.rawTagName;
|
|
1273
|
+
if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
|
|
1274
|
+
if (kElementsClosedByOpening[parentTagName][tagName]) {
|
|
1373
1275
|
stack.pop();
|
|
1374
1276
|
currentParent = (0, back_1.default)(stack);
|
|
1375
1277
|
}
|
|
1376
1278
|
}
|
|
1377
|
-
//
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1279
|
+
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
|
|
1280
|
+
if (tagName === 'a' || tagName === 'A') {
|
|
1281
|
+
if (noNestedTagIndex !== undefined) {
|
|
1282
|
+
stack.splice(noNestedTagIndex);
|
|
1283
|
+
currentParent = (0, back_1.default)(stack);
|
|
1284
|
+
}
|
|
1285
|
+
noNestedTagIndex = stack.length;
|
|
1382
1286
|
}
|
|
1383
1287
|
var tagEndPos_1 = kMarkupPattern.lastIndex;
|
|
1384
|
-
var tagStartPos_1 = tagEndPos_1 -
|
|
1288
|
+
var tagStartPos_1 = tagEndPos_1 - matchLength;
|
|
1385
1289
|
currentParent = currentParent.appendChild(
|
|
1386
1290
|
// Initialize range (end position updated later for closed tags)
|
|
1387
|
-
new HTMLElement(
|
|
1291
|
+
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
|
|
1388
1292
|
stack.push(currentParent);
|
|
1389
|
-
if (is_block_text_element(
|
|
1293
|
+
if (is_block_text_element(tagName)) {
|
|
1390
1294
|
// Find closing tag
|
|
1391
|
-
var closeMarkup = "</" +
|
|
1392
|
-
var closeIndex =
|
|
1295
|
+
var closeMarkup = "</" + tagName + ">";
|
|
1296
|
+
var closeIndex = lowerCaseTagName
|
|
1393
1297
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1394
1298
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1395
1299
|
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1396
|
-
if (element_should_be_ignore(
|
|
1300
|
+
if (element_should_be_ignore(tagName)) {
|
|
1397
1301
|
var text = data.substring(tagEndPos_1, textEndPos);
|
|
1398
1302
|
if (text.length > 0 && /\S/.test(text)) {
|
|
1399
1303
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
|
|
@@ -1405,14 +1309,16 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1405
1309
|
else {
|
|
1406
1310
|
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1407
1311
|
// Cause to be treated as self-closing, because no close found
|
|
1408
|
-
|
|
1312
|
+
leadingSlash = '/';
|
|
1409
1313
|
}
|
|
1410
1314
|
}
|
|
1411
1315
|
}
|
|
1412
1316
|
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1413
|
-
if (
|
|
1317
|
+
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
|
|
1414
1318
|
while (true) {
|
|
1415
|
-
if (
|
|
1319
|
+
if (tagName === 'a' || tagName === 'A')
|
|
1320
|
+
noNestedTagIndex = undefined;
|
|
1321
|
+
if (currentParent.rawTagName === tagName) {
|
|
1416
1322
|
// Update range end for closed tag
|
|
1417
1323
|
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1418
1324
|
stack.pop();
|
|
@@ -1420,10 +1326,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1420
1326
|
break;
|
|
1421
1327
|
}
|
|
1422
1328
|
else {
|
|
1423
|
-
var
|
|
1329
|
+
var parentTagName = currentParent.tagName;
|
|
1424
1330
|
// Trying to close current tag, and move on
|
|
1425
|
-
if (kElementsClosedByClosing[
|
|
1426
|
-
if (kElementsClosedByClosing[
|
|
1331
|
+
if (kElementsClosedByClosing[parentTagName]) {
|
|
1332
|
+
if (kElementsClosedByClosing[parentTagName][tagName]) {
|
|
1427
1333
|
stack.pop();
|
|
1428
1334
|
currentParent = (0, back_1.default)(stack);
|
|
1429
1335
|
continue;
|