node-html-parser 4.1.3 → 5.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +42 -0
- package/README.md +10 -4
- package/dist/main.js +175 -266
- package/dist/nodes/html.d.ts +11 -5
- package/dist/nodes/html.js +175 -266
- package/esm/index.js +11 -0
- package/esm/package.json +3 -0
- package/package.json +39 -17
- package/.eslintignore +0 -3
- package/.eslintrc.json +0 -226
- package/.mocharc.yaml +0 -1
- package/dist/esm/back.js +0 -3
- package/dist/esm/index.js +0 -7
- package/dist/esm/matcher.js +0 -101
- package/dist/esm/nodes/comment.js +0 -23
- package/dist/esm/nodes/html.js +0 -1096
- package/dist/esm/nodes/node.js +0 -25
- package/dist/esm/nodes/text.js +0 -95
- package/dist/esm/nodes/type.js +0 -7
- package/dist/esm/parse.js +0 -1
- package/dist/esm/valid.js +0 -9
package/dist/nodes/html.js
CHANGED
|
@@ -32,7 +32,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
|
32
32
|
ar[i] = from[i];
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
|
-
return to.concat(ar || from);
|
|
35
|
+
return to.concat(ar || Array.prototype.slice.call(from));
|
|
36
36
|
};
|
|
37
37
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
38
38
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
@@ -47,89 +47,40 @@ var text_1 = __importDefault(require("./text"));
|
|
|
47
47
|
var matcher_1 = __importDefault(require("../matcher"));
|
|
48
48
|
var back_1 = __importDefault(require("../back"));
|
|
49
49
|
var comment_1 = __importDefault(require("./comment"));
|
|
50
|
-
|
|
50
|
+
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
51
51
|
function decode(val) {
|
|
52
52
|
// clone string
|
|
53
53
|
return JSON.parse(JSON.stringify(he_1.default.decode(val)));
|
|
54
54
|
}
|
|
55
55
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
56
|
+
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
|
|
57
|
+
var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
|
|
58
|
+
var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
|
|
59
|
+
var tableTags = ['table', 'td', 'tr'];
|
|
60
|
+
var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
|
|
56
61
|
var kBlockElements = new Set();
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
kBlockElements.add(
|
|
66
|
-
kBlockElements.add(
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
kBlockElements.add('dl');
|
|
76
|
-
kBlockElements.add('DL');
|
|
77
|
-
kBlockElements.add('dt');
|
|
78
|
-
kBlockElements.add('DT');
|
|
79
|
-
kBlockElements.add('fieldset');
|
|
80
|
-
kBlockElements.add('FIELDSET');
|
|
81
|
-
kBlockElements.add('figcaption');
|
|
82
|
-
kBlockElements.add('FIGCAPTION');
|
|
83
|
-
kBlockElements.add('figure');
|
|
84
|
-
kBlockElements.add('FIGURE');
|
|
85
|
-
kBlockElements.add('footer');
|
|
86
|
-
kBlockElements.add('FOOTER');
|
|
87
|
-
kBlockElements.add('form');
|
|
88
|
-
kBlockElements.add('FORM');
|
|
89
|
-
kBlockElements.add('h1');
|
|
90
|
-
kBlockElements.add('H1');
|
|
91
|
-
kBlockElements.add('h2');
|
|
92
|
-
kBlockElements.add('H2');
|
|
93
|
-
kBlockElements.add('h3');
|
|
94
|
-
kBlockElements.add('H3');
|
|
95
|
-
kBlockElements.add('h4');
|
|
96
|
-
kBlockElements.add('H4');
|
|
97
|
-
kBlockElements.add('h5');
|
|
98
|
-
kBlockElements.add('H5');
|
|
99
|
-
kBlockElements.add('h6');
|
|
100
|
-
kBlockElements.add('H6');
|
|
101
|
-
kBlockElements.add('header');
|
|
102
|
-
kBlockElements.add('HEADER');
|
|
103
|
-
kBlockElements.add('hgroup');
|
|
104
|
-
kBlockElements.add('HGROUP');
|
|
105
|
-
kBlockElements.add('hr');
|
|
106
|
-
kBlockElements.add('HR');
|
|
107
|
-
kBlockElements.add('li');
|
|
108
|
-
kBlockElements.add('LI');
|
|
109
|
-
kBlockElements.add('main');
|
|
110
|
-
kBlockElements.add('MAIN');
|
|
111
|
-
kBlockElements.add('nav');
|
|
112
|
-
kBlockElements.add('NAV');
|
|
113
|
-
kBlockElements.add('ol');
|
|
114
|
-
kBlockElements.add('OL');
|
|
115
|
-
kBlockElements.add('p');
|
|
116
|
-
kBlockElements.add('P');
|
|
117
|
-
kBlockElements.add('pre');
|
|
118
|
-
kBlockElements.add('PRE');
|
|
119
|
-
kBlockElements.add('section');
|
|
120
|
-
kBlockElements.add('SECTION');
|
|
121
|
-
kBlockElements.add('table');
|
|
122
|
-
kBlockElements.add('TABLE');
|
|
123
|
-
kBlockElements.add('td');
|
|
124
|
-
kBlockElements.add('TD');
|
|
125
|
-
kBlockElements.add('tr');
|
|
126
|
-
kBlockElements.add('TR');
|
|
127
|
-
kBlockElements.add('ul');
|
|
128
|
-
kBlockElements.add('UL');
|
|
62
|
+
function addToKBlockElement() {
|
|
63
|
+
var args = [];
|
|
64
|
+
for (var _i = 0; _i < arguments.length; _i++) {
|
|
65
|
+
args[_i] = arguments[_i];
|
|
66
|
+
}
|
|
67
|
+
var addToSet = function (array) {
|
|
68
|
+
for (var index = 0; index < array.length; index++) {
|
|
69
|
+
var element = array[index];
|
|
70
|
+
kBlockElements.add(element);
|
|
71
|
+
kBlockElements.add(element.toUpperCase());
|
|
72
|
+
}
|
|
73
|
+
};
|
|
74
|
+
for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
|
|
75
|
+
var arg = args_1[_a];
|
|
76
|
+
addToSet(arg);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
|
|
129
80
|
var DOMTokenList = /** @class */ (function () {
|
|
130
81
|
function DOMTokenList(valuesInit, afterUpdate) {
|
|
131
82
|
if (valuesInit === void 0) { valuesInit = []; }
|
|
132
|
-
if (afterUpdate === void 0) { afterUpdate =
|
|
83
|
+
if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
|
|
133
84
|
this._set = new Set(valuesInit);
|
|
134
85
|
this._afterUpdate = afterUpdate;
|
|
135
86
|
}
|
|
@@ -150,8 +101,7 @@ var DOMTokenList = /** @class */ (function () {
|
|
|
150
101
|
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
151
102
|
};
|
|
152
103
|
DOMTokenList.prototype.remove = function (c) {
|
|
153
|
-
this._set.delete(c) &&
|
|
154
|
-
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
104
|
+
this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
155
105
|
};
|
|
156
106
|
DOMTokenList.prototype.toggle = function (c) {
|
|
157
107
|
this._validate(c);
|
|
@@ -216,8 +166,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
216
166
|
_this.rawAttrs = rawAttrs || '';
|
|
217
167
|
_this.id = keyAttrs.id || '';
|
|
218
168
|
_this.childNodes = [];
|
|
219
|
-
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return
|
|
220
|
-
);
|
|
169
|
+
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
170
|
+
);
|
|
221
171
|
if (keyAttrs.id) {
|
|
222
172
|
if (!rawAttrs) {
|
|
223
173
|
_this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
|
|
@@ -242,8 +192,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
242
192
|
* @returns {string} quoted value
|
|
243
193
|
*/
|
|
244
194
|
HTMLElement.prototype.quoteAttribute = function (attr) {
|
|
245
|
-
if (attr
|
|
246
|
-
return
|
|
195
|
+
if (attr == null) {
|
|
196
|
+
return 'null';
|
|
247
197
|
}
|
|
248
198
|
return JSON.stringify(attr.replace(/"/g, '"'));
|
|
249
199
|
};
|
|
@@ -265,7 +215,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
265
215
|
*/
|
|
266
216
|
HTMLElement.prototype.removeChild = function (node) {
|
|
267
217
|
this.childNodes = this.childNodes.filter(function (child) {
|
|
268
|
-
return
|
|
218
|
+
return child !== node;
|
|
269
219
|
});
|
|
270
220
|
};
|
|
271
221
|
/**
|
|
@@ -296,6 +246,13 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
296
246
|
enumerable: false,
|
|
297
247
|
configurable: true
|
|
298
248
|
});
|
|
249
|
+
Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
|
|
250
|
+
get: function () {
|
|
251
|
+
return voidTags.has(this.localName);
|
|
252
|
+
},
|
|
253
|
+
enumerable: false,
|
|
254
|
+
configurable: true
|
|
255
|
+
});
|
|
299
256
|
Object.defineProperty(HTMLElement.prototype, "rawText", {
|
|
300
257
|
/**
|
|
301
258
|
* Get escaped (as-is) text value of current node and its children.
|
|
@@ -343,11 +300,11 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
343
300
|
if (node.nodeType === type_1.default.ELEMENT_NODE) {
|
|
344
301
|
if (kBlockElements.has(node.rawTagName)) {
|
|
345
302
|
if (currentBlock.length > 0) {
|
|
346
|
-
blocks.push(currentBlock = []);
|
|
303
|
+
blocks.push((currentBlock = []));
|
|
347
304
|
}
|
|
348
305
|
node.childNodes.forEach(dfs);
|
|
349
306
|
if (currentBlock.length > 0) {
|
|
350
|
-
blocks.push(currentBlock = []);
|
|
307
|
+
blocks.push((currentBlock = []));
|
|
351
308
|
}
|
|
352
309
|
}
|
|
353
310
|
else {
|
|
@@ -370,11 +327,12 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
370
327
|
}
|
|
371
328
|
}
|
|
372
329
|
dfs(this);
|
|
373
|
-
return blocks
|
|
374
|
-
|
|
375
|
-
return block.join('').replace(/\s{2,}/g, ' ');
|
|
330
|
+
return blocks
|
|
331
|
+
.map(function (block) {
|
|
332
|
+
return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
|
|
376
333
|
})
|
|
377
|
-
.join('\n')
|
|
334
|
+
.join('\n')
|
|
335
|
+
.replace(/\s+$/, ''); // trimRight;
|
|
378
336
|
},
|
|
379
337
|
enumerable: false,
|
|
380
338
|
configurable: true
|
|
@@ -382,22 +340,18 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
382
340
|
HTMLElement.prototype.toString = function () {
|
|
383
341
|
var tag = this.rawTagName;
|
|
384
342
|
if (tag) {
|
|
385
|
-
// const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
|
|
386
|
-
// const is_void = void_tags.has(tag);
|
|
387
|
-
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
|
|
388
343
|
var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
|
|
389
|
-
|
|
390
|
-
return "<" + tag + attrs + ">";
|
|
391
|
-
}
|
|
392
|
-
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
344
|
+
return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
393
345
|
}
|
|
394
346
|
return this.innerHTML;
|
|
395
347
|
};
|
|
396
348
|
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
|
|
397
349
|
get: function () {
|
|
398
|
-
return this.childNodes
|
|
350
|
+
return this.childNodes
|
|
351
|
+
.map(function (child) {
|
|
399
352
|
return child.toString();
|
|
400
|
-
})
|
|
353
|
+
})
|
|
354
|
+
.join('');
|
|
401
355
|
},
|
|
402
356
|
set: function (content) {
|
|
403
357
|
//const r = parse(content, global.options); // TODO global.options ?
|
|
@@ -424,7 +378,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
424
378
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
425
379
|
nodes[_i] = arguments[_i];
|
|
426
380
|
}
|
|
427
|
-
var content = nodes
|
|
381
|
+
var content = nodes
|
|
382
|
+
.map(function (node) {
|
|
428
383
|
if (node instanceof node_1.default) {
|
|
429
384
|
return [node];
|
|
430
385
|
}
|
|
@@ -434,7 +389,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
434
389
|
return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
|
|
435
390
|
}
|
|
436
391
|
return [];
|
|
437
|
-
})
|
|
392
|
+
})
|
|
393
|
+
.flat();
|
|
438
394
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
439
395
|
return child === _this;
|
|
440
396
|
});
|
|
@@ -481,8 +437,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
481
437
|
res.push(' '.repeat(indention) + str);
|
|
482
438
|
}
|
|
483
439
|
function dfs(node) {
|
|
484
|
-
var idStr = node.id ?
|
|
485
|
-
var classStr = node.classList.length ?
|
|
440
|
+
var idStr = node.id ? "#" + node.id : '';
|
|
441
|
+
var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
486
442
|
write("" + node.rawTagName + idStr + classStr);
|
|
487
443
|
indention++;
|
|
488
444
|
node.childNodes.forEach(function (childNode) {
|
|
@@ -533,113 +489,57 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
533
489
|
HTMLElement.prototype.querySelectorAll = function (selector) {
|
|
534
490
|
return (0, css_select_1.selectAll)(selector, this, {
|
|
535
491
|
xmlMode: true,
|
|
536
|
-
adapter: matcher_1.default
|
|
492
|
+
adapter: matcher_1.default,
|
|
537
493
|
});
|
|
538
|
-
// let matcher: Matcher;
|
|
539
|
-
// if (selector instanceof Matcher) {
|
|
540
|
-
// matcher = selector;
|
|
541
|
-
// matcher.reset();
|
|
542
|
-
// } else {
|
|
543
|
-
// if (selector.includes(',')) {
|
|
544
|
-
// const selectors = selector.split(',');
|
|
545
|
-
// return Array.from(selectors.reduce((pre, cur) => {
|
|
546
|
-
// const result = this.querySelectorAll(cur.trim());
|
|
547
|
-
// return result.reduce((p, c) => {
|
|
548
|
-
// return p.add(c);
|
|
549
|
-
// }, pre);
|
|
550
|
-
// }, new Set<HTMLElement>()));
|
|
551
|
-
// }
|
|
552
|
-
// matcher = new Matcher(selector);
|
|
553
|
-
// }
|
|
554
|
-
// interface IStack {
|
|
555
|
-
// 0: Node; // node
|
|
556
|
-
// 1: number; // children
|
|
557
|
-
// 2: boolean; // found flag
|
|
558
|
-
// }
|
|
559
|
-
// const stack = [] as IStack[];
|
|
560
|
-
// return this.childNodes.reduce((res, cur) => {
|
|
561
|
-
// stack.push([cur, 0, false]);
|
|
562
|
-
// while (stack.length) {
|
|
563
|
-
// const state = arr_back(stack); // get last element
|
|
564
|
-
// const el = state[0];
|
|
565
|
-
// if (state[1] === 0) {
|
|
566
|
-
// // Seen for first time.
|
|
567
|
-
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
|
|
568
|
-
// stack.pop();
|
|
569
|
-
// continue;
|
|
570
|
-
// }
|
|
571
|
-
// const html_el = el as HTMLElement;
|
|
572
|
-
// state[2] = matcher.advance(html_el);
|
|
573
|
-
// if (state[2]) {
|
|
574
|
-
// if (matcher.matched) {
|
|
575
|
-
// res.push(html_el);
|
|
576
|
-
// res.push(...(html_el.querySelectorAll(selector)));
|
|
577
|
-
// // no need to go further.
|
|
578
|
-
// matcher.rewind();
|
|
579
|
-
// stack.pop();
|
|
580
|
-
// continue;
|
|
581
|
-
// }
|
|
582
|
-
// }
|
|
583
|
-
// }
|
|
584
|
-
// if (state[1] < el.childNodes.length) {
|
|
585
|
-
// stack.push([el.childNodes[state[1]++], 0, false]);
|
|
586
|
-
// } else {
|
|
587
|
-
// if (state[2]) {
|
|
588
|
-
// matcher.rewind();
|
|
589
|
-
// }
|
|
590
|
-
// stack.pop();
|
|
591
|
-
// }
|
|
592
|
-
// }
|
|
593
|
-
// return res;
|
|
594
|
-
// }, [] as HTMLElement[]);
|
|
595
494
|
};
|
|
596
495
|
/**
|
|
597
496
|
* Query CSS Selector to find matching node.
|
|
598
497
|
* @param {string} selector Simplified CSS selector
|
|
599
|
-
* @return {HTMLElement} matching node
|
|
498
|
+
* @return {(HTMLElement|null)} matching node
|
|
600
499
|
*/
|
|
601
500
|
HTMLElement.prototype.querySelector = function (selector) {
|
|
602
501
|
return (0, css_select_1.selectOne)(selector, this, {
|
|
603
502
|
xmlMode: true,
|
|
604
|
-
adapter: matcher_1.default
|
|
503
|
+
adapter: matcher_1.default,
|
|
605
504
|
});
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
//
|
|
618
|
-
//
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
505
|
+
};
|
|
506
|
+
/**
|
|
507
|
+
* find elements by their tagName
|
|
508
|
+
* @param {string} tagName the tagName of the elements to select
|
|
509
|
+
*/
|
|
510
|
+
HTMLElement.prototype.getElementsByTagName = function (tagName) {
|
|
511
|
+
var upperCasedTagName = tagName.toUpperCase();
|
|
512
|
+
var re = [];
|
|
513
|
+
var stack = [];
|
|
514
|
+
var currentNodeReference = this;
|
|
515
|
+
var index = 0;
|
|
516
|
+
// index turns to undefined once the stack is empty and the first condition occurs
|
|
517
|
+
// which happens once all relevant children are searched through
|
|
518
|
+
while (index !== undefined) {
|
|
519
|
+
var child = void 0;
|
|
520
|
+
// make it work with sparse arrays
|
|
521
|
+
do {
|
|
522
|
+
child = currentNodeReference.childNodes[index++];
|
|
523
|
+
} while (index < currentNodeReference.childNodes.length && child === undefined);
|
|
524
|
+
// if the child does not exist we move on with the last provided index (which belongs to the parentNode)
|
|
525
|
+
if (child === undefined) {
|
|
526
|
+
currentNodeReference = currentNodeReference.parentNode;
|
|
527
|
+
index = stack.pop();
|
|
528
|
+
continue;
|
|
529
|
+
}
|
|
530
|
+
if (child.nodeType === type_1.default.ELEMENT_NODE) {
|
|
531
|
+
// https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
|
|
532
|
+
if (tagName === '*' || child.tagName === upperCasedTagName)
|
|
533
|
+
re.push(child);
|
|
534
|
+
// if children are existing push the current status to the stack and keep searching for elements in the level below
|
|
535
|
+
if (child.childNodes.length > 0) {
|
|
536
|
+
stack.push(index);
|
|
537
|
+
currentNodeReference = child;
|
|
538
|
+
index = 0;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
return re;
|
|
643
543
|
};
|
|
644
544
|
/**
|
|
645
545
|
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
|
|
@@ -681,7 +581,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
681
581
|
return [node];
|
|
682
582
|
}, findOne: findOne, findAll: function () {
|
|
683
583
|
return [];
|
|
684
|
-
} })
|
|
584
|
+
} }),
|
|
685
585
|
});
|
|
686
586
|
if (e) {
|
|
687
587
|
return e;
|
|
@@ -759,7 +659,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
759
659
|
});
|
|
760
660
|
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
|
|
761
661
|
/**
|
|
762
|
-
* Get escaped (as-
|
|
662
|
+
* Get escaped (as-is) attributes
|
|
763
663
|
* @return {Object} parsed attributes
|
|
764
664
|
*/
|
|
765
665
|
get: function () {
|
|
@@ -768,10 +668,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
768
668
|
}
|
|
769
669
|
var attrs = {};
|
|
770
670
|
if (this.rawAttrs) {
|
|
771
|
-
var re =
|
|
671
|
+
var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
772
672
|
var match = void 0;
|
|
773
673
|
while ((match = re.exec(this.rawAttrs))) {
|
|
774
|
-
|
|
674
|
+
var key = match[1];
|
|
675
|
+
var val = match[2] || null;
|
|
676
|
+
if (val && (val[0] === "'" || val[0] === "\""))
|
|
677
|
+
val = val.slice(1, val.length - 1);
|
|
678
|
+
attrs[key] = val;
|
|
775
679
|
}
|
|
776
680
|
}
|
|
777
681
|
this._rawAttrs = attrs;
|
|
@@ -788,13 +692,15 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
788
692
|
delete this._attrs[key];
|
|
789
693
|
}
|
|
790
694
|
// Update rawString
|
|
791
|
-
this.rawAttrs = Object.keys(attrs)
|
|
695
|
+
this.rawAttrs = Object.keys(attrs)
|
|
696
|
+
.map(function (name) {
|
|
792
697
|
var val = JSON.stringify(attrs[name]);
|
|
793
698
|
if (val === undefined || val === 'null') {
|
|
794
699
|
return name;
|
|
795
700
|
}
|
|
796
701
|
return name + "=" + val;
|
|
797
|
-
})
|
|
702
|
+
})
|
|
703
|
+
.join(' ');
|
|
798
704
|
// Update this.id
|
|
799
705
|
if (key === 'id') {
|
|
800
706
|
this.id = '';
|
|
@@ -818,7 +724,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
818
724
|
HTMLElement.prototype.setAttribute = function (key, value) {
|
|
819
725
|
var _this = this;
|
|
820
726
|
if (arguments.length < 2) {
|
|
821
|
-
throw new Error(
|
|
727
|
+
throw new Error("Failed to execute 'setAttribute' on 'Element'");
|
|
822
728
|
}
|
|
823
729
|
var k2 = key.toLowerCase();
|
|
824
730
|
var attrs = this.rawAttributes;
|
|
@@ -834,13 +740,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
834
740
|
this._attrs[k2] = decode(attrs[key]);
|
|
835
741
|
}
|
|
836
742
|
// Update rawString
|
|
837
|
-
this.rawAttrs = Object.keys(attrs)
|
|
743
|
+
this.rawAttrs = Object.keys(attrs)
|
|
744
|
+
.map(function (name) {
|
|
838
745
|
var val = _this.quoteAttribute(attrs[name]);
|
|
839
|
-
if (val === 'null' || val === '""')
|
|
746
|
+
if (val === 'null' || val === '""')
|
|
840
747
|
return name;
|
|
841
|
-
}
|
|
842
748
|
return name + "=" + val;
|
|
843
|
-
})
|
|
749
|
+
})
|
|
750
|
+
.join(' ');
|
|
844
751
|
// Update this.id
|
|
845
752
|
if (key === 'id') {
|
|
846
753
|
this.id = value;
|
|
@@ -861,13 +768,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
861
768
|
delete this._rawAttrs;
|
|
862
769
|
}
|
|
863
770
|
// Update rawString
|
|
864
|
-
this.rawAttrs = Object.keys(attributes)
|
|
771
|
+
this.rawAttrs = Object.keys(attributes)
|
|
772
|
+
.map(function (name) {
|
|
865
773
|
var val = attributes[name];
|
|
866
|
-
if (val === 'null' || val === '""')
|
|
774
|
+
if (val === 'null' || val === '""')
|
|
867
775
|
return name;
|
|
868
|
-
}
|
|
869
776
|
return name + "=" + _this.quoteAttribute(String(val));
|
|
870
|
-
})
|
|
777
|
+
})
|
|
778
|
+
.join(' ');
|
|
871
779
|
};
|
|
872
780
|
HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
|
|
873
781
|
var _a, _b, _c;
|
|
@@ -920,9 +828,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
920
828
|
var i = 0;
|
|
921
829
|
while (i < children.length) {
|
|
922
830
|
var child = children[i++];
|
|
923
|
-
if (this === child)
|
|
831
|
+
if (this === child)
|
|
924
832
|
return children[i] || null;
|
|
925
|
-
}
|
|
926
833
|
}
|
|
927
834
|
return null;
|
|
928
835
|
}
|
|
@@ -964,12 +871,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
964
871
|
}(node_1.default));
|
|
965
872
|
exports.default = HTMLElement;
|
|
966
873
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
|
967
|
-
var kMarkupPattern = /<!--[
|
|
968
|
-
|
|
969
|
-
// <([a-z][-.:0-9_a-z]*)\s*\/>
|
|
970
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
|
|
971
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
|
|
972
|
-
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
|
|
874
|
+
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
|
|
875
|
+
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
|
|
973
876
|
var kSelfClosingElements = {
|
|
974
877
|
area: true,
|
|
975
878
|
AREA: true,
|
|
@@ -998,7 +901,7 @@ var kSelfClosingElements = {
|
|
|
998
901
|
track: true,
|
|
999
902
|
TRACK: true,
|
|
1000
903
|
wbr: true,
|
|
1001
|
-
WBR: true
|
|
904
|
+
WBR: true,
|
|
1002
905
|
};
|
|
1003
906
|
var kElementsClosedByOpening = {
|
|
1004
907
|
li: { li: true, LI: true },
|
|
@@ -1022,7 +925,7 @@ var kElementsClosedByOpening = {
|
|
|
1022
925
|
h5: { h5: true, H5: true },
|
|
1023
926
|
H5: { h5: true, H5: true },
|
|
1024
927
|
h6: { h6: true, H6: true },
|
|
1025
|
-
H6: { h6: true, H6: true }
|
|
928
|
+
H6: { h6: true, H6: true },
|
|
1026
929
|
};
|
|
1027
930
|
var kElementsClosedByClosing = {
|
|
1028
931
|
li: { ul: true, ol: true, UL: true, OL: true },
|
|
@@ -1038,7 +941,7 @@ var kElementsClosedByClosing = {
|
|
|
1038
941
|
td: { tr: true, table: true, TR: true, TABLE: true },
|
|
1039
942
|
TD: { tr: true, table: true, TR: true, TABLE: true },
|
|
1040
943
|
th: { tr: true, table: true, TR: true, TABLE: true },
|
|
1041
|
-
TH: { tr: true, table: true, TR: true, TABLE: true }
|
|
944
|
+
TH: { tr: true, table: true, TR: true, TABLE: true },
|
|
1042
945
|
};
|
|
1043
946
|
var frameflag = 'documentfragmentcontainer';
|
|
1044
947
|
/**
|
|
@@ -1053,45 +956,39 @@ function base_parse(data, options) {
|
|
|
1053
956
|
script: true,
|
|
1054
957
|
noscript: true,
|
|
1055
958
|
style: true,
|
|
1056
|
-
pre: true
|
|
959
|
+
pre: true,
|
|
1057
960
|
};
|
|
1058
961
|
var element_names = Object.keys(elements);
|
|
1059
|
-
var kBlockTextElements = element_names.map(function (it) {
|
|
1060
|
-
|
|
1061
|
-
});
|
|
1062
|
-
var kIgnoreElements = element_names.filter(function (it) {
|
|
1063
|
-
return elements[it];
|
|
1064
|
-
}).map(function (it) {
|
|
1065
|
-
return new RegExp(it, 'i');
|
|
1066
|
-
});
|
|
962
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
963
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
1067
964
|
function element_should_be_ignore(tag) {
|
|
1068
|
-
return kIgnoreElements.some(function (it) {
|
|
1069
|
-
return it.test(tag);
|
|
1070
|
-
});
|
|
965
|
+
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
1071
966
|
}
|
|
1072
967
|
function is_block_text_element(tag) {
|
|
1073
|
-
return kBlockTextElements.some(function (it) {
|
|
1074
|
-
return it.test(tag);
|
|
1075
|
-
});
|
|
968
|
+
return kBlockTextElements.some(function (it) { return it.test(tag); });
|
|
1076
969
|
}
|
|
1077
|
-
var createRange = function (startPos, endPos) {
|
|
1078
|
-
return [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
1079
|
-
};
|
|
970
|
+
var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
|
|
1080
971
|
var root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
1081
972
|
var currentParent = root;
|
|
1082
973
|
var stack = [root];
|
|
1083
974
|
var lastTextPos = -1;
|
|
975
|
+
var noNestedTagIndex = undefined;
|
|
1084
976
|
var match;
|
|
1085
977
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1086
978
|
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
979
|
+
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1087
980
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1088
981
|
var frameFlagOffset = frameflag.length + 2;
|
|
1089
982
|
while ((match = kMarkupPattern.exec(data))) {
|
|
1090
|
-
|
|
983
|
+
// Note: Object destructuring here consistently tests as higher performance than array destructuring
|
|
984
|
+
// eslint-disable-next-line prefer-const
|
|
985
|
+
var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
|
|
986
|
+
var matchLength = matchText.length;
|
|
987
|
+
var tagStartPos = kMarkupPattern.lastIndex - matchLength;
|
|
1091
988
|
var tagEndPos = kMarkupPattern.lastIndex;
|
|
1092
989
|
// Add TextNode if content
|
|
1093
990
|
if (lastTextPos > -1) {
|
|
1094
|
-
if (lastTextPos +
|
|
991
|
+
if (lastTextPos + matchLength < tagEndPos) {
|
|
1095
992
|
var text = data.substring(lastTextPos, tagStartPos);
|
|
1096
993
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
1097
994
|
}
|
|
@@ -1099,10 +996,10 @@ function base_parse(data, options) {
|
|
|
1099
996
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1100
997
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1101
998
|
// Skip frameflag node
|
|
1102
|
-
if (
|
|
999
|
+
if (tagName === frameflag)
|
|
1103
1000
|
continue;
|
|
1104
1001
|
// Handle comments
|
|
1105
|
-
if (
|
|
1002
|
+
if (matchText[1] === '!') {
|
|
1106
1003
|
if (options.comment) {
|
|
1107
1004
|
// Only keep what is in between <!-- and -->
|
|
1108
1005
|
var text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
@@ -1112,36 +1009,46 @@ function base_parse(data, options) {
|
|
|
1112
1009
|
}
|
|
1113
1010
|
/* -- Handle tag matching -- */
|
|
1114
1011
|
// Fix tag casing if necessary
|
|
1115
|
-
if (
|
|
1116
|
-
|
|
1012
|
+
if (lowerCaseTagName)
|
|
1013
|
+
tagName = tagName.toLowerCase();
|
|
1117
1014
|
// Handle opening tags (ie. <this> not </that>)
|
|
1118
|
-
if (!
|
|
1015
|
+
if (!leadingSlash) {
|
|
1119
1016
|
/* Populate attributes */
|
|
1120
1017
|
var attrs = {};
|
|
1121
|
-
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(
|
|
1122
|
-
|
|
1018
|
+
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
|
|
1019
|
+
var key = attMatch[1], val = attMatch[2];
|
|
1020
|
+
var isQuoted = val[0] === "'" || val[0] === "\"";
|
|
1021
|
+
attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
|
|
1123
1022
|
}
|
|
1124
|
-
var
|
|
1125
|
-
if (!
|
|
1126
|
-
if (kElementsClosedByOpening[
|
|
1023
|
+
var parentTagName = currentParent.rawTagName;
|
|
1024
|
+
if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
|
|
1025
|
+
if (kElementsClosedByOpening[parentTagName][tagName]) {
|
|
1127
1026
|
stack.pop();
|
|
1128
1027
|
currentParent = (0, back_1.default)(stack);
|
|
1129
1028
|
}
|
|
1130
1029
|
}
|
|
1030
|
+
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
|
|
1031
|
+
if (tagName === 'a' || tagName === 'A') {
|
|
1032
|
+
if (noNestedTagIndex !== undefined) {
|
|
1033
|
+
stack.splice(noNestedTagIndex);
|
|
1034
|
+
currentParent = (0, back_1.default)(stack);
|
|
1035
|
+
}
|
|
1036
|
+
noNestedTagIndex = stack.length;
|
|
1037
|
+
}
|
|
1131
1038
|
var tagEndPos_1 = kMarkupPattern.lastIndex;
|
|
1132
|
-
var tagStartPos_1 = tagEndPos_1 -
|
|
1039
|
+
var tagStartPos_1 = tagEndPos_1 - matchLength;
|
|
1133
1040
|
currentParent = currentParent.appendChild(
|
|
1134
1041
|
// Initialize range (end position updated later for closed tags)
|
|
1135
|
-
new HTMLElement(
|
|
1042
|
+
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
|
|
1136
1043
|
stack.push(currentParent);
|
|
1137
|
-
if (is_block_text_element(
|
|
1044
|
+
if (is_block_text_element(tagName)) {
|
|
1138
1045
|
// Find closing tag
|
|
1139
|
-
var closeMarkup = "</" +
|
|
1140
|
-
var closeIndex =
|
|
1046
|
+
var closeMarkup = "</" + tagName + ">";
|
|
1047
|
+
var closeIndex = lowerCaseTagName
|
|
1141
1048
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1142
1049
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1143
1050
|
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1144
|
-
if (element_should_be_ignore(
|
|
1051
|
+
if (element_should_be_ignore(tagName)) {
|
|
1145
1052
|
var text = data.substring(tagEndPos_1, textEndPos);
|
|
1146
1053
|
if (text.length > 0 && /\S/.test(text)) {
|
|
1147
1054
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
|
|
@@ -1153,14 +1060,16 @@ function base_parse(data, options) {
|
|
|
1153
1060
|
else {
|
|
1154
1061
|
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1155
1062
|
// Cause to be treated as self-closing, because no close found
|
|
1156
|
-
|
|
1063
|
+
leadingSlash = '/';
|
|
1157
1064
|
}
|
|
1158
1065
|
}
|
|
1159
1066
|
}
|
|
1160
1067
|
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1161
|
-
if (
|
|
1068
|
+
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
|
|
1162
1069
|
while (true) {
|
|
1163
|
-
if (
|
|
1070
|
+
if (tagName === 'a' || tagName === 'A')
|
|
1071
|
+
noNestedTagIndex = undefined;
|
|
1072
|
+
if (currentParent.rawTagName === tagName) {
|
|
1164
1073
|
// Update range end for closed tag
|
|
1165
1074
|
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1166
1075
|
stack.pop();
|
|
@@ -1168,10 +1077,10 @@ function base_parse(data, options) {
|
|
|
1168
1077
|
break;
|
|
1169
1078
|
}
|
|
1170
1079
|
else {
|
|
1171
|
-
var
|
|
1080
|
+
var parentTagName = currentParent.tagName;
|
|
1172
1081
|
// Trying to close current tag, and move on
|
|
1173
|
-
if (kElementsClosedByClosing[
|
|
1174
|
-
if (kElementsClosedByClosing[
|
|
1082
|
+
if (kElementsClosedByClosing[parentTagName]) {
|
|
1083
|
+
if (kElementsClosedByClosing[parentTagName][tagName]) {
|
|
1175
1084
|
stack.pop();
|
|
1176
1085
|
currentParent = (0, back_1.default)(stack);
|
|
1177
1086
|
continue;
|