node-html-parser 4.1.4 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +34 -25
- package/dist/main.js +177 -271
- package/dist/nodes/html.d.ts +12 -5
- package/dist/nodes/html.js +177 -271
- package/esm/index.js +11 -0
- package/esm/package.json +3 -0
- package/package.json +46 -17
- package/.eslintignore +0 -3
- package/.eslintrc.json +0 -226
- package/.mocharc.yaml +0 -1
- package/dist/esm/back.js +0 -3
- package/dist/esm/index.js +0 -7
- package/dist/esm/matcher.js +0 -101
- package/dist/esm/nodes/comment.js +0 -23
- package/dist/esm/nodes/html.js +0 -1102
- package/dist/esm/nodes/node.js +0 -25
- package/dist/esm/nodes/text.js +0 -95
- package/dist/esm/nodes/type.js +0 -7
- package/dist/esm/parse.js +0 -1
- package/dist/esm/valid.js +0 -9
package/dist/nodes/html.js
CHANGED
|
@@ -32,7 +32,7 @@ var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
|
32
32
|
ar[i] = from[i];
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
|
-
return to.concat(ar || from);
|
|
35
|
+
return to.concat(ar || Array.prototype.slice.call(from));
|
|
36
36
|
};
|
|
37
37
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
38
38
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
@@ -47,89 +47,40 @@ var text_1 = __importDefault(require("./text"));
|
|
|
47
47
|
var matcher_1 = __importDefault(require("../matcher"));
|
|
48
48
|
var back_1 = __importDefault(require("../back"));
|
|
49
49
|
var comment_1 = __importDefault(require("./comment"));
|
|
50
|
-
|
|
50
|
+
var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
|
|
51
51
|
function decode(val) {
|
|
52
52
|
// clone string
|
|
53
53
|
return JSON.parse(JSON.stringify(he_1.default.decode(val)));
|
|
54
54
|
}
|
|
55
55
|
// https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
|
|
56
|
+
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
|
|
57
|
+
var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
|
|
58
|
+
var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
|
|
59
|
+
var tableTags = ['table', 'td', 'tr'];
|
|
60
|
+
var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
|
|
56
61
|
var kBlockElements = new Set();
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
kBlockElements.add(
|
|
66
|
-
kBlockElements.add(
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
kBlockElements.add('dl');
|
|
76
|
-
kBlockElements.add('DL');
|
|
77
|
-
kBlockElements.add('dt');
|
|
78
|
-
kBlockElements.add('DT');
|
|
79
|
-
kBlockElements.add('fieldset');
|
|
80
|
-
kBlockElements.add('FIELDSET');
|
|
81
|
-
kBlockElements.add('figcaption');
|
|
82
|
-
kBlockElements.add('FIGCAPTION');
|
|
83
|
-
kBlockElements.add('figure');
|
|
84
|
-
kBlockElements.add('FIGURE');
|
|
85
|
-
kBlockElements.add('footer');
|
|
86
|
-
kBlockElements.add('FOOTER');
|
|
87
|
-
kBlockElements.add('form');
|
|
88
|
-
kBlockElements.add('FORM');
|
|
89
|
-
kBlockElements.add('h1');
|
|
90
|
-
kBlockElements.add('H1');
|
|
91
|
-
kBlockElements.add('h2');
|
|
92
|
-
kBlockElements.add('H2');
|
|
93
|
-
kBlockElements.add('h3');
|
|
94
|
-
kBlockElements.add('H3');
|
|
95
|
-
kBlockElements.add('h4');
|
|
96
|
-
kBlockElements.add('H4');
|
|
97
|
-
kBlockElements.add('h5');
|
|
98
|
-
kBlockElements.add('H5');
|
|
99
|
-
kBlockElements.add('h6');
|
|
100
|
-
kBlockElements.add('H6');
|
|
101
|
-
kBlockElements.add('header');
|
|
102
|
-
kBlockElements.add('HEADER');
|
|
103
|
-
kBlockElements.add('hgroup');
|
|
104
|
-
kBlockElements.add('HGROUP');
|
|
105
|
-
kBlockElements.add('hr');
|
|
106
|
-
kBlockElements.add('HR');
|
|
107
|
-
kBlockElements.add('li');
|
|
108
|
-
kBlockElements.add('LI');
|
|
109
|
-
kBlockElements.add('main');
|
|
110
|
-
kBlockElements.add('MAIN');
|
|
111
|
-
kBlockElements.add('nav');
|
|
112
|
-
kBlockElements.add('NAV');
|
|
113
|
-
kBlockElements.add('ol');
|
|
114
|
-
kBlockElements.add('OL');
|
|
115
|
-
kBlockElements.add('p');
|
|
116
|
-
kBlockElements.add('P');
|
|
117
|
-
kBlockElements.add('pre');
|
|
118
|
-
kBlockElements.add('PRE');
|
|
119
|
-
kBlockElements.add('section');
|
|
120
|
-
kBlockElements.add('SECTION');
|
|
121
|
-
kBlockElements.add('table');
|
|
122
|
-
kBlockElements.add('TABLE');
|
|
123
|
-
kBlockElements.add('td');
|
|
124
|
-
kBlockElements.add('TD');
|
|
125
|
-
kBlockElements.add('tr');
|
|
126
|
-
kBlockElements.add('TR');
|
|
127
|
-
kBlockElements.add('ul');
|
|
128
|
-
kBlockElements.add('UL');
|
|
62
|
+
function addToKBlockElement() {
|
|
63
|
+
var args = [];
|
|
64
|
+
for (var _i = 0; _i < arguments.length; _i++) {
|
|
65
|
+
args[_i] = arguments[_i];
|
|
66
|
+
}
|
|
67
|
+
var addToSet = function (array) {
|
|
68
|
+
for (var index = 0; index < array.length; index++) {
|
|
69
|
+
var element = array[index];
|
|
70
|
+
kBlockElements.add(element);
|
|
71
|
+
kBlockElements.add(element.toUpperCase());
|
|
72
|
+
}
|
|
73
|
+
};
|
|
74
|
+
for (var _a = 0, args_1 = args; _a < args_1.length; _a++) {
|
|
75
|
+
var arg = args_1[_a];
|
|
76
|
+
addToSet(arg);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
|
|
129
80
|
var DOMTokenList = /** @class */ (function () {
|
|
130
81
|
function DOMTokenList(valuesInit, afterUpdate) {
|
|
131
82
|
if (valuesInit === void 0) { valuesInit = []; }
|
|
132
|
-
if (afterUpdate === void 0) { afterUpdate =
|
|
83
|
+
if (afterUpdate === void 0) { afterUpdate = function () { return null; }; }
|
|
133
84
|
this._set = new Set(valuesInit);
|
|
134
85
|
this._afterUpdate = afterUpdate;
|
|
135
86
|
}
|
|
@@ -150,8 +101,7 @@ var DOMTokenList = /** @class */ (function () {
|
|
|
150
101
|
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
151
102
|
};
|
|
152
103
|
DOMTokenList.prototype.remove = function (c) {
|
|
153
|
-
this._set.delete(c) &&
|
|
154
|
-
this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
104
|
+
this._set.delete(c) && this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
|
|
155
105
|
};
|
|
156
106
|
DOMTokenList.prototype.toggle = function (c) {
|
|
157
107
|
this._validate(c);
|
|
@@ -216,8 +166,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
216
166
|
_this.rawAttrs = rawAttrs || '';
|
|
217
167
|
_this.id = keyAttrs.id || '';
|
|
218
168
|
_this.childNodes = [];
|
|
219
|
-
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return
|
|
220
|
-
);
|
|
169
|
+
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
170
|
+
);
|
|
221
171
|
if (keyAttrs.id) {
|
|
222
172
|
if (!rawAttrs) {
|
|
223
173
|
_this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
|
|
@@ -242,8 +192,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
242
192
|
* @returns {string} quoted value
|
|
243
193
|
*/
|
|
244
194
|
HTMLElement.prototype.quoteAttribute = function (attr) {
|
|
245
|
-
if (attr
|
|
246
|
-
return
|
|
195
|
+
if (attr == null) {
|
|
196
|
+
return 'null';
|
|
247
197
|
}
|
|
248
198
|
return JSON.stringify(attr.replace(/"/g, '"'));
|
|
249
199
|
};
|
|
@@ -265,7 +215,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
265
215
|
*/
|
|
266
216
|
HTMLElement.prototype.removeChild = function (node) {
|
|
267
217
|
this.childNodes = this.childNodes.filter(function (child) {
|
|
268
|
-
return
|
|
218
|
+
return child !== node;
|
|
269
219
|
});
|
|
270
220
|
};
|
|
271
221
|
/**
|
|
@@ -286,6 +236,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
286
236
|
get: function () {
|
|
287
237
|
return this.rawTagName ? this.rawTagName.toUpperCase() : this.rawTagName;
|
|
288
238
|
},
|
|
239
|
+
set: function (newname) {
|
|
240
|
+
this.rawTagName = newname.toLowerCase();
|
|
241
|
+
},
|
|
289
242
|
enumerable: false,
|
|
290
243
|
configurable: true
|
|
291
244
|
});
|
|
@@ -296,6 +249,13 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
296
249
|
enumerable: false,
|
|
297
250
|
configurable: true
|
|
298
251
|
});
|
|
252
|
+
Object.defineProperty(HTMLElement.prototype, "isVoidElement", {
|
|
253
|
+
get: function () {
|
|
254
|
+
return voidTags.has(this.localName);
|
|
255
|
+
},
|
|
256
|
+
enumerable: false,
|
|
257
|
+
configurable: true
|
|
258
|
+
});
|
|
299
259
|
Object.defineProperty(HTMLElement.prototype, "rawText", {
|
|
300
260
|
/**
|
|
301
261
|
* Get escpaed (as-it) text value of current node and its children.
|
|
@@ -343,11 +303,11 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
343
303
|
if (node.nodeType === type_1.default.ELEMENT_NODE) {
|
|
344
304
|
if (kBlockElements.has(node.rawTagName)) {
|
|
345
305
|
if (currentBlock.length > 0) {
|
|
346
|
-
blocks.push(currentBlock = []);
|
|
306
|
+
blocks.push((currentBlock = []));
|
|
347
307
|
}
|
|
348
308
|
node.childNodes.forEach(dfs);
|
|
349
309
|
if (currentBlock.length > 0) {
|
|
350
|
-
blocks.push(currentBlock = []);
|
|
310
|
+
blocks.push((currentBlock = []));
|
|
351
311
|
}
|
|
352
312
|
}
|
|
353
313
|
else {
|
|
@@ -370,11 +330,12 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
370
330
|
}
|
|
371
331
|
}
|
|
372
332
|
dfs(this);
|
|
373
|
-
return blocks
|
|
374
|
-
|
|
375
|
-
return block.join('').replace(/\s{2,}/g, ' ');
|
|
333
|
+
return blocks
|
|
334
|
+
.map(function (block) {
|
|
335
|
+
return block.join('').replace(/\s{2,}/g, ' '); // Normalize each line's whitespace
|
|
376
336
|
})
|
|
377
|
-
.join('\n')
|
|
337
|
+
.join('\n')
|
|
338
|
+
.replace(/\s+$/, ''); // trimRight;
|
|
378
339
|
},
|
|
379
340
|
enumerable: false,
|
|
380
341
|
configurable: true
|
|
@@ -382,22 +343,18 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
382
343
|
HTMLElement.prototype.toString = function () {
|
|
383
344
|
var tag = this.rawTagName;
|
|
384
345
|
if (tag) {
|
|
385
|
-
// const void_tags = new Set('area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr'.split('|'));
|
|
386
|
-
// const is_void = void_tags.has(tag);
|
|
387
|
-
var is_void = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
|
|
388
346
|
var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
|
|
389
|
-
|
|
390
|
-
return "<" + tag + attrs + ">";
|
|
391
|
-
}
|
|
392
|
-
return "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
347
|
+
return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
|
|
393
348
|
}
|
|
394
349
|
return this.innerHTML;
|
|
395
350
|
};
|
|
396
351
|
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
|
|
397
352
|
get: function () {
|
|
398
|
-
return this.childNodes
|
|
353
|
+
return this.childNodes
|
|
354
|
+
.map(function (child) {
|
|
399
355
|
return child.toString();
|
|
400
|
-
})
|
|
356
|
+
})
|
|
357
|
+
.join('');
|
|
401
358
|
},
|
|
402
359
|
set: function (content) {
|
|
403
360
|
//const r = parse(content, global.options); // TODO global.options ?
|
|
@@ -424,7 +381,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
424
381
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
425
382
|
nodes[_i] = arguments[_i];
|
|
426
383
|
}
|
|
427
|
-
var content = nodes
|
|
384
|
+
var content = nodes
|
|
385
|
+
.map(function (node) {
|
|
428
386
|
if (node instanceof node_1.default) {
|
|
429
387
|
return [node];
|
|
430
388
|
}
|
|
@@ -434,7 +392,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
434
392
|
return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
|
|
435
393
|
}
|
|
436
394
|
return [];
|
|
437
|
-
})
|
|
395
|
+
})
|
|
396
|
+
.flat();
|
|
438
397
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
439
398
|
return child === _this;
|
|
440
399
|
});
|
|
@@ -481,8 +440,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
481
440
|
res.push(' '.repeat(indention) + str);
|
|
482
441
|
}
|
|
483
442
|
function dfs(node) {
|
|
484
|
-
var idStr = node.id ?
|
|
485
|
-
var classStr = node.classList.length ?
|
|
443
|
+
var idStr = node.id ? "#" + node.id : '';
|
|
444
|
+
var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
486
445
|
write("" + node.rawTagName + idStr + classStr);
|
|
487
446
|
indention++;
|
|
488
447
|
node.childNodes.forEach(function (childNode) {
|
|
@@ -533,113 +492,57 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
533
492
|
HTMLElement.prototype.querySelectorAll = function (selector) {
|
|
534
493
|
return (0, css_select_1.selectAll)(selector, this, {
|
|
535
494
|
xmlMode: true,
|
|
536
|
-
adapter: matcher_1.default
|
|
495
|
+
adapter: matcher_1.default,
|
|
537
496
|
});
|
|
538
|
-
// let matcher: Matcher;
|
|
539
|
-
// if (selector instanceof Matcher) {
|
|
540
|
-
// matcher = selector;
|
|
541
|
-
// matcher.reset();
|
|
542
|
-
// } else {
|
|
543
|
-
// if (selector.includes(',')) {
|
|
544
|
-
// const selectors = selector.split(',');
|
|
545
|
-
// return Array.from(selectors.reduce((pre, cur) => {
|
|
546
|
-
// const result = this.querySelectorAll(cur.trim());
|
|
547
|
-
// return result.reduce((p, c) => {
|
|
548
|
-
// return p.add(c);
|
|
549
|
-
// }, pre);
|
|
550
|
-
// }, new Set<HTMLElement>()));
|
|
551
|
-
// }
|
|
552
|
-
// matcher = new Matcher(selector);
|
|
553
|
-
// }
|
|
554
|
-
// interface IStack {
|
|
555
|
-
// 0: Node; // node
|
|
556
|
-
// 1: number; // children
|
|
557
|
-
// 2: boolean; // found flag
|
|
558
|
-
// }
|
|
559
|
-
// const stack = [] as IStack[];
|
|
560
|
-
// return this.childNodes.reduce((res, cur) => {
|
|
561
|
-
// stack.push([cur, 0, false]);
|
|
562
|
-
// while (stack.length) {
|
|
563
|
-
// const state = arr_back(stack); // get last element
|
|
564
|
-
// const el = state[0];
|
|
565
|
-
// if (state[1] === 0) {
|
|
566
|
-
// // Seen for first time.
|
|
567
|
-
// if (el.nodeType !== NodeType.ELEMENT_NODE) {
|
|
568
|
-
// stack.pop();
|
|
569
|
-
// continue;
|
|
570
|
-
// }
|
|
571
|
-
// const html_el = el as HTMLElement;
|
|
572
|
-
// state[2] = matcher.advance(html_el);
|
|
573
|
-
// if (state[2]) {
|
|
574
|
-
// if (matcher.matched) {
|
|
575
|
-
// res.push(html_el);
|
|
576
|
-
// res.push(...(html_el.querySelectorAll(selector)));
|
|
577
|
-
// // no need to go further.
|
|
578
|
-
// matcher.rewind();
|
|
579
|
-
// stack.pop();
|
|
580
|
-
// continue;
|
|
581
|
-
// }
|
|
582
|
-
// }
|
|
583
|
-
// }
|
|
584
|
-
// if (state[1] < el.childNodes.length) {
|
|
585
|
-
// stack.push([el.childNodes[state[1]++], 0, false]);
|
|
586
|
-
// } else {
|
|
587
|
-
// if (state[2]) {
|
|
588
|
-
// matcher.rewind();
|
|
589
|
-
// }
|
|
590
|
-
// stack.pop();
|
|
591
|
-
// }
|
|
592
|
-
// }
|
|
593
|
-
// return res;
|
|
594
|
-
// }, [] as HTMLElement[]);
|
|
595
497
|
};
|
|
596
498
|
/**
|
|
597
499
|
* Query CSS Selector to find matching node.
|
|
598
500
|
* @param {string} selector Simplified CSS selector
|
|
599
|
-
* @return {HTMLElement} matching node
|
|
501
|
+
* @return {(HTMLElement|null)} matching node
|
|
600
502
|
*/
|
|
601
503
|
HTMLElement.prototype.querySelector = function (selector) {
|
|
602
504
|
return (0, css_select_1.selectOne)(selector, this, {
|
|
603
505
|
xmlMode: true,
|
|
604
|
-
adapter: matcher_1.default
|
|
506
|
+
adapter: matcher_1.default,
|
|
605
507
|
});
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
//
|
|
618
|
-
//
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
508
|
+
};
|
|
509
|
+
/**
|
|
510
|
+
* find elements by their tagName
|
|
511
|
+
* @param {string} tagName the tagName of the elements to select
|
|
512
|
+
*/
|
|
513
|
+
HTMLElement.prototype.getElementsByTagName = function (tagName) {
|
|
514
|
+
var upperCasedTagName = tagName.toUpperCase();
|
|
515
|
+
var re = [];
|
|
516
|
+
var stack = [];
|
|
517
|
+
var currentNodeReference = this;
|
|
518
|
+
var index = 0;
|
|
519
|
+
// index turns to undefined once the stack is empty and the first condition occurs
|
|
520
|
+
// which happens once all relevant children are searched through
|
|
521
|
+
while (index !== undefined) {
|
|
522
|
+
var child = void 0;
|
|
523
|
+
// make it work with sparse arrays
|
|
524
|
+
do {
|
|
525
|
+
child = currentNodeReference.childNodes[index++];
|
|
526
|
+
} while (index < currentNodeReference.childNodes.length && child === undefined);
|
|
527
|
+
// if the child does not exist we move on with the last provided index (which belongs to the parentNode)
|
|
528
|
+
if (child === undefined) {
|
|
529
|
+
currentNodeReference = currentNodeReference.parentNode;
|
|
530
|
+
index = stack.pop();
|
|
531
|
+
continue;
|
|
532
|
+
}
|
|
533
|
+
if (child.nodeType === type_1.default.ELEMENT_NODE) {
|
|
534
|
+
// https://developer.mozilla.org/en-US/docs/Web/API/Element/getElementsByTagName#syntax
|
|
535
|
+
if (tagName === '*' || child.tagName === upperCasedTagName)
|
|
536
|
+
re.push(child);
|
|
537
|
+
// if children are existing push the current status to the stack and keep searching for elements in the level below
|
|
538
|
+
if (child.childNodes.length > 0) {
|
|
539
|
+
stack.push(index);
|
|
540
|
+
currentNodeReference = child;
|
|
541
|
+
index = 0;
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
return re;
|
|
643
546
|
};
|
|
644
547
|
/**
|
|
645
548
|
* traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
|
|
@@ -681,7 +584,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
681
584
|
return [node];
|
|
682
585
|
}, findOne: findOne, findAll: function () {
|
|
683
586
|
return [];
|
|
684
|
-
} })
|
|
587
|
+
} }),
|
|
685
588
|
});
|
|
686
589
|
if (e) {
|
|
687
590
|
return e;
|
|
@@ -759,7 +662,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
759
662
|
});
|
|
760
663
|
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
|
|
761
664
|
/**
|
|
762
|
-
* Get escaped (as-
|
|
665
|
+
* Get escaped (as-is) attributes
|
|
763
666
|
* @return {Object} parsed attributes
|
|
764
667
|
*/
|
|
765
668
|
get: function () {
|
|
@@ -768,10 +671,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
768
671
|
}
|
|
769
672
|
var attrs = {};
|
|
770
673
|
if (this.rawAttrs) {
|
|
771
|
-
var re = /([a-
|
|
674
|
+
var re = /([a-zA-Z()#][a-zA-Z0-9-_:()#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
772
675
|
var match = void 0;
|
|
773
676
|
while ((match = re.exec(this.rawAttrs))) {
|
|
774
|
-
|
|
677
|
+
var key = match[1];
|
|
678
|
+
var val = match[2] || null;
|
|
679
|
+
if (val && (val[0] === "'" || val[0] === "\""))
|
|
680
|
+
val = val.slice(1, val.length - 1);
|
|
681
|
+
attrs[key] = val;
|
|
775
682
|
}
|
|
776
683
|
}
|
|
777
684
|
this._rawAttrs = attrs;
|
|
@@ -788,13 +695,15 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
788
695
|
delete this._attrs[key];
|
|
789
696
|
}
|
|
790
697
|
// Update rawString
|
|
791
|
-
this.rawAttrs = Object.keys(attrs)
|
|
698
|
+
this.rawAttrs = Object.keys(attrs)
|
|
699
|
+
.map(function (name) {
|
|
792
700
|
var val = JSON.stringify(attrs[name]);
|
|
793
701
|
if (val === undefined || val === 'null') {
|
|
794
702
|
return name;
|
|
795
703
|
}
|
|
796
704
|
return name + "=" + val;
|
|
797
|
-
})
|
|
705
|
+
})
|
|
706
|
+
.join(' ');
|
|
798
707
|
// Update this.id
|
|
799
708
|
if (key === 'id') {
|
|
800
709
|
this.id = '';
|
|
@@ -818,7 +727,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
818
727
|
HTMLElement.prototype.setAttribute = function (key, value) {
|
|
819
728
|
var _this = this;
|
|
820
729
|
if (arguments.length < 2) {
|
|
821
|
-
throw new Error(
|
|
730
|
+
throw new Error("Failed to execute 'setAttribute' on 'Element'");
|
|
822
731
|
}
|
|
823
732
|
var k2 = key.toLowerCase();
|
|
824
733
|
var attrs = this.rawAttributes;
|
|
@@ -834,13 +743,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
834
743
|
this._attrs[k2] = decode(attrs[key]);
|
|
835
744
|
}
|
|
836
745
|
// Update rawString
|
|
837
|
-
this.rawAttrs = Object.keys(attrs)
|
|
746
|
+
this.rawAttrs = Object.keys(attrs)
|
|
747
|
+
.map(function (name) {
|
|
838
748
|
var val = _this.quoteAttribute(attrs[name]);
|
|
839
|
-
if (val === 'null' || val === '""')
|
|
749
|
+
if (val === 'null' || val === '""')
|
|
840
750
|
return name;
|
|
841
|
-
}
|
|
842
751
|
return name + "=" + val;
|
|
843
|
-
})
|
|
752
|
+
})
|
|
753
|
+
.join(' ');
|
|
844
754
|
// Update this.id
|
|
845
755
|
if (key === 'id') {
|
|
846
756
|
this.id = value;
|
|
@@ -861,13 +771,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
861
771
|
delete this._rawAttrs;
|
|
862
772
|
}
|
|
863
773
|
// Update rawString
|
|
864
|
-
this.rawAttrs = Object.keys(attributes)
|
|
774
|
+
this.rawAttrs = Object.keys(attributes)
|
|
775
|
+
.map(function (name) {
|
|
865
776
|
var val = attributes[name];
|
|
866
|
-
if (val === 'null' || val === '""')
|
|
777
|
+
if (val === 'null' || val === '""')
|
|
867
778
|
return name;
|
|
868
|
-
}
|
|
869
779
|
return name + "=" + _this.quoteAttribute(String(val));
|
|
870
|
-
})
|
|
780
|
+
})
|
|
781
|
+
.join(' ');
|
|
871
782
|
};
|
|
872
783
|
HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
|
|
873
784
|
var _a, _b, _c;
|
|
@@ -920,9 +831,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
920
831
|
var i = 0;
|
|
921
832
|
while (i < children.length) {
|
|
922
833
|
var child = children[i++];
|
|
923
|
-
if (this === child)
|
|
834
|
+
if (this === child)
|
|
924
835
|
return children[i] || null;
|
|
925
|
-
}
|
|
926
836
|
}
|
|
927
837
|
return null;
|
|
928
838
|
}
|
|
@@ -964,12 +874,8 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
964
874
|
}(node_1.default));
|
|
965
875
|
exports.default = HTMLElement;
|
|
966
876
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
|
967
|
-
var kMarkupPattern = /<!--[
|
|
968
|
-
|
|
969
|
-
// <([a-z][-.:0-9_a-z]*)\s*\/>
|
|
970
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
|
|
971
|
-
// <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
|
|
972
|
-
var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/ig;
|
|
877
|
+
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
|
|
878
|
+
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
|
|
973
879
|
var kSelfClosingElements = {
|
|
974
880
|
area: true,
|
|
975
881
|
AREA: true,
|
|
@@ -998,7 +904,7 @@ var kSelfClosingElements = {
|
|
|
998
904
|
track: true,
|
|
999
905
|
TRACK: true,
|
|
1000
906
|
wbr: true,
|
|
1001
|
-
WBR: true
|
|
907
|
+
WBR: true,
|
|
1002
908
|
};
|
|
1003
909
|
var kElementsClosedByOpening = {
|
|
1004
910
|
li: { li: true, LI: true },
|
|
@@ -1022,7 +928,7 @@ var kElementsClosedByOpening = {
|
|
|
1022
928
|
h5: { h5: true, H5: true },
|
|
1023
929
|
H5: { h5: true, H5: true },
|
|
1024
930
|
h6: { h6: true, H6: true },
|
|
1025
|
-
H6: { h6: true, H6: true }
|
|
931
|
+
H6: { h6: true, H6: true },
|
|
1026
932
|
};
|
|
1027
933
|
var kElementsClosedByClosing = {
|
|
1028
934
|
li: { ul: true, ol: true, UL: true, OL: true },
|
|
@@ -1038,7 +944,7 @@ var kElementsClosedByClosing = {
|
|
|
1038
944
|
td: { tr: true, table: true, TR: true, TABLE: true },
|
|
1039
945
|
TD: { tr: true, table: true, TR: true, TABLE: true },
|
|
1040
946
|
th: { tr: true, table: true, TR: true, TABLE: true },
|
|
1041
|
-
TH: { tr: true, table: true, TR: true, TABLE: true }
|
|
947
|
+
TH: { tr: true, table: true, TR: true, TABLE: true },
|
|
1042
948
|
};
|
|
1043
949
|
var frameflag = 'documentfragmentcontainer';
|
|
1044
950
|
/**
|
|
@@ -1053,45 +959,39 @@ function base_parse(data, options) {
|
|
|
1053
959
|
script: true,
|
|
1054
960
|
noscript: true,
|
|
1055
961
|
style: true,
|
|
1056
|
-
pre: true
|
|
962
|
+
pre: true,
|
|
1057
963
|
};
|
|
1058
964
|
var element_names = Object.keys(elements);
|
|
1059
|
-
var kBlockTextElements = element_names.map(function (it) {
|
|
1060
|
-
|
|
1061
|
-
});
|
|
1062
|
-
var kIgnoreElements = element_names.filter(function (it) {
|
|
1063
|
-
return elements[it];
|
|
1064
|
-
}).map(function (it) {
|
|
1065
|
-
return new RegExp(it, 'i');
|
|
1066
|
-
});
|
|
965
|
+
var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
966
|
+
var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
|
|
1067
967
|
function element_should_be_ignore(tag) {
|
|
1068
|
-
return kIgnoreElements.some(function (it) {
|
|
1069
|
-
return it.test(tag);
|
|
1070
|
-
});
|
|
968
|
+
return kIgnoreElements.some(function (it) { return it.test(tag); });
|
|
1071
969
|
}
|
|
1072
970
|
function is_block_text_element(tag) {
|
|
1073
|
-
return kBlockTextElements.some(function (it) {
|
|
1074
|
-
return it.test(tag);
|
|
1075
|
-
});
|
|
971
|
+
return kBlockTextElements.some(function (it) { return it.test(tag); });
|
|
1076
972
|
}
|
|
1077
|
-
var createRange = function (startPos, endPos) {
|
|
1078
|
-
return [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
1079
|
-
};
|
|
973
|
+
var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
|
|
1080
974
|
var root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
1081
975
|
var currentParent = root;
|
|
1082
976
|
var stack = [root];
|
|
1083
977
|
var lastTextPos = -1;
|
|
978
|
+
var noNestedTagIndex = undefined;
|
|
1084
979
|
var match;
|
|
1085
980
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1086
981
|
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
982
|
+
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1087
983
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1088
984
|
var frameFlagOffset = frameflag.length + 2;
|
|
1089
985
|
while ((match = kMarkupPattern.exec(data))) {
|
|
1090
|
-
|
|
986
|
+
// Note: Object destructuring here consistently tests as higher performance than array destructuring
|
|
987
|
+
// eslint-disable-next-line prefer-const
|
|
988
|
+
var matchText = match[0], leadingSlash = match[1], tagName = match[2], attributes = match[3], closingSlash = match[4];
|
|
989
|
+
var matchLength = matchText.length;
|
|
990
|
+
var tagStartPos = kMarkupPattern.lastIndex - matchLength;
|
|
1091
991
|
var tagEndPos = kMarkupPattern.lastIndex;
|
|
1092
992
|
// Add TextNode if content
|
|
1093
993
|
if (lastTextPos > -1) {
|
|
1094
|
-
if (lastTextPos +
|
|
994
|
+
if (lastTextPos + matchLength < tagEndPos) {
|
|
1095
995
|
var text = data.substring(lastTextPos, tagStartPos);
|
|
1096
996
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
1097
997
|
}
|
|
@@ -1099,10 +999,10 @@ function base_parse(data, options) {
|
|
|
1099
999
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1100
1000
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1101
1001
|
// Skip frameflag node
|
|
1102
|
-
if (
|
|
1002
|
+
if (tagName === frameflag)
|
|
1103
1003
|
continue;
|
|
1104
1004
|
// Handle comments
|
|
1105
|
-
if (
|
|
1005
|
+
if (matchText[1] === '!') {
|
|
1106
1006
|
if (options.comment) {
|
|
1107
1007
|
// Only keep what is in between <!-- and -->
|
|
1108
1008
|
var text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
@@ -1112,42 +1012,46 @@ function base_parse(data, options) {
|
|
|
1112
1012
|
}
|
|
1113
1013
|
/* -- Handle tag matching -- */
|
|
1114
1014
|
// Fix tag casing if necessary
|
|
1115
|
-
if (
|
|
1116
|
-
|
|
1015
|
+
if (lowerCaseTagName)
|
|
1016
|
+
tagName = tagName.toLowerCase();
|
|
1117
1017
|
// Handle opening tags (ie. <this> not </that>)
|
|
1118
|
-
if (!
|
|
1018
|
+
if (!leadingSlash) {
|
|
1119
1019
|
/* Populate attributes */
|
|
1120
1020
|
var attrs = {};
|
|
1121
|
-
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(
|
|
1122
|
-
|
|
1021
|
+
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(attributes));) {
|
|
1022
|
+
var key = attMatch[1], val = attMatch[2];
|
|
1023
|
+
var isQuoted = val[0] === "'" || val[0] === "\"";
|
|
1024
|
+
attrs[key.toLowerCase()] = isQuoted ? val.slice(1, val.length - 1) : val;
|
|
1123
1025
|
}
|
|
1124
|
-
var
|
|
1125
|
-
if (!
|
|
1126
|
-
if (kElementsClosedByOpening[
|
|
1026
|
+
var parentTagName = currentParent.rawTagName;
|
|
1027
|
+
if (!closingSlash && kElementsClosedByOpening[parentTagName]) {
|
|
1028
|
+
if (kElementsClosedByOpening[parentTagName][tagName]) {
|
|
1127
1029
|
stack.pop();
|
|
1128
1030
|
currentParent = (0, back_1.default)(stack);
|
|
1129
1031
|
}
|
|
1130
1032
|
}
|
|
1131
|
-
//
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1033
|
+
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
|
|
1034
|
+
if (tagName === 'a' || tagName === 'A') {
|
|
1035
|
+
if (noNestedTagIndex !== undefined) {
|
|
1036
|
+
stack.splice(noNestedTagIndex);
|
|
1037
|
+
currentParent = (0, back_1.default)(stack);
|
|
1038
|
+
}
|
|
1039
|
+
noNestedTagIndex = stack.length;
|
|
1136
1040
|
}
|
|
1137
1041
|
var tagEndPos_1 = kMarkupPattern.lastIndex;
|
|
1138
|
-
var tagStartPos_1 = tagEndPos_1 -
|
|
1042
|
+
var tagStartPos_1 = tagEndPos_1 - matchLength;
|
|
1139
1043
|
currentParent = currentParent.appendChild(
|
|
1140
1044
|
// Initialize range (end position updated later for closed tags)
|
|
1141
|
-
new HTMLElement(
|
|
1045
|
+
new HTMLElement(tagName, attrs, attributes.slice(1), null, createRange(tagStartPos_1, tagEndPos_1)));
|
|
1142
1046
|
stack.push(currentParent);
|
|
1143
|
-
if (is_block_text_element(
|
|
1047
|
+
if (is_block_text_element(tagName)) {
|
|
1144
1048
|
// Find closing tag
|
|
1145
|
-
var closeMarkup = "</" +
|
|
1146
|
-
var closeIndex =
|
|
1049
|
+
var closeMarkup = "</" + tagName + ">";
|
|
1050
|
+
var closeIndex = lowerCaseTagName
|
|
1147
1051
|
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1148
1052
|
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1149
1053
|
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1150
|
-
if (element_should_be_ignore(
|
|
1054
|
+
if (element_should_be_ignore(tagName)) {
|
|
1151
1055
|
var text = data.substring(tagEndPos_1, textEndPos);
|
|
1152
1056
|
if (text.length > 0 && /\S/.test(text)) {
|
|
1153
1057
|
currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
|
|
@@ -1159,14 +1063,16 @@ function base_parse(data, options) {
|
|
|
1159
1063
|
else {
|
|
1160
1064
|
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1161
1065
|
// Cause to be treated as self-closing, because no close found
|
|
1162
|
-
|
|
1066
|
+
leadingSlash = '/';
|
|
1163
1067
|
}
|
|
1164
1068
|
}
|
|
1165
1069
|
}
|
|
1166
1070
|
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1167
|
-
if (
|
|
1071
|
+
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
|
|
1168
1072
|
while (true) {
|
|
1169
|
-
if (
|
|
1073
|
+
if (tagName === 'a' || tagName === 'A')
|
|
1074
|
+
noNestedTagIndex = undefined;
|
|
1075
|
+
if (currentParent.rawTagName === tagName) {
|
|
1170
1076
|
// Update range end for closed tag
|
|
1171
1077
|
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1172
1078
|
stack.pop();
|
|
@@ -1174,10 +1080,10 @@ function base_parse(data, options) {
|
|
|
1174
1080
|
break;
|
|
1175
1081
|
}
|
|
1176
1082
|
else {
|
|
1177
|
-
var
|
|
1083
|
+
var parentTagName = currentParent.tagName;
|
|
1178
1084
|
// Trying to close current tag, and move on
|
|
1179
|
-
if (kElementsClosedByClosing[
|
|
1180
|
-
if (kElementsClosedByClosing[
|
|
1085
|
+
if (kElementsClosedByClosing[parentTagName]) {
|
|
1086
|
+
if (kElementsClosedByClosing[parentTagName][tagName]) {
|
|
1181
1087
|
stack.pop();
|
|
1182
1088
|
currentParent = (0, back_1.default)(stack);
|
|
1183
1089
|
continue;
|