node-html-parser 4.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1188 @@
1
+ "use strict";
2
/**
 * TypeScript class-inheritance helper: makes `d` a subclass of `b`.
 * Reuses an `__extends` already present on `this` when there is one.
 */
var __extends = (this && this.__extends) || (function () {
    // Copies/links static members from base to derived. On first use it picks
    // the best available strategy and rebinds itself to that strategy.
    var inheritStatics = function (derived, base) {
        inheritStatics = Object.setPrototypeOf ||
            ({ __proto__: [] } instanceof Array && function (target, proto) { target.__proto__ = proto; }) ||
            function (target, source) {
                for (var key in source) {
                    if (Object.prototype.hasOwnProperty.call(source, key)) {
                        target[key] = source[key];
                    }
                }
            };
        return inheritStatics(derived, base);
    };
    return function (d, b) {
        var baseIsNull = b === null;
        if (typeof b !== "function" && !baseIsNull) {
            throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
        }
        inheritStatics(d, b);
        // Intermediate constructor so that b's own constructor is not run.
        function Surrogate() { this.constructor = d; }
        if (baseIsNull) {
            d.prototype = Object.create(b);
        }
        else {
            Surrogate.prototype = b.prototype;
            d.prototype = new Surrogate();
        }
    };
})();
17
/**
 * TypeScript object-spread helper. On first call it rebinds itself to
 * `Object.assign` (or an own-property copy loop when that is missing) and
 * then forwards the call.
 */
var __assign = (this && this.__assign) || function () {
    __assign = Object.assign || function (target) {
        for (var argIndex = 1; argIndex < arguments.length; argIndex++) {
            var source = arguments[argIndex];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    return __assign.apply(this, arguments);
};
28
/**
 * TypeScript array-spread helper: returns `to` concatenated with `from`.
 * When `pack` is truthy (or only two arguments are given) holes in `from`
 * are turned into explicit `undefined` entries before concatenating.
 */
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
    var packed;
    if (pack || arguments.length === 2) {
        var total = from.length;
        for (var idx = 0; idx < total; idx++) {
            // Start copying at the first hole; from then on copy every slot.
            if (packed || !(idx in from)) {
                if (!packed) {
                    packed = Array.prototype.slice.call(from, 0, idx);
                }
                packed[idx] = from[idx];
            }
        }
    }
    return to.concat(packed || from);
};
37
/**
 * TypeScript interop helper: returns ES-module namespaces unchanged and wraps
 * anything else as `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
40
+ Object.defineProperty(exports, "__esModule", { value: true });
41
+ exports.parse = exports.base_parse = void 0;
42
+ var he_1 = __importDefault(require("he"));
43
+ var css_select_1 = require("css-select");
44
+ var node_1 = __importDefault(require("./node"));
45
+ var type_1 = __importDefault(require("./type"));
46
+ var text_1 = __importDefault(require("./text"));
47
+ var matcher_1 = __importDefault(require("../matcher"));
48
+ var back_1 = __importDefault(require("../back"));
49
+ var comment_1 = __importDefault(require("./comment"));
50
/**
 * Decode HTML entities in `val` using the `he` library.
 * @param {string} val text that may contain HTML entities
 * @return {string} decoded text
 */
function decode(val) {
    var decoded = he_1.default.decode(val);
    // The original comment calls this JSON round-trip a "clone string" step;
    // presumably it forces a fresh string copy - TODO confirm before removing.
    var serialized = JSON.stringify(decoded);
    return JSON.parse(serialized);
}
54
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
55
var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
var Dtags = ['details', 'dialog', 'dd', 'div', 'dt'];
var Ftags = ['fieldset', 'figcaption', 'figure', 'footer', 'form'];
var tableTags = ['table', 'td', 'tr'];
var htmlTags = ['address', 'article', 'aside', 'blockquote', 'br', 'hr', 'li', 'main', 'nav', 'ol', 'p', 'pre', 'section', 'ul'];
// Tag names treated as block-level, stored in lower case and upper case.
var kBlockElements = new Set();
/**
 * Register every tag of every list passed as an argument in kBlockElements,
 * both as given and upper-cased.
 */
function addToKBlockElement() {
    for (var listIndex = 0; listIndex < arguments.length; listIndex++) {
        var tags = arguments[listIndex];
        for (var tagIndex = 0; tagIndex < tags.length; tagIndex++) {
            var tag = tags[tagIndex];
            kBlockElements.add(tag);
            kBlockElements.add(tag.toUpperCase());
        }
    }
}
addToKBlockElement(Htags, Dtags, Ftags, tableTags, htmlTags);
79
/**
 * Minimal token list backed by a Set. After add/replace/toggle, and after a
 * remove that actually deleted a token, the `afterUpdate` callback is invoked
 * with the list itself.
 */
var DOMTokenList = /** @class */ (function () {
    /**
     * @param {Iterable<string>} [valuesInit] initial tokens (defaults to none)
     * @param {Function} [afterUpdate] called with the list after a mutation
     */
    function DOMTokenList(valuesInit, afterUpdate) {
        this._set = new Set(valuesInit === void 0 ? [] : valuesInit);
        this._afterUpdate = afterUpdate === void 0 ? function () { return null; } : afterUpdate;
    }
    // Throws when the token contains any whitespace character.
    DOMTokenList.prototype._validate = function (c) {
        if (!/\s/.test(c)) {
            return;
        }
        throw new Error("DOMException in DOMTokenList.add: The token '" + c + "' contains HTML space characters, which are not valid in tokens.");
    };
    // Adds a token (validated first) and notifies.
    DOMTokenList.prototype.add = function (c) {
        this._validate(c);
        this._set.add(c);
        this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
    };
    // Removes c1 and adds c2; c2 is added even when c1 was not present.
    DOMTokenList.prototype.replace = function (c1, c2) {
        this._validate(c2);
        var tokens = this._set;
        tokens.delete(c1);
        tokens.add(c2);
        this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
    };
    // Removes a token; notifies only when something was actually deleted.
    DOMTokenList.prototype.remove = function (c) {
        if (this._set.delete(c)) {
            this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
        }
    };
    // Flips the presence of a token and notifies.
    DOMTokenList.prototype.toggle = function (c) {
        this._validate(c);
        var tokens = this._set;
        if (tokens.has(c)) {
            tokens.delete(c);
        }
        else {
            tokens.add(c);
        }
        this._afterUpdate(this); // eslint-disable-line @typescript-eslint/no-unsafe-call
    };
    DOMTokenList.prototype.contains = function (c) {
        return this._set.has(c);
    };
    Object.defineProperty(DOMTokenList.prototype, "length", {
        // Number of distinct tokens.
        get: function () {
            return this._set.size;
        },
        enumerable: false,
        configurable: true
    });
    // Iterator over the tokens in insertion order.
    DOMTokenList.prototype.values = function () {
        return this._set.values();
    };
    Object.defineProperty(DOMTokenList.prototype, "value", {
        // Tokens as a fresh array.
        get: function () {
            var iterator = this._set.values();
            return Array.from(iterator);
        },
        enumerable: false,
        configurable: true
    });
    // Tokens joined by a single space.
    DOMTokenList.prototype.toString = function () {
        var list = Array.from(this._set.values());
        return list.join(' ');
    };
    return DOMTokenList;
}());
138
+ /**
139
+ * HTMLElement, which contains a set of children.
140
+ *
141
+ * Note: this is a minimalist implementation; only part of the DOM tree
142
+ * API is available (this class uses parentNode and defines nextSibling and
143
+ * nextElementSibling, but no previousSibling etc).
144
+ * @class HTMLElement
145
+ * @extends {Node}
146
+ */
147
+ var HTMLElement = /** @class */ (function (_super) {
148
+ __extends(HTMLElement, _super);
149
/**
 * Creates an instance of HTMLElement.
 * @param tagName tag name as written in the source (stored in rawTagName)
 * @param keyAttrs id and class attribute
 * @param [rawAttrs] attributes in string
 * @param parentNode forwarded to the base Node constructor
 * @param range forwarded to the base Node constructor
 *
 * @memberof HTMLElement
 */
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range) {
    if (rawAttrs === void 0) { rawAttrs = ''; }
    var _this = _super.call(this, parentNode, range) || this;
    /**
     * Node Type declaration.
     */
    _this.nodeType = type_1.default.ELEMENT_NODE;
    _this.rawTagName = tagName;
    _this.rawAttrs = rawAttrs || '';
    _this.id = keyAttrs.id || '';
    _this.childNodes = [];
    // Splitting ' a  b ' on whitespace yields empty strings at both ends;
    // drop them so they never become class tokens.
    var classTokens = keyAttrs.class
        ? keyAttrs.class.split(/\s+/).filter(function (token) { return token !== ''; })
        : [];
    // Every classList mutation is written back to the "class" attribute.
    _this.classList = new DOMTokenList(classTokens, function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
    );
    // Without a raw attribute string, synthesize one from the key attributes.
    if (!rawAttrs) {
        if (keyAttrs.id) {
            _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
        }
        if (keyAttrs.class) {
            var cls = "class=\"" + _this.classList.toString() + "\"";
            _this.rawAttrs = _this.rawAttrs ? _this.rawAttrs + " " + cls : cls;
        }
    }
    return _this;
}
188
/**
 * Quote an attribute value for the raw attribute string.
 *
 * The value is wrapped in double quotes and embedded double quotes become
 * `&quot;`. No JavaScript-style escaping is applied: backslashes, newlines and
 * tabs are emitted verbatim (JSON.stringify used to turn them into `\\`, `\n`,
 * ... which changed the attribute value).
 * @param attr attribute value; null/undefined marks a value-less attribute
 * @returns {string} quoted value, or the string 'null' for null/undefined
 */
HTMLElement.prototype.quoteAttribute = function (attr) {
    // Callers compare against the 'null' sentinel to emit a bare attribute name.
    if (attr === null || attr === undefined) {
        return 'null';
    }
    return '"' + String(attr).replace(/"/g, '&quot;') + '"';
};
199
/**
 * Detach this element from its parent's childNodes (no-op without a parent).
 * The parent receives a new childNodes array; this.parentNode is left as is.
 */
HTMLElement.prototype.remove = function () {
    var self = this;
    var parent = this.parentNode;
    if (!parent) {
        return;
    }
    parent.childNodes = parent.childNodes.filter(function (sibling) {
        return sibling !== self;
    });
};
211
/**
 * Drop every occurrence of `node` from childNodes (assigns a new array).
 * @param {HTMLElement} node node to remove
 */
HTMLElement.prototype.removeChild = function (node) {
    var isOther = function (candidate) {
        return candidate !== node;
    };
    this.childNodes = this.childNodes.filter(isOther);
};
220
/**
 * Replace every occurrence of `oldNode` in childNodes with `newNode`
 * (assigns a new array; other children are kept in place).
 * @param {HTMLElement} oldNode node to exchange
 * @param {HTMLElement} newNode new node
 */
HTMLElement.prototype.exchangeChild = function (oldNode, newNode) {
    var swap = function (candidate) {
        return candidate === oldNode ? newNode : candidate;
    };
    this.childNodes = this.childNodes.map(swap);
};
234
Object.defineProperty(HTMLElement.prototype, "tagName", {
    /**
     * Upper-cased rawTagName; a falsy rawTagName is returned unchanged.
     */
    get: function () {
        var raw = this.rawTagName;
        if (!raw) {
            return raw;
        }
        return raw.toUpperCase();
    },
    enumerable: false,
    configurable: true
});
241
Object.defineProperty(HTMLElement.prototype, "localName", {
    /**
     * Lower-cased rawTagName. A falsy rawTagName is returned unchanged instead
     * of throwing, mirroring the tagName getter.
     */
    get: function () {
        var raw = this.rawTagName;
        return raw ? raw.toLowerCase() : raw;
    },
    enumerable: false,
    configurable: true
});
248
Object.defineProperty(HTMLElement.prototype, "rawText", {
    /**
     * Get escaped (as-is) text value of current node and its children:
     * the concatenation of every child's rawText, in order.
     * @return {string} text content
     */
    get: function () {
        var collected = '';
        this.childNodes.forEach(function (child) {
            collected += child.rawText;
        });
        return collected;
    },
    enumerable: false,
    configurable: true
});
261
Object.defineProperty(HTMLElement.prototype, "textContent", {
    /**
     * Reading returns the entity-decoded rawText; writing replaces all
     * children with a single text node holding the given value.
     */
    get: function () {
        var raw = this.rawText;
        return decode(raw);
    },
    set: function (val) {
        var textNode = new text_1.default(val, this);
        this.childNodes = [textNode];
    },
    enumerable: false,
    configurable: true
});
272
Object.defineProperty(HTMLElement.prototype, "text", {
    /**
     * Get unescaped text value of current node and its children
     * (rawText passed through decode).
     * @return {string} text content
     */
    get: function () {
        var raw = this.rawText;
        return decode(raw);
    },
    enumerable: false,
    configurable: true
});
283
Object.defineProperty(HTMLElement.prototype, "structuredText", {
    /**
     * Get structured Text (with '\n' etc.): text nodes are grouped into
     * blocks, a new block starting before and after every element whose
     * rawTagName is in kBlockElements; blocks are joined with '\n'.
     * @return {string} structured text
     */
    get: function () {
        var current = [];
        var blocks = [current];
        // Open a fresh block, unless the current one is still empty.
        var breakBlock = function () {
            if (current.length > 0) {
                current = [];
                blocks.push(current);
            }
        };
        var visit = function (node) {
            if (node.nodeType === type_1.default.ELEMENT_NODE) {
                var isBlock = kBlockElements.has(node.rawTagName);
                if (isBlock) {
                    breakBlock();
                }
                node.childNodes.forEach(function (child) { visit(child); });
                if (isBlock) {
                    breakBlock();
                }
                return;
            }
            if (node.nodeType !== type_1.default.TEXT_NODE) {
                return;
            }
            if (node.isWhitespace) {
                // Whitespace node, postponed output: remembered as a flag on
                // the block and emitted in front of the next real text.
                current.prependWhitespace = true;
                return;
            }
            var text = node.trimmedText;
            if (current.prependWhitespace) {
                text = " " + text;
                current.prependWhitespace = false;
            }
            current.push(text);
        };
        visit(this);
        var lines = blocks.map(function (block) {
            // Normalize each line's whitespace
            return block.join('').replace(/\s{2,}/g, ' ');
        });
        // trimRight
        return lines.join('\n').replace(/\s+$/, '');
    },
    enumerable: false,
    configurable: true
});
332
/**
 * Serialize the element. Void elements produce only an opening tag, an
 * element without a tag name produces just its innerHTML, everything else
 * is `<tag attrs>innerHTML</tag>`.
 */
HTMLElement.prototype.toString = function () {
    var tag = this.rawTagName;
    if (!tag) {
        return this.innerHTML;
    }
    var isVoid = /^(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)$/i.test(tag);
    var opening = "<" + tag + (this.rawAttrs ? " " + this.rawAttrs : '') + ">";
    if (isVoid) {
        return opening;
    }
    return opening + this.innerHTML + "</" + tag + ">";
};
346
Object.defineProperty(HTMLElement.prototype, "innerHTML", {
    /**
     * Reading concatenates the serialization of every child. Writing parses
     * the markup with default options and adopts the parsed children; when
     * parsing yields no nodes the content is stored as one text node.
     */
    get: function () {
        var serialize = function (child) {
            return child.toString();
        };
        return this.childNodes.map(serialize).join('');
    },
    set: function (content) {
        var parsed = parse(content);
        if (parsed.childNodes.length) {
            this.childNodes = parsed.childNodes;
        }
        else {
            this.childNodes = [new text_1.default(content, this)];
        }
    },
    enumerable: false,
    configurable: true
});
362
/**
 * Replace the children of this element.
 * @param content a Node (wrapped in an array), a markup string (parsed with
 *   `options`; stored as one text node when parsing yields nothing), or any
 *   other value, which is assigned to childNodes as-is
 * @param [options] options forwarded to parse for string content
 */
HTMLElement.prototype.set_content = function (content, options) {
    if (options === void 0) { options = {}; }
    var nodes = content;
    if (content instanceof node_1.default) {
        nodes = [content];
    }
    else if (typeof content == 'string') {
        var parsed = parse(content, options);
        nodes = parsed.childNodes.length ? parsed.childNodes : [new text_1.default(content, this)];
    }
    this.childNodes = nodes;
};
373
/**
 * Replace this element, inside its parent's childNodes, with the given nodes.
 *
 * Each argument may be a Node (inserted as-is) or a markup string (parsed with
 * default options; kept as one text node when parsing yields nothing). Other
 * argument types are ignored. Every inserted node gets its parentNode set to
 * the parent, as appendChild does.
 *
 * Does nothing when the element has no parent, or when the element is not
 * found in parentNode.childNodes.
 */
HTMLElement.prototype.replaceWith = function () {
    var parent = this.parentNode;
    if (!parent) {
        return;
    }
    var position = parent.childNodes.indexOf(this);
    if (position === -1) {
        // slice(0, -1) / slice(0) would drop and duplicate siblings.
        return;
    }
    var replacement = [];
    for (var i = 0; i < arguments.length; i++) {
        var node = arguments[i];
        if (node instanceof node_1.default) {
            replacement.push(node);
        }
        else if (typeof node === 'string') {
            var parsed = parse(node);
            var parsedNodes = parsed.childNodes.length ? parsed.childNodes : [new text_1.default(node, parent)];
            for (var j = 0; j < parsedNodes.length; j++) {
                replacement.push(parsedNodes[j]);
            }
        }
    }
    replacement.forEach(function (inserted) {
        inserted.parentNode = parent;
    });
    parent.childNodes = parent.childNodes
        .slice(0, position)
        .concat(replacement, parent.childNodes.slice(position + 1));
};
397
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
    /**
     * Serialized markup of this element; an alias for toString().
     */
    get: function () {
        var markup = this.toString();
        return markup;
    },
    enumerable: false,
    configurable: true
});
404
/**
 * Trim element from right (in block) after seeing pattern in a TextNode.
 * A non-element child whose rawText matches is cut at the match and all
 * following children of this element are dropped; element children are
 * processed recursively.
 * @param {RegExp} pattern pattern to find
 * @return {HTMLElement} reference to current node
 */
HTMLElement.prototype.trimRight = function (pattern) {
    var position = 0;
    while (position < this.childNodes.length) {
        var current = this.childNodes[position];
        if (current.nodeType === type_1.default.ELEMENT_NODE) {
            current.trimRight(pattern);
        }
        else {
            var cut = current.rawText.search(pattern);
            if (cut > -1) {
                current.rawText = current.rawText.substr(0, cut);
                // trim all following nodes (this also ends the loop).
                this.childNodes.length = position + 1;
            }
        }
        position += 1;
    }
    return this;
};
426
Object.defineProperty(HTMLElement.prototype, "structure", {
    /**
     * Get DOM structure: one line per element (`tag#id.class1.class2`) and
     * one `#text` line per non-whitespace text node, indented by depth.
     * @return {string} structure
     */
    get: function () {
        var lines = [];
        var depth = 0;
        var emit = function (label) {
            lines.push(' '.repeat(depth) + label);
        };
        var walk = function (element) {
            var idPart = element.id ? "#" + element.id : '';
            var classPart = element.classList.length ? "." + element.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
            emit("" + element.rawTagName + idPart + classPart);
            depth += 1;
            element.childNodes.forEach(function (child) {
                if (child.nodeType === type_1.default.ELEMENT_NODE) {
                    walk(child);
                    return;
                }
                if (child.nodeType === type_1.default.TEXT_NODE && !child.isWhitespace) {
                    emit('#text');
                }
            });
            depth -= 1;
        };
        walk(this);
        return lines.join('\n');
    },
    enumerable: false,
    configurable: true
});
460
/**
 * Remove whitespaces in this sub tree: whitespace-only text nodes are
 * dropped, other text nodes get their rawText replaced by trimmedRawText and
 * element children are processed recursively. The childNodes array is
 * compacted in place.
 * @return {HTMLElement} pointer to this
 */
HTMLElement.prototype.removeWhitespace = function () {
    var nodes = this.childNodes;
    var kept = 0;
    for (var i = 0; i < nodes.length; i++) {
        var node = nodes[i];
        if (node.nodeType === type_1.default.TEXT_NODE) {
            if (node.isWhitespace) {
                continue;
            }
            node.rawText = node.trimmedRawText;
        }
        else if (node.nodeType === type_1.default.ELEMENT_NODE) {
            node.removeWhitespace();
        }
        // Write index never runs ahead of the read index.
        nodes[kept++] = node;
    }
    nodes.length = kept;
    return this;
};
482
/**
 * Query CSS selector to find matching nodes. Delegates to css-select's
 * selectAll, rooted at this element, in xmlMode with this package's matcher
 * adapter.
 * @param {string} selector CSS selector
 * @return {HTMLElement[]} matching elements
 */
HTMLElement.prototype.querySelectorAll = function (selector) {
    var options = {
        xmlMode: true,
        adapter: matcher_1.default,
    };
    return (0, css_select_1.selectAll)(selector, this, options);
};
550
/**
 * Query CSS Selector to find matching node. Delegates to css-select's
 * selectOne, rooted at this element, in xmlMode with this package's matcher
 * adapter.
 * @param {string} selector CSS selector
 * @return {HTMLElement} matching node
 */
HTMLElement.prototype.querySelector = function (selector) {
    var options = {
        xmlMode: true,
        adapter: matcher_1.default,
    };
    return (0, css_select_1.selectOne)(selector, this, options);
};
598
/**
 * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
 *
 * css-select is given an adapter that only exposes the chain between this
 * element and its ancestors: each ancestor's sole "child" is the next node on
 * the way down to this element.
 * @param selector a DOMString containing a selector list
 */
HTMLElement.prototype.closest = function (selector) {
    // node -> the child of that node lying on the path to `this`
    // (null for `this` itself).
    var childOnPath = new Map();
    var previous = null;
    for (var up = this; up; up = up.parentNode) {
        childOnPath.set(up, previous);
        previous = up;
    }
    // First node passing `test`, searching each candidate and then downwards
    // along the recorded path only.
    function findOne(test, elems) {
        var found = null;
        for (var i = 0, count = elems.length; i < count && !found; i++) {
            var candidate = elems[i];
            if (test(candidate)) {
                found = candidate;
                continue;
            }
            var next = childOnPath.get(candidate);
            if (next) {
                found = findOne(test, [next]);
            }
        }
        return found;
    }
    for (var start = this; start; start = start.parentNode) {
        var match = (0, css_select_1.selectOne)(selector, start, {
            xmlMode: true,
            adapter: __assign(__assign({}, matcher_1.default), {
                getChildren: function (node) {
                    var child = childOnPath.get(node);
                    return child && [child];
                },
                getSiblings: function (node) {
                    return [node];
                },
                findOne: findOne,
                findAll: function () {
                    return [];
                },
            }),
        });
        if (match) {
            return match;
        }
    }
    return null;
};
647
/**
 * Append a child node to childNodes and make this element its parentNode.
 * @param {Node} node node to append
 * @return {Node} node appended
 */
HTMLElement.prototype.appendChild = function (node) {
    var children = this.childNodes;
    children.push(node);
    node.parentNode = this;
    return node;
};
658
Object.defineProperty(HTMLElement.prototype, "firstChild", {
    /**
     * Get first child node (undefined when there are no children).
     * @return {Node} first child node
     */
    get: function () {
        var children = this.childNodes;
        return children[0];
    },
    enumerable: false,
    configurable: true
});
669
Object.defineProperty(HTMLElement.prototype, "lastChild", {
    /**
     * Get last child node, as returned by the `back` helper for childNodes.
     * @return {Node} last child node
     */
    get: function () {
        var children = this.childNodes;
        return (0, back_1.default)(children);
    },
    enumerable: false,
    configurable: true
});
680
Object.defineProperty(HTMLElement.prototype, "attrs", {
    /**
     * Get attributes, keyed by lower-cased name. The result is cached in
     * this._attrs and the same object is returned on later reads.
     * @access private
     * @return {Object} parsed and unescaped attributes
     */
    get: function () {
        if (!this._attrs) {
            this._attrs = {};
            var raw = this.rawAttributes;
            for (var name in raw) {
                // Value-less attributes (null) decode to an empty string.
                this._attrs[name.toLowerCase()] = decode(raw[name] || '');
            }
        }
        return this._attrs;
    },
    enumerable: false,
    configurable: true
});
701
Object.defineProperty(HTMLElement.prototype, "attributes", {
    /**
     * Decoded attributes keyed by the name exactly as found in rawAttributes
     * (case preserved). A fresh object is built on every read.
     */
    get: function () {
        var raw = this.rawAttributes;
        var decoded = {};
        for (var name in raw) {
            // Value-less attributes (null) decode to an empty string.
            decoded[name] = decode(raw[name] || '');
        }
        return decoded;
    },
    enumerable: false,
    configurable: true
});
714
Object.defineProperty(HTMLElement.prototype, "rawAttributes", {
    /**
     * Get escaped (as-is) attributes parsed from this.rawAttrs. Attributes
     * without a value (or with an empty value) map to null. The result is
     * cached in this._rawAttrs.
     * @return {Object} parsed attributes
     */
    get: function () {
        if (this._rawAttrs) {
            return this._rawAttrs;
        }
        var parsed = {};
        if (this.rawAttrs) {
            var pattern = /([a-z()#][a-z0-9-_:()#]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/gi;
            var found = pattern.exec(this.rawAttrs);
            while (found) {
                // Groups 2/3/4: double-quoted, single-quoted, unquoted value.
                parsed[found[1]] = found[2] || found[3] || found[4] || null;
                found = pattern.exec(this.rawAttrs);
            }
        }
        this._rawAttrs = parsed;
        return parsed;
    },
    enumerable: false,
    configurable: true
});
737
/**
 * Remove an attribute from the HTMLElement.
 *
 * The name is matched case-insensitively, like hasAttribute/getAttribute/
 * setAttribute. Updates the parsed raw attributes, the decoded-attribute cache
 * (whose keys are lower-cased), the raw attribute string and, for "id",
 * this.id.
 * @param {string} key The attribute name
 */
HTMLElement.prototype.removeAttribute = function (key) {
    var _this = this;
    var wanted = String(key).toLowerCase();
    var attrs = this.rawAttributes;
    Object.keys(attrs).forEach(function (name) {
        if (name.toLowerCase() === wanted) {
            delete attrs[name];
        }
    });
    // Update this.attrs cache
    if (this._attrs) {
        delete this._attrs[wanted];
    }
    // Update rawString; quoting goes through quoteAttribute, as in
    // setAttribute (JSON.stringify would emit \" inside attribute values).
    this.rawAttrs = Object.keys(attrs)
        .map(function (name) {
        var val = _this.quoteAttribute(attrs[name]);
        if (val === 'null' || val === '""') {
            return name;
        }
        return name + "=" + val;
    })
        .join(' ');
    // Update this.id
    if (wanted === 'id') {
        this.id = '';
    }
};
759
/**
 * Tell whether an attribute exists; the name is matched case-insensitively.
 * @param {string} key The attribute name
 * @return {boolean} true when the lower-cased name is a key of this.attrs
 */
HTMLElement.prototype.hasAttribute = function (key) {
    var name = key.toLowerCase();
    return name in this.attrs;
};
762
/**
 * Get an attribute; the name is matched case-insensitively.
 * @param {string} key The attribute name
 * @return {string} value of the attribute (undefined when it is not set)
 */
HTMLElement.prototype.getAttribute = function (key) {
    var name = key.toLowerCase();
    return this.attrs[name];
};
769
/**
 * Set an attribute value to the HTMLElement.
 *
 * The name is matched case-insensitively against existing attributes; when a
 * match exists its original spelling is kept. The value is converted with
 * String(), so null and undefined are stored as the text "null"/"undefined" -
 * use removeAttribute to remove an attribute. this.classList is not touched.
 * @param {string} key The attribute name
 * @param {string} value The value to set
 * @throws {Error} when called with fewer than two arguments
 */
HTMLElement.prototype.setAttribute = function (key, value) {
    var _this = this;
    if (arguments.length < 2) {
        throw new Error("Failed to execute 'setAttribute' on 'Element'");
    }
    var k2 = key.toLowerCase();
    var attrs = this.rawAttributes;
    // Reuse the spelling of an existing attribute with the same name.
    for (var k in attrs) {
        if (k.toLowerCase() === k2) {
            key = k;
            break;
        }
    }
    attrs[key] = String(value);
    // update this.attrs
    if (this._attrs) {
        this._attrs[k2] = decode(attrs[key]);
    }
    // Update rawString
    this.rawAttrs = Object.keys(attrs)
        .map(function (name) {
        var val = _this.quoteAttribute(attrs[name]);
        if (val === 'null' || val === '""')
            return name;
        return name + "=" + val;
    })
        .join(' ');
    // Update this.id; compare the lower-cased name, because `key` may now
    // carry the existing attribute's spelling (e.g. "ID").
    if (k2 === 'id') {
        this.id = attrs[key];
    }
};
806
/**
 * Replace all the attributes of the HTMLElement by the provided attributes.
 *
 * Cached parsed attributes are invalidated and the raw attribute string is
 * rebuilt. Values are converted with String() and quoted; null, undefined and
 * empty-string values produce a bare attribute name. this.id is set to the
 * new "id" value (name matched case-insensitively) or '' when none is given.
 * this.classList is not touched.
 * @param {Attributes} attributes the new attribute set
 */
HTMLElement.prototype.setAttributes = function (attributes) {
    var _this = this;
    // Invalidate current this.attributes
    if (this._attrs) {
        delete this._attrs;
    }
    // Invalidate current this.rawAttributes
    if (this._rawAttrs) {
        delete this._rawAttrs;
    }
    var nextId = '';
    // Update rawString
    this.rawAttrs = Object.keys(attributes)
        .map(function (name) {
        var raw = attributes[name];
        var missing = raw === null || raw === undefined;
        if (!missing && name.toLowerCase() === 'id') {
            nextId = String(raw);
        }
        // Test the quoted form, as setAttribute does, so that the literal
        // string "null" keeps its value while null/''/undefined collapse to
        // a bare name.
        var quoted = _this.quoteAttribute(missing ? null : String(raw));
        if (quoted === 'null' || quoted === '""')
            return name;
        return name + "=" + quoted;
    })
        .join(' ');
    // Update this.id
    this.id = nextId;
};
830
/**
 * Parse `html` (default parse options) and insert the resulting nodes
 * relative to this element. Every inserted node gets its parentNode set to
 * the node whose childNodes now contain it.
 * @param {string} where one of 'beforebegin', 'afterbegin', 'beforeend', 'afterend'
 * @param {string} html markup to parse and insert
 * @throws {Error} with fewer than two arguments, or for an unknown position
 * @throws {TypeError} for 'beforebegin'/'afterend' when this element has no
 *   parent or is not among its parent's childNodes
 */
HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
    var _this = this;
    if (arguments.length < 2) {
        throw new Error('2 arguments required');
    }
    var inserted = parse(html).childNodes;
    var adopt = function (owner) {
        inserted.forEach(function (n) {
            n.parentNode = owner;
        });
    };
    if (where === 'afterend' || where === 'beforebegin') {
        var parent_1 = this.parentNode;
        var idx = parent_1 ? parent_1.childNodes.indexOf(this) : -1;
        if (idx === -1) {
            // Splicing at -1 would put the nodes at an unrelated position.
            throw new TypeError("Cannot insert '" + where + "': the element has no parent or is not among its parent's child nodes");
        }
        var at = where === 'afterend' ? idx + 1 : idx;
        Array.prototype.splice.apply(parent_1.childNodes, [at, 0].concat(inserted));
        adopt(parent_1);
    }
    else if (where === 'afterbegin') {
        Array.prototype.unshift.apply(this.childNodes, inserted);
        adopt(this);
    }
    else if (where === 'beforeend') {
        // appendChild sets parentNode itself.
        inserted.forEach(function (n) {
            _this.appendChild(n);
        });
    }
    else {
        throw new Error("The value provided ('" + where + "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'");
    }
};
874
Object.defineProperty(HTMLElement.prototype, "nextSibling", {
    /**
     * Node following this element in parentNode.childNodes. Returns null when
     * there is none (or when this element is not among the parent's children)
     * and undefined when there is no parent.
     */
    get: function () {
        var parent = this.parentNode;
        if (!parent) {
            return;
        }
        var siblings = parent.childNodes;
        var at = siblings.indexOf(this);
        if (at === -1) {
            return null;
        }
        return siblings[at + 1] || null;
    },
    enumerable: false,
    configurable: true
});
890
Object.defineProperty(HTMLElement.prototype, "nextElementSibling", {
    /**
     * First HTMLElement following this element in parentNode.childNodes.
     * Returns null when there is none (or when this element is not among the
     * parent's children) and undefined when there is no parent.
     */
    get: function () {
        var parent = this.parentNode;
        if (!parent) {
            return;
        }
        var siblings = parent.childNodes;
        var at = siblings.indexOf(this);
        if (at === -1) {
            return null;
        }
        for (var i = at + 1; i < siblings.length; i++) {
            var candidate = siblings[i];
            if (candidate instanceof HTMLElement) {
                return candidate || null;
            }
        }
        return null;
    },
    enumerable: false,
    configurable: true
});
913
Object.defineProperty(HTMLElement.prototype, "classNames", {
    /**
     * String form of classList (whatever classList.toString() returns).
     */
    get: function () {
        var list = this.classList;
        return list.toString();
    },
    enumerable: false,
    configurable: true
});
920
+ return HTMLElement;
921
+ }(node_1.default));
922
+ exports.default = HTMLElement;
923
+ // https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
924
+ var kMarkupPattern = /<!--[^]*?(?=-->)-->|<(\/?)([a-z][-.:0-9_a-z]*)\s*((?=[/>]*?)|(?:.*?[\s\d/'"])|(?:.*?[\w]))(\/?)>/gi;
925
+ // <(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
926
+ // <([a-z][-.:0-9_a-z]*)\s*\/>
927
+ // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>
928
+ // <(area|base|br|col|hr|img|input|link|meta|source)\s*(.*)\/?>|<(?<tag>[^\s]*)(.*)>(.*)</\k<tag>>
929
+ var kAttributePattern = /(^|\s)(id|class)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/gi;
930
+ var kSelfClosingElements = {
931
+ area: true,
932
+ AREA: true,
933
+ base: true,
934
+ BASE: true,
935
+ br: true,
936
+ BR: true,
937
+ col: true,
938
+ COL: true,
939
+ hr: true,
940
+ HR: true,
941
+ img: true,
942
+ IMG: true,
943
+ input: true,
944
+ INPUT: true,
945
+ link: true,
946
+ LINK: true,
947
+ meta: true,
948
+ META: true,
949
+ source: true,
950
+ SOURCE: true,
951
+ embed: true,
952
+ EMBED: true,
953
+ param: true,
954
+ PARAM: true,
955
+ track: true,
956
+ TRACK: true,
957
+ wbr: true,
958
+ WBR: true,
959
+ };
960
/**
 * For an element that is currently open (outer key), the set of tag names
 * whose opening tag closes it implicitly (inner keys).
 * Outer and inner names are each present in all-lowercase and all-uppercase.
 */
var kElementsClosedByOpening = (function () {
    // [open element, tags whose opening closes it]
    var rules = [
        ['li', ['li']],
        ['p', ['p', 'div']],
        ['b', ['div']],
        ['td', ['td', 'th']],
        ['th', ['td', 'th']],
        ['h1', ['h1']],
        ['h2', ['h2']],
        ['h3', ['h3']],
        ['h4', ['h4']],
        ['h5', ['h5']],
        ['h6', ['h6']],
    ];
    // Build a fresh { name: true, NAME: true } set for the given lowercase names.
    function bothCases(names) {
        var set = {};
        names.forEach(function (name) { set[name] = true; });
        names.forEach(function (name) { set[name.toUpperCase()] = true; });
        return set;
    }
    var table = {};
    rules.forEach(function (rule) {
        var openTag = rule[0];
        var closers = rule[1];
        table[openTag] = bothCases(closers);
        table[openTag.toUpperCase()] = bothCases(closers);
    });
    return table;
})();
984
/**
 * For an element that is currently open (outer key), the set of tag names
 * whose closing tag also closes it implicitly (inner keys).
 * Outer and inner names are each present in all-lowercase and all-uppercase.
 */
var kElementsClosedByClosing = (function () {
    // [open element, tags whose closing tag closes it]
    var rules = [
        ['li', ['ul', 'ol']],
        ['a', ['div']],
        ['b', ['div']],
        ['i', ['div']],
        ['p', ['div']],
        ['td', ['tr', 'table']],
        ['th', ['tr', 'table']],
    ];
    // Build a fresh { name: true, NAME: true } set for the given lowercase names.
    function bothCases(names) {
        var set = {};
        names.forEach(function (name) { set[name] = true; });
        names.forEach(function (name) { set[name.toUpperCase()] = true; });
        return set;
    }
    var table = {};
    rules.forEach(function (rule) {
        var openTag = rule[0];
        var closers = rule[1];
        table[openTag] = bothCases(closers);
        table[openTag.toUpperCase()] = bothCases(closers);
    });
    return table;
})();
1000
// Tag name of the synthetic element that base_parse wraps around its input
// (see https://github.com/taoqf/node-html-parser/issues/38).
var frameflag = 'documentfragmentcontainer';
/**
 * Parse a chunk of HTML source into a tree of nodes.
 *
 * The input is wrapped in a synthetic `<frameflag>` element, scanned tag by
 * tag with kMarkupPattern, and every reported range is shifted back so that
 * it is relative to the caller's original string.
 *
 * @param {string} data html
 * @param {Object} [options]
 * @param {boolean} [options.lowerCaseTagName=false] lower-case every tag name
 *        before it is stored and compared
 * @param {boolean} [options.comment=false] keep `<!-- -->` comments as
 *        comment nodes instead of dropping them
 * @param {Object} [options.blockTextElements] map of tag name -> flag
 *        (default: script, noscript, style and pre, all true). The content of
 *        these elements is never scanned for markup; when the flag is truthy
 *        and the content is not whitespace-only, it is kept as one text node.
 *        Names are matched exactly, ignoring case.
 * @return {HTMLElement[]} the stack of elements still open when the input
 *         ended; index 0 is always the root element that owns the tree
 */
function base_parse(data, options) {
    if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
    var elements = options.blockTextElements || {
        script: true,
        noscript: true,
        style: true,
        pre: true,
    };
    var element_names = Object.keys(elements);
    // Anchor the pattern: an unanchored /pre/i would also match tags such as
    // <preview> or <my-pre-tag> and swallow their markup as raw text.
    function exact_tag_pattern(name) {
        return new RegExp('^' + name + '$', 'i');
    }
    var kBlockTextElements = element_names.map(exact_tag_pattern);
    var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(exact_tag_pattern);
    // True when the raw content of `tag` must be kept as a text node.
    function element_should_be_ignore(tag) {
        return kIgnoreElements.some(function (it) { return it.test(tag); });
    }
    // True when the content of `tag` must not be scanned for markup.
    function is_block_text_element(tag) {
        return kBlockTextElements.some(function (it) { return it.test(tag); });
    }
    // Length of the opening wrapper "<" + frameflag + ">".
    var frameFlagOffset = frameflag.length + 2;
    // Translate positions in the wrapped string to positions in the caller's string.
    var createRange = function (startPos, endPos) { return [startPos - frameFlagOffset, endPos - frameFlagOffset]; };
    // The root range is taken before wrapping, so it spans the caller's input.
    var root = new HTMLElement(null, {}, '', null, [0, data.length]);
    var currentParent = root;
    var stack = [root];
    var lastTextPos = -1;
    // Stack depth at which the currently open <a> was pushed, if any.
    var noNestedTagIndex = undefined;
    var match;
    // https://github.com/taoqf/node-html-parser/issues/38
    data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
    // End of the caller's data inside the wrapped string. The closing wrapper
    // "</" + frameflag + ">" is frameflag.length + 3 characters long.
    var dataEndPos = data.length - (frameflag.length + 3);
    // kMarkupPattern is a shared /g regex: start from a known position even
    // if an earlier scan was interrupted and left lastIndex non-zero.
    kMarkupPattern.lastIndex = 0;
    while ((match = kMarkupPattern.exec(data))) {
        var tagEndPos = kMarkupPattern.lastIndex;
        var tagStartPos = tagEndPos - match[0].length;
        // Add TextNode if there is content between the previous tag and this one
        if (lastTextPos > -1 && lastTextPos < tagStartPos) {
            var gapText = data.substring(lastTextPos, tagStartPos);
            currentParent.appendChild(new text_1.default(gapText, currentParent, createRange(lastTextPos, tagStartPos)));
        }
        lastTextPos = tagEndPos;
        // https://github.com/taoqf/node-html-parser/issues/38
        // Skip frameflag node
        if (match[2] === frameflag)
            continue;
        // Handle comments
        if (match[0][1] === '!') {
            if (options.comment) {
                // Only keep what is in between <!-- and -->
                var commentText = data.substring(tagStartPos + 4, tagEndPos - 3);
                currentParent.appendChild(new comment_1.default(commentText, currentParent, createRange(tagStartPos, tagEndPos)));
            }
            continue;
        }
        /* -- Handle tag matching -- */
        // Fix tag casing if necessary
        if (options.lowerCaseTagName)
            match[2] = match[2].toLowerCase();
        // Handle opening tags (ie. <this> not </that>)
        if (!match[1]) {
            /* Populate attributes (only id and class are extracted here) */
            var attrs = {};
            kAttributePattern.lastIndex = 0;
            for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
                attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
            }
            // Some open elements are closed implicitly by certain opening tags
            var openParentTag = currentParent.rawTagName;
            if (!match[4] && kElementsClosedByOpening[openParentTag]) {
                if (kElementsClosedByOpening[openParentTag][match[2]]) {
                    stack.pop();
                    currentParent = (0, back_1.default)(stack);
                }
            }
            // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
            if (match[2] === 'a' || match[2] === 'A') {
                if (noNestedTagIndex !== undefined) {
                    stack.splice(noNestedTagIndex);
                    currentParent = (0, back_1.default)(stack);
                }
                noNestedTagIndex = stack.length;
            }
            currentParent = currentParent.appendChild(
            // Initialize range (end position updated later for closed tags)
            new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos, tagEndPos)));
            stack.push(currentParent);
            if (is_block_text_element(match[2])) {
                // Find closing tag
                var closeMarkup = "</" + match[2] + ">";
                var closeIndex = options.lowerCaseTagName
                    ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
                    : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
                // Without a closing tag the content runs to the end of the caller's data
                var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
                if (element_should_be_ignore(match[2])) {
                    var rawText = data.substring(tagEndPos, textEndPos);
                    if (rawText.length > 0 && /\S/.test(rawText)) {
                        currentParent.appendChild(new text_1.default(rawText, currentParent, createRange(tagEndPos, textEndPos)));
                    }
                }
                if (closeIndex === -1) {
                    // No closing tag: move past the end of the data so the scan stops;
                    // the element is left open on the stack.
                    lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
                }
                else {
                    lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
                    // The closing tag was found and already consumed above: flag the
                    // match as a closing one so the block below pops this element.
                    match[1] = 'true';
                }
            }
        }
        // Handle closing tags or self-closed elements (ie </tag> or <br>)
        if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
            if (match[2] === 'a' || match[2] === 'A')
                noNestedTagIndex = undefined;
            while (true) {
                if (currentParent.rawTagName === match[2]) {
                    // Update range end for closed tag
                    currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
                    stack.pop();
                    currentParent = (0, back_1.default)(stack);
                    break;
                }
                // Trying to close current tag, and move on
                var parentTag = currentParent.tagName;
                if (kElementsClosedByClosing[parentTag] && kElementsClosedByClosing[parentTag][match[2]]) {
                    stack.pop();
                    currentParent = (0, back_1.default)(stack);
                    continue;
                }
                // Use aggressive strategy to handle unmatching markups.
                break;
            }
        }
    }
    return stack;
}
1144
// Export base_parse, which returns the stack of still-open elements (root at index 0).
+ exports.base_parse = base_parse;
1145
/**
 * Parse a chunk of HTML source and return its root element.
 *
 * Runs base_parse, then walks the elements it left unclosed (innermost first)
 * and repairs each one by moving its children to a surrounding element.
 *
 * @param {string} data html
 * @param {Object} [options] passed through to base_parse unchanged
 * @return {HTMLElement} root element (the first entry of base_parse's stack)
 */
function parse(data, options) {
    if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
    var openElements = base_parse(data, options);
    var root = openElements[0];
    while (openElements.length > 1) {
        // Handle each error element, innermost first.
        var unclosed = openElements.pop();
        var enclosing = (0, back_1.default)(openElements);
        if (!(unclosed.parentNode && unclosed.parentNode.parentNode)) {
            // Its parent has no parent of its own: nothing to repair, just skip.
            continue;
        }
        var isRepeatedTag = unclosed.parentNode === enclosing && unclosed.tagName === enclosing.tagName;
        enclosing.removeChild(unclosed);
        if (isRepeatedTag) {
            // Pair error case <h3> <h3>: treated as <h3> </h3>, so the children
            // of the second tag are handed to the parent of the first one.
            unclosed.childNodes.forEach(function (child) {
                enclosing.parentNode.appendChild(child);
            });
            // The first tag of the pair has been dealt with as well.
            openElements.pop();
        }
        else {
            // Single error <div> <h3> </div>: drop <h3> and keep its children
            // under the enclosing open element.
            unclosed.childNodes.forEach(function (child) {
                enclosing.appendChild(child);
            });
        }
    }
    return root;
}
1188
// Export parse, which returns the root element after repairing unclosed tags.
+ exports.parse = parse;