node-html-parser 6.1.15-0 → 7.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/README.md +5 -6
- package/dist/index.js +8 -9
- package/dist/main.js +70 -26
- package/dist/matcher.js +26 -26
- package/dist/nodes/comment.js +22 -46
- package/dist/nodes/html.d.ts +3 -1
- package/dist/nodes/html.js +579 -725
- package/dist/nodes/node.js +18 -29
- package/dist/nodes/text.js +61 -100
- package/dist/valid.js +3 -4
- package/dist/void-tag.js +13 -15
- package/package.json +3 -2
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,39 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
### [7.0.2](https://github.com/taoqf/node-fast-html-parser/compare/v7.0.1...v7.0.2) (2026-01-07)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Bug Fixes
|
|
9
|
+
|
|
10
|
+
* [#227](https://github.com/taoqf/node-fast-html-parser/issues/227) ([51528c4](https://github.com/taoqf/node-fast-html-parser/commit/51528c41ef2648d6c4dc1aecd14ee9d2b0083c4f))
|
|
11
|
+
* [#294](https://github.com/taoqf/node-fast-html-parser/issues/294) Closing tag is missing but valid HTML is still not parseable ([950865f](https://github.com/taoqf/node-fast-html-parser/commit/950865fab5f4df7853b36712869b71c90f4d3a1b))
|
|
12
|
+
* add missing dev dependency: yarn ([6d73ea3](https://github.com/taoqf/node-fast-html-parser/commit/6d73ea37c48f4170c35907869ba410c5122a9a1f))
|
|
13
|
+
* test valid.js ([a81fc46](https://github.com/taoqf/node-fast-html-parser/commit/a81fc46fab2507615b0362150d62568a6f52ee4e))
|
|
14
|
+
|
|
15
|
+
### [7.0.1](https://github.com/taoqf/node-fast-html-parser/compare/v7.0.0...v7.0.1) (2024-12-26)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
### Bug Fixes
|
|
19
|
+
|
|
20
|
+
* upgrade node version ([efd77ff](https://github.com/taoqf/node-fast-html-parser/commit/efd77ff93593922512b12216984ba778b2f46593))
|
|
21
|
+
|
|
22
|
+
## [7.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.14...v7.0.0) (2024-12-26)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
### ⚠ BREAKING CHANGES
|
|
26
|
+
|
|
27
|
+
* fix #277, for that change estarget to es6
|
|
28
|
+
|
|
29
|
+
### Features
|
|
30
|
+
|
|
31
|
+
* fix [#277](https://github.com/taoqf/node-fast-html-parser/issues/277), for that change estarget to es6 ([432a3e7](https://github.com/taoqf/node-fast-html-parser/commit/432a3e71ba219e76188bcc2e89e525e40911d164))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
### Bug Fixes
|
|
35
|
+
|
|
36
|
+
* add tests for [#227](https://github.com/taoqf/node-fast-html-parser/issues/227) ([5856ee2](https://github.com/taoqf/node-fast-html-parser/commit/5856ee2ef2a0dfef43d75f6d4d13c37c213f25cf))
|
|
37
|
+
|
|
5
38
|
### [6.1.14](https://github.com/taoqf/node-fast-html-parser/compare/v6.1.13...v6.1.14) (2024-05-14)
|
|
6
39
|
|
|
7
40
|
|
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ DOM tree, with element query support.
|
|
|
6
6
|
Per the design, it intends to parse massive HTML files in lowest price, thus the
|
|
7
7
|
performance is the top priority. For this reason, some malformatted HTML may not
|
|
8
8
|
be able to parse correctly, but most usual errors are covered (eg. HTML4 style
|
|
9
|
-
no closing `<
|
|
9
|
+
no closing `<td>` etc).
|
|
10
10
|
|
|
11
11
|
## Install
|
|
12
12
|
|
|
@@ -43,6 +43,7 @@ import { parse } from 'node-html-parser';
|
|
|
43
43
|
|
|
44
44
|
const root = parse('<ul id="list"><li>Hello World</li></ul>');
|
|
45
45
|
|
|
46
|
+
// parse() adds a wrapper node, so the input data's first node is the root's first child node
|
|
46
47
|
console.log(root.firstChild.structure);
|
|
47
48
|
// ul#list
|
|
48
49
|
// li
|
|
@@ -74,7 +75,7 @@ var root = HTMLParser.parse('<ul id="list"><li>Hello World</li></ul>');
|
|
|
74
75
|
|
|
75
76
|
### parse(data[, options])
|
|
76
77
|
|
|
77
|
-
Parse the data provided, and return the root of the generated DOM.
|
|
78
|
+
Parse the data provided, wrap the result in a new node, and return the root of the generated DOM.
|
|
78
79
|
|
|
79
80
|
- **data**, data to parse
|
|
80
81
|
- **options**, parse options
|
|
@@ -186,7 +187,6 @@ Node --|> TextNode
|
|
|
186
187
|
Node ..> ClassList
|
|
187
188
|
```
|
|
188
189
|
|
|
189
|
-
|
|
190
190
|
## HTMLElement Methods
|
|
191
191
|
|
|
192
192
|
### trimRight()
|
|
@@ -316,7 +316,6 @@ Clone a node.
|
|
|
316
316
|
|
|
317
317
|
Get element by its ID.
|
|
318
318
|
|
|
319
|
-
|
|
320
319
|
## HTMLElement Properties
|
|
321
320
|
|
|
322
321
|
### text
|
|
@@ -351,11 +350,11 @@ Get all child elements, so all child nodes of type HTMLELement.
|
|
|
351
350
|
|
|
352
351
|
### firstChild
|
|
353
352
|
|
|
354
|
-
Get first child node
|
|
353
|
+
Get first child node of the wrapper node added by `parse()`. `undefined` if the node has no children.
|
|
355
354
|
|
|
356
355
|
### lastChild
|
|
357
356
|
|
|
358
|
-
Get last child node
|
|
357
|
+
Get last child node of the wrapper node added by `parse()`. `undefined` if the node has no children.
|
|
359
358
|
|
|
360
359
|
### firstElementChild
|
|
361
360
|
|
package/dist/index.js
CHANGED
|
@@ -4,21 +4,20 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
6
|
exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.CommentNode = exports.HTMLElement = exports.parse = void 0;
|
|
7
|
-
|
|
7
|
+
const comment_1 = __importDefault(require("./nodes/comment"));
|
|
8
8
|
exports.CommentNode = comment_1.default;
|
|
9
|
-
|
|
9
|
+
const html_1 = __importDefault(require("./nodes/html"));
|
|
10
10
|
exports.HTMLElement = html_1.default;
|
|
11
|
-
|
|
11
|
+
const node_1 = __importDefault(require("./nodes/node"));
|
|
12
12
|
exports.Node = node_1.default;
|
|
13
|
-
|
|
13
|
+
const text_1 = __importDefault(require("./nodes/text"));
|
|
14
14
|
exports.TextNode = text_1.default;
|
|
15
|
-
|
|
15
|
+
const type_1 = __importDefault(require("./nodes/type"));
|
|
16
16
|
exports.NodeType = type_1.default;
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
const parse_1 = __importDefault(require("./parse"));
|
|
18
|
+
const valid_1 = __importDefault(require("./valid"));
|
|
19
19
|
exports.valid = valid_1.default;
|
|
20
|
-
function parse(data, options) {
|
|
21
|
-
if (options === void 0) { options = {}; }
|
|
20
|
+
function parse(data, options = {}) {
|
|
22
21
|
return (0, parse_1.default)(data, options);
|
|
23
22
|
}
|
|
24
23
|
exports.default = parse;
|
package/dist/main.js
CHANGED
|
@@ -503,7 +503,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
503
503
|
_this.nodeType = type_3.default.ELEMENT_NODE;
|
|
504
504
|
_this.rawTagName = tagName;
|
|
505
505
|
_this.rawAttrs = rawAttrs || '';
|
|
506
|
-
_this.id = keyAttrs.id || '';
|
|
506
|
+
_this._id = keyAttrs.id || '';
|
|
507
507
|
_this.childNodes = [];
|
|
508
508
|
_this._parseOptions = _parseOptions;
|
|
509
509
|
_this.classList = new DOMTokenList(keyAttrs.class ? keyAttrs.class.split(/\s+/) : [], function (classList) { return _this.setAttribute('class', classList.toString()); } // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-call
|
|
@@ -535,7 +535,11 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
535
535
|
if (attr == null) {
|
|
536
536
|
return 'null';
|
|
537
537
|
}
|
|
538
|
-
return JSON.stringify(attr.replace(/"/g, '&quot;'))
|
|
538
|
+
return JSON.stringify(attr.replace(/"/g, '&quot;'))
|
|
539
|
+
.replace(/\\t/g, '\t')
|
|
540
|
+
.replace(/\\n/g, '\n')
|
|
541
|
+
.replace(/\\r/g, '\r')
|
|
542
|
+
.replace(/\\/g, '');
|
|
539
543
|
};
|
|
540
544
|
/**
|
|
541
545
|
* Remove Child element from childNodes array
|
|
@@ -586,6 +590,16 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
586
590
|
enumerable: false,
|
|
587
591
|
configurable: true
|
|
588
592
|
});
|
|
593
|
+
Object.defineProperty(HTMLElement.prototype, "id", {
|
|
594
|
+
get: function () {
|
|
595
|
+
return this._id;
|
|
596
|
+
},
|
|
597
|
+
set: function (newid) {
|
|
598
|
+
this.setAttribute('id', newid);
|
|
599
|
+
},
|
|
600
|
+
enumerable: false,
|
|
601
|
+
configurable: true
|
|
602
|
+
});
|
|
589
603
|
Object.defineProperty(HTMLElement.prototype, "rawText", {
|
|
590
604
|
/**
|
|
591
605
|
* Get escaped (as-is) text value of current node and its children.
|
|
@@ -738,8 +752,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
738
752
|
return child === _this;
|
|
739
753
|
});
|
|
740
754
|
resetParent([this], null);
|
|
741
|
-
resetParent(content, parent);
|
|
742
|
-
parent.childNodes = __spreadArray(__spreadArray(__spreadArray([], parent.childNodes.slice(0, idx), true), content, true), parent.childNodes.slice(idx + 1), true);
|
|
755
|
+
parent.childNodes = __spreadArray(__spreadArray(__spreadArray([], parent.childNodes.slice(0, idx), true), resetParent(content, parent), true), parent.childNodes.slice(idx + 1), true);
|
|
743
756
|
return this;
|
|
744
757
|
};
|
|
745
758
|
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
|
|
@@ -783,7 +796,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
783
796
|
res.push(' '.repeat(indention) + str);
|
|
784
797
|
}
|
|
785
798
|
function dfs(node) {
|
|
786
|
-
var idStr = node.id ? "#".concat(node.id) : '';
|
|
799
|
+
var idStr = node._id ? "#".concat(node._id) : '';
|
|
787
800
|
var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
|
|
788
801
|
write("".concat(node.rawTagName).concat(idStr).concat(classStr));
|
|
789
802
|
indention++;
|
|
@@ -826,10 +839,12 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
826
839
|
});
|
|
827
840
|
this.childNodes.length = o;
|
|
828
841
|
// remove whitespace between attributes
|
|
829
|
-
var attrs = Object.keys(this.rawAttributes)
|
|
842
|
+
var attrs = Object.keys(this.rawAttributes)
|
|
843
|
+
.map(function (key) {
|
|
830
844
|
var val = _this.rawAttributes[key];
|
|
831
845
|
return "".concat(key, "=").concat(JSON.stringify(val));
|
|
832
|
-
})
|
|
846
|
+
})
|
|
847
|
+
.join(' ');
|
|
833
848
|
this.rawAttrs = attrs;
|
|
834
849
|
delete this._rawAttrs;
|
|
835
850
|
return this;
|
|
@@ -918,10 +933,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
918
933
|
continue;
|
|
919
934
|
}
|
|
920
935
|
if (child.nodeType === type_3.default.ELEMENT_NODE) {
|
|
921
|
-
if (child.id === id) {
|
|
936
|
+
if (child._id === id) {
|
|
922
937
|
return child;
|
|
923
938
|
}
|
|
924
|
-
;
|
|
925
939
|
// if children are existing push the current status to the stack and keep searching for elements in the level below
|
|
926
940
|
if (child.childNodes.length > 0) {
|
|
927
941
|
stack.push(index);
|
|
@@ -1036,7 +1050,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1036
1050
|
}
|
|
1037
1051
|
var attrs = {};
|
|
1038
1052
|
if (this.rawAttrs) {
|
|
1039
|
-
var re = /([a-zA-Z()[\]#@$.?:][a-zA-Z0-9
|
|
1053
|
+
var re = /([a-zA-Z()[\]#@$.?:][a-zA-Z0-9-._:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
1040
1054
|
var match = void 0;
|
|
1041
1055
|
while ((match = re.exec(this.rawAttrs))) {
|
|
1042
1056
|
var key = match[1];
|
|
@@ -1069,9 +1083,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1069
1083
|
return "".concat(name, "=").concat(val);
|
|
1070
1084
|
})
|
|
1071
1085
|
.join(' ');
|
|
1072
|
-
// Update this.
|
|
1086
|
+
// Update this._id
|
|
1073
1087
|
if (key === 'id') {
|
|
1074
|
-
this.id = '';
|
|
1088
|
+
this._id = '';
|
|
1075
1089
|
}
|
|
1076
1090
|
return this;
|
|
1077
1091
|
};
|
|
@@ -1117,9 +1131,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1117
1131
|
return "".concat(name, "=").concat(val);
|
|
1118
1132
|
})
|
|
1119
1133
|
.join(' ');
|
|
1120
|
-
// Update this.
|
|
1134
|
+
// Update this._id
|
|
1121
1135
|
if (key === 'id') {
|
|
1122
|
-
this.id = value;
|
|
1136
|
+
this._id = value;
|
|
1123
1137
|
}
|
|
1124
1138
|
return this;
|
|
1125
1139
|
};
|
|
@@ -1146,6 +1160,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1146
1160
|
return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
|
|
1147
1161
|
})
|
|
1148
1162
|
.join(' ');
|
|
1163
|
+
// Update this._id
|
|
1164
|
+
if ('id' in attributes) {
|
|
1165
|
+
this._id = attributes['id'];
|
|
1166
|
+
}
|
|
1149
1167
|
return this;
|
|
1150
1168
|
};
|
|
1151
1169
|
HTMLElement.prototype.insertAdjacentHTML = function (where, html) {
|
|
@@ -1372,8 +1390,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1372
1390
|
return HTMLElement;
|
|
1373
1391
|
}(node_2.default));
|
|
1374
1392
|
exports.default = HTMLElement;
|
|
1393
|
+
// #xB7 | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x203F-#x2040] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
|
|
1375
1394
|
// https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name
|
|
1376
|
-
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/
|
|
1395
|
+
var kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z@\xB7\xC0-\xD6\xD8-\xF6\u00F8-\u03A1\u03A3-\u03D9\u03DB-\u03EF\u03F7-\u03FF\u0400-\u04FF\u0500-\u052F\u1D00-\u1D2B\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1E9B\u1F00-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2126\u212A-\u212B\u2132\u214E\u2160-\u2188\u2C60-\u2C7F\uA722-\uA787\uA78B-\uA78E\uA790-\uA7AD\uA7B0-\uA7B7\uA7F7-\uA7FF\uAB30-\uAB5A\uAB5C-\uAB5F\uAB64-\uAB65\uFB00-\uFB06\uFB13-\uFB17\uFF21-\uFF3A\uFF41-\uFF5A\x37F-\u1FFF\u200C-\u200D\u203F-\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/gu;
|
|
1396
|
+
// const kMarkupPattern = /<!--[\s\S]*?-->|<(\/?)([a-zA-Z][-.:0-9_a-zA-Z]*)((?:\s+[^>]*?(?:(?:'[^']*')|(?:"[^"]*"))?)*)\s*(\/?)>/g;
|
|
1377
1397
|
var kAttributePattern = /(?:^|\s)(id|class)\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+)/gi;
|
|
1378
1398
|
var kElementsClosedByOpening = {
|
|
1379
1399
|
li: { li: true, LI: true },
|
|
@@ -1415,6 +1435,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1415
1435
|
th: { tr: true, table: true, TR: true, TABLE: true },
|
|
1416
1436
|
TH: { tr: true, table: true, TR: true, TABLE: true },
|
|
1417
1437
|
};
|
|
1438
|
+
var kElementsClosedByClosingExcept = {
|
|
1439
|
+
p: { a: true, audio: true, del: true, ins: true, map: true, noscript: true, video: true },
|
|
1440
|
+
};
|
|
1418
1441
|
var frameflag = 'documentfragmentcontainer';
|
|
1419
1442
|
/**
|
|
1420
1443
|
* Parses HTML and returns a root element
|
|
@@ -1560,6 +1583,25 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1560
1583
|
continue;
|
|
1561
1584
|
}
|
|
1562
1585
|
}
|
|
1586
|
+
var openTag = currentParent.rawTagName ?
|
|
1587
|
+
currentParent.rawTagName.toLowerCase() :
|
|
1588
|
+
'';
|
|
1589
|
+
if (kElementsClosedByClosingExcept[openTag]) {
|
|
1590
|
+
var closingTag = tagName.toLowerCase();
|
|
1591
|
+
if (stack.length > 1) {
|
|
1592
|
+
var possibleContainer = stack[stack.length - 2];
|
|
1593
|
+
if (possibleContainer &&
|
|
1594
|
+
possibleContainer.rawTagName &&
|
|
1595
|
+
possibleContainer.rawTagName.toLowerCase() === closingTag &&
|
|
1596
|
+
!kElementsClosedByClosingExcept[openTag][closingTag]) {
|
|
1597
|
+
// Update range end for closed tag
|
|
1598
|
+
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1599
|
+
stack.pop();
|
|
1600
|
+
currentParent = (0, back_1.default)(stack);
|
|
1601
|
+
continue;
|
|
1602
|
+
}
|
|
1603
|
+
}
|
|
1604
|
+
}
|
|
1563
1605
|
// Use aggressive strategy to handle unmatching markups.
|
|
1564
1606
|
break;
|
|
1565
1607
|
}
|
|
@@ -1574,11 +1616,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1574
1616
|
* Parse a chunk of HTML source.
|
|
1575
1617
|
*/
|
|
1576
1618
|
function parse(data, options) {
|
|
1577
|
-
var _a, _b;
|
|
1578
1619
|
if (options === void 0) { options = {}; }
|
|
1579
1620
|
var stack = base_parse(data, options);
|
|
1580
1621
|
var root = stack[0];
|
|
1581
|
-
|
|
1622
|
+
var _loop_1 = function () {
|
|
1582
1623
|
// Handle each error elements.
|
|
1583
1624
|
var last = stack.pop();
|
|
1584
1625
|
var oneBefore = (0, back_1.default)(stack);
|
|
@@ -1588,7 +1629,9 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1588
1629
|
// this is wrong, because this will put the H3 outside the current right position which should be inside the current Html Element, see issue 152 for more info
|
|
1589
1630
|
if (options.parseNoneClosedTags !== true) {
|
|
1590
1631
|
oneBefore.removeChild(last);
|
|
1591
|
-
|
|
1632
|
+
last.childNodes.forEach(function (child) {
|
|
1633
|
+
oneBefore.parentNode.appendChild(child);
|
|
1634
|
+
});
|
|
1592
1635
|
stack.pop();
|
|
1593
1636
|
}
|
|
1594
1637
|
}
|
|
@@ -1598,20 +1641,24 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1598
1641
|
// eslint-disable-next-line no-lonely-if
|
|
1599
1642
|
if (options.parseNoneClosedTags !== true) {
|
|
1600
1643
|
oneBefore.removeChild(last);
|
|
1601
|
-
|
|
1644
|
+
last.childNodes.forEach(function (child) {
|
|
1645
|
+
oneBefore.appendChild(child);
|
|
1646
|
+
});
|
|
1602
1647
|
}
|
|
1603
1648
|
}
|
|
1604
1649
|
}
|
|
1605
1650
|
else {
|
|
1606
1651
|
// If it's final element just skip.
|
|
1607
1652
|
}
|
|
1653
|
+
};
|
|
1654
|
+
while (stack.length > 1) {
|
|
1655
|
+
_loop_1();
|
|
1608
1656
|
}
|
|
1609
1657
|
// response.childNodes.forEach((node) => {
|
|
1610
1658
|
// if (node instanceof HTMLElement) {
|
|
1611
1659
|
// node.parentNode = null;
|
|
1612
1660
|
// }
|
|
1613
1661
|
// });
|
|
1614
|
-
resetParent(root.childNodes, root, true);
|
|
1615
1662
|
return root;
|
|
1616
1663
|
}
|
|
1617
1664
|
exports.parse = parse;
|
|
@@ -1628,13 +1675,10 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1628
1675
|
return val;
|
|
1629
1676
|
});
|
|
1630
1677
|
}
|
|
1631
|
-
function resetParent(nodes, parent
|
|
1632
|
-
|
|
1633
|
-
nodes.forEach(function (node) {
|
|
1678
|
+
function resetParent(nodes, parent) {
|
|
1679
|
+
return nodes.map(function (node) {
|
|
1634
1680
|
node.parentNode = parent;
|
|
1635
|
-
|
|
1636
|
-
resetParent(node.childNodes, node, true);
|
|
1637
|
-
}
|
|
1681
|
+
return node;
|
|
1638
1682
|
});
|
|
1639
1683
|
}
|
|
1640
1684
|
});
|
package/dist/matcher.js
CHANGED
|
@@ -3,7 +3,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
|
|
6
|
+
const type_1 = __importDefault(require("./nodes/type"));
|
|
7
7
|
function isTag(node) {
|
|
8
8
|
return node && node.nodeType === type_1.default.ELEMENT_NODE;
|
|
9
9
|
}
|
|
@@ -23,10 +23,10 @@ function getText(node) {
|
|
|
23
23
|
return node.text;
|
|
24
24
|
}
|
|
25
25
|
function removeSubsets(nodes) {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
let idx = nodes.length;
|
|
27
|
+
let node;
|
|
28
|
+
let ancestor;
|
|
29
|
+
let replace;
|
|
30
30
|
// Check if each node (or one of its ancestors) is already contained in the
|
|
31
31
|
// array.
|
|
32
32
|
while (--idx > -1) {
|
|
@@ -50,26 +50,26 @@ function removeSubsets(nodes) {
|
|
|
50
50
|
return nodes;
|
|
51
51
|
}
|
|
52
52
|
function existsOne(test, elems) {
|
|
53
|
-
return elems.some(
|
|
53
|
+
return elems.some((elem) => {
|
|
54
54
|
return isTag(elem) ? test(elem) || existsOne(test, getChildren(elem)) : false;
|
|
55
55
|
});
|
|
56
56
|
}
|
|
57
57
|
function getSiblings(node) {
|
|
58
|
-
|
|
58
|
+
const parent = getParent(node);
|
|
59
59
|
return parent ? getChildren(parent) : [];
|
|
60
60
|
}
|
|
61
61
|
function hasAttrib(elem, name) {
|
|
62
62
|
return getAttributeValue(elem, name) !== undefined;
|
|
63
63
|
}
|
|
64
64
|
function findOne(test, elems) {
|
|
65
|
-
|
|
66
|
-
for (
|
|
67
|
-
|
|
65
|
+
let elem = null;
|
|
66
|
+
for (let i = 0, l = elems === null || elems === void 0 ? void 0 : elems.length; i < l && !elem; i++) {
|
|
67
|
+
const el = elems[i];
|
|
68
68
|
if (test(el)) {
|
|
69
69
|
elem = el;
|
|
70
70
|
}
|
|
71
71
|
else {
|
|
72
|
-
|
|
72
|
+
const childs = getChildren(el);
|
|
73
73
|
if (childs && childs.length > 0) {
|
|
74
74
|
elem = findOne(test, childs);
|
|
75
75
|
}
|
|
@@ -78,29 +78,29 @@ function findOne(test, elems) {
|
|
|
78
78
|
return elem;
|
|
79
79
|
}
|
|
80
80
|
function findAll(test, nodes) {
|
|
81
|
-
|
|
82
|
-
for (
|
|
81
|
+
let result = [];
|
|
82
|
+
for (let i = 0, j = nodes.length; i < j; i++) {
|
|
83
83
|
if (!isTag(nodes[i]))
|
|
84
84
|
continue;
|
|
85
85
|
if (test(nodes[i]))
|
|
86
86
|
result.push(nodes[i]);
|
|
87
|
-
|
|
87
|
+
const childs = getChildren(nodes[i]);
|
|
88
88
|
if (childs)
|
|
89
89
|
result = result.concat(findAll(test, childs));
|
|
90
90
|
}
|
|
91
91
|
return result;
|
|
92
92
|
}
|
|
93
93
|
exports.default = {
|
|
94
|
-
isTag
|
|
95
|
-
getAttributeValue
|
|
96
|
-
getName
|
|
97
|
-
getChildren
|
|
98
|
-
getParent
|
|
99
|
-
getText
|
|
100
|
-
removeSubsets
|
|
101
|
-
existsOne
|
|
102
|
-
getSiblings
|
|
103
|
-
hasAttrib
|
|
104
|
-
findOne
|
|
105
|
-
findAll
|
|
94
|
+
isTag,
|
|
95
|
+
getAttributeValue,
|
|
96
|
+
getName,
|
|
97
|
+
getChildren,
|
|
98
|
+
getParent,
|
|
99
|
+
getText,
|
|
100
|
+
removeSubsets,
|
|
101
|
+
existsOne,
|
|
102
|
+
getSiblings,
|
|
103
|
+
hasAttrib,
|
|
104
|
+
findOne,
|
|
105
|
+
findAll
|
|
106
106
|
};
|
package/dist/nodes/comment.js
CHANGED
|
@@ -1,57 +1,33 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __extends = (this && this.__extends) || (function () {
|
|
3
|
-
var extendStatics = function (d, b) {
|
|
4
|
-
extendStatics = Object.setPrototypeOf ||
|
|
5
|
-
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
|
|
6
|
-
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
|
|
7
|
-
return extendStatics(d, b);
|
|
8
|
-
};
|
|
9
|
-
return function (d, b) {
|
|
10
|
-
if (typeof b !== "function" && b !== null)
|
|
11
|
-
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
|
|
12
|
-
extendStatics(d, b);
|
|
13
|
-
function __() { this.constructor = d; }
|
|
14
|
-
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
15
|
-
};
|
|
16
|
-
})();
|
|
17
2
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
18
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
19
4
|
};
|
|
20
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
6
|
+
const node_1 = __importDefault(require("./node"));
|
|
7
|
+
const type_1 = __importDefault(require("./type"));
|
|
8
|
+
class CommentNode extends node_1.default {
|
|
9
|
+
clone() {
|
|
10
|
+
return new CommentNode(this.rawText, null, undefined, this.rawTagName);
|
|
11
|
+
}
|
|
12
|
+
constructor(rawText, parentNode = null, range, rawTagName = '!--') {
|
|
13
|
+
super(parentNode, range);
|
|
14
|
+
this.rawText = rawText;
|
|
15
|
+
this.rawTagName = rawTagName;
|
|
31
16
|
/**
|
|
32
17
|
* Node Type declaration.
|
|
33
18
|
* @type {Number}
|
|
34
19
|
*/
|
|
35
|
-
|
|
36
|
-
return _this;
|
|
20
|
+
this.nodeType = type_1.default.COMMENT_NODE;
|
|
37
21
|
}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
enumerable: false,
|
|
50
|
-
configurable: true
|
|
51
|
-
});
|
|
52
|
-
CommentNode.prototype.toString = function () {
|
|
53
|
-
return "<!--".concat(this.rawText, "-->");
|
|
54
|
-
};
|
|
55
|
-
return CommentNode;
|
|
56
|
-
}(node_1.default));
|
|
22
|
+
/**
|
|
23
|
+
* Get unescaped text value of current node and its children.
|
|
24
|
+
* @return {string} text content
|
|
25
|
+
*/
|
|
26
|
+
get text() {
|
|
27
|
+
return this.rawText;
|
|
28
|
+
}
|
|
29
|
+
toString() {
|
|
30
|
+
return `<!--${this.rawText}-->`;
|
|
31
|
+
}
|
|
32
|
+
}
|
|
57
33
|
exports.default = CommentNode;
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -43,8 +43,8 @@ export default class HTMLElement extends Node {
|
|
|
43
43
|
private _attrs;
|
|
44
44
|
private _rawAttrs;
|
|
45
45
|
private _parseOptions;
|
|
46
|
+
private _id;
|
|
46
47
|
rawTagName: string;
|
|
47
|
-
id: string;
|
|
48
48
|
classList: DOMTokenList;
|
|
49
49
|
/**
|
|
50
50
|
* Node Type declaration.
|
|
@@ -79,6 +79,8 @@ export default class HTMLElement extends Node {
|
|
|
79
79
|
set tagName(newname: string);
|
|
80
80
|
get localName(): string;
|
|
81
81
|
get isVoidElement(): boolean;
|
|
82
|
+
get id(): string;
|
|
83
|
+
set id(newid: string);
|
|
82
84
|
/**
|
|
83
85
|
* Get escaped (as-is) text value of current node and its children.
|
|
84
86
|
* @return {string} text content
|