node-html-parser 6.1.15-0 → 7.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/README.md +5 -6
- package/dist/index.js +8 -9
- package/dist/main.js +70 -26
- package/dist/matcher.js +26 -26
- package/dist/nodes/comment.js +22 -46
- package/dist/nodes/html.d.ts +3 -1
- package/dist/nodes/html.js +579 -725
- package/dist/nodes/node.js +18 -29
- package/dist/nodes/text.js +61 -100
- package/dist/valid.js +3 -4
- package/dist/void-tag.js +13 -15
- package/package.json +3 -2
package/dist/nodes/node.js
CHANGED
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
|
|
3
|
+
const he_1 = require("he");
|
|
4
4
|
/**
|
|
5
5
|
* Node Class as base class for TextNode and HTMLElement.
|
|
6
6
|
*/
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
if (parentNode === void 0) { parentNode = null; }
|
|
7
|
+
class Node {
|
|
8
|
+
constructor(parentNode = null, range) {
|
|
10
9
|
this.parentNode = parentNode;
|
|
11
10
|
this.childNodes = [];
|
|
12
11
|
Object.defineProperty(this, 'range', {
|
|
@@ -19,34 +18,24 @@ var Node = /** @class */ (function () {
|
|
|
19
18
|
/**
|
|
20
19
|
* Remove current node
|
|
21
20
|
*/
|
|
22
|
-
|
|
23
|
-
var _this = this;
|
|
21
|
+
remove() {
|
|
24
22
|
if (this.parentNode) {
|
|
25
|
-
|
|
26
|
-
this.parentNode.childNodes = children.filter(
|
|
27
|
-
return
|
|
23
|
+
const children = this.parentNode.childNodes;
|
|
24
|
+
this.parentNode.childNodes = children.filter((child) => {
|
|
25
|
+
return this !== child;
|
|
28
26
|
});
|
|
29
27
|
this.parentNode = null;
|
|
30
28
|
}
|
|
31
29
|
return this;
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
},
|
|
44
|
-
set: function (val) {
|
|
45
|
-
this.rawText = (0, he_1.encode)(val);
|
|
46
|
-
},
|
|
47
|
-
enumerable: false,
|
|
48
|
-
configurable: true
|
|
49
|
-
});
|
|
50
|
-
return Node;
|
|
51
|
-
}());
|
|
30
|
+
}
|
|
31
|
+
get innerText() {
|
|
32
|
+
return this.rawText;
|
|
33
|
+
}
|
|
34
|
+
get textContent() {
|
|
35
|
+
return (0, he_1.decode)(this.rawText);
|
|
36
|
+
}
|
|
37
|
+
set textContent(val) {
|
|
38
|
+
this.rawText = (0, he_1.encode)(val);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
52
41
|
exports.default = Node;
|
package/dist/nodes/text.js
CHANGED
|
@@ -1,123 +1,84 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
-
var __extends = (this && this.__extends) || (function () {
|
|
3
|
-
var extendStatics = function (d, b) {
|
|
4
|
-
extendStatics = Object.setPrototypeOf ||
|
|
5
|
-
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
|
|
6
|
-
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
|
|
7
|
-
return extendStatics(d, b);
|
|
8
|
-
};
|
|
9
|
-
return function (d, b) {
|
|
10
|
-
if (typeof b !== "function" && b !== null)
|
|
11
|
-
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
|
|
12
|
-
extendStatics(d, b);
|
|
13
|
-
function __() { this.constructor = d; }
|
|
14
|
-
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
15
|
-
};
|
|
16
|
-
})();
|
|
17
2
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
18
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
19
4
|
};
|
|
20
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
6
|
+
const he_1 = require("he");
|
|
7
|
+
const node_1 = __importDefault(require("./node"));
|
|
8
|
+
const type_1 = __importDefault(require("./type"));
|
|
24
9
|
/**
|
|
25
10
|
* TextNode to contain a text element in DOM tree.
|
|
26
11
|
* @param {string} value [description]
|
|
27
12
|
*/
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
13
|
+
class TextNode extends node_1.default {
|
|
14
|
+
clone() {
|
|
15
|
+
return new TextNode(this._rawText, null);
|
|
16
|
+
}
|
|
17
|
+
constructor(rawText, parentNode = null, range) {
|
|
18
|
+
super(parentNode, range);
|
|
33
19
|
/**
|
|
34
20
|
* Node Type declaration.
|
|
35
21
|
* @type {Number}
|
|
36
22
|
*/
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
return _this;
|
|
23
|
+
this.nodeType = type_1.default.TEXT_NODE;
|
|
24
|
+
this.rawTagName = '';
|
|
25
|
+
this._rawText = rawText;
|
|
41
26
|
}
|
|
42
|
-
|
|
43
|
-
return
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
configurable: true
|
|
59
|
-
});
|
|
60
|
-
Object.defineProperty(TextNode.prototype, "trimmedRawText", {
|
|
61
|
-
/**
|
|
62
|
-
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
63
|
-
*/
|
|
64
|
-
get: function () {
|
|
65
|
-
if (this._trimmedRawText !== undefined)
|
|
66
|
-
return this._trimmedRawText;
|
|
67
|
-
this._trimmedRawText = trimText(this.rawText);
|
|
27
|
+
get rawText() {
|
|
28
|
+
return this._rawText;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Set rawText and invalidate trimmed caches
|
|
32
|
+
*/
|
|
33
|
+
set rawText(text) {
|
|
34
|
+
this._rawText = text;
|
|
35
|
+
this._trimmedRawText = void 0;
|
|
36
|
+
this._trimmedText = void 0;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
40
|
+
*/
|
|
41
|
+
get trimmedRawText() {
|
|
42
|
+
if (this._trimmedRawText !== undefined)
|
|
68
43
|
return this._trimmedRawText;
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
get: function () {
|
|
78
|
-
if (this._trimmedText !== undefined)
|
|
79
|
-
return this._trimmedText;
|
|
80
|
-
this._trimmedText = trimText(this.text);
|
|
44
|
+
this._trimmedRawText = trimText(this.rawText);
|
|
45
|
+
return this._trimmedRawText;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
49
|
+
*/
|
|
50
|
+
get trimmedText() {
|
|
51
|
+
if (this._trimmedText !== undefined)
|
|
81
52
|
return this._trimmedText;
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
* @return {boolean}
|
|
101
|
-
*/
|
|
102
|
-
get: function () {
|
|
103
|
-
return /^(\s| )*$/.test(this.rawText);
|
|
104
|
-
},
|
|
105
|
-
enumerable: false,
|
|
106
|
-
configurable: true
|
|
107
|
-
});
|
|
108
|
-
TextNode.prototype.toString = function () {
|
|
53
|
+
this._trimmedText = trimText(this.text);
|
|
54
|
+
return this._trimmedText;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Get unescaped text value of current node and its children.
|
|
58
|
+
* @return {string} text content
|
|
59
|
+
*/
|
|
60
|
+
get text() {
|
|
61
|
+
return (0, he_1.decode)(this.rawText);
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Detect if the node contains only white space.
|
|
65
|
+
* @return {boolean}
|
|
66
|
+
*/
|
|
67
|
+
get isWhitespace() {
|
|
68
|
+
return /^(\s| )*$/.test(this.rawText);
|
|
69
|
+
}
|
|
70
|
+
toString() {
|
|
109
71
|
return this.rawText;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
}(node_1.default));
|
|
72
|
+
}
|
|
73
|
+
}
|
|
113
74
|
exports.default = TextNode;
|
|
114
75
|
/**
|
|
115
76
|
* Trim whitespace except single leading/trailing non-breaking space
|
|
116
77
|
*/
|
|
117
78
|
function trimText(text) {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
79
|
+
let i = 0;
|
|
80
|
+
let startPos;
|
|
81
|
+
let endPos;
|
|
121
82
|
while (i >= 0 && i < text.length) {
|
|
122
83
|
if (/\S/.test(text[i])) {
|
|
123
84
|
if (startPos === undefined) {
|
|
@@ -138,7 +99,7 @@ function trimText(text) {
|
|
|
138
99
|
startPos = 0;
|
|
139
100
|
if (endPos === undefined)
|
|
140
101
|
endPos = text.length - 1;
|
|
141
|
-
|
|
142
|
-
|
|
102
|
+
const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
103
|
+
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
143
104
|
return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
144
105
|
}
|
package/dist/valid.js
CHANGED
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
|
|
3
|
+
const html_1 = require("./nodes/html");
|
|
4
4
|
/**
|
|
5
5
|
* Parses HTML and returns a root element
|
|
6
6
|
* Parse a chuck of HTML source.
|
|
7
7
|
*/
|
|
8
|
-
function valid(data, options) {
|
|
9
|
-
|
|
10
|
-
var stack = (0, html_1.base_parse)(data, options);
|
|
8
|
+
function valid(data, options = {}) {
|
|
9
|
+
const stack = (0, html_1.base_parse)(data, options);
|
|
11
10
|
return Boolean(stack.length === 1);
|
|
12
11
|
}
|
|
13
12
|
exports.default = valid;
|
package/dist/void-tag.js
CHANGED
|
@@ -1,29 +1,27 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
if (addClosingSlash === void 0) { addClosingSlash = false; }
|
|
3
|
+
class VoidTag {
|
|
4
|
+
constructor(addClosingSlash = false, tags) {
|
|
6
5
|
this.addClosingSlash = addClosingSlash;
|
|
7
6
|
if (Array.isArray(tags)) {
|
|
8
|
-
this.voidTags = tags.reduce(
|
|
7
|
+
this.voidTags = tags.reduce((set, tag) => {
|
|
9
8
|
return set.add(tag.toLowerCase()).add(tag.toUpperCase()).add(tag);
|
|
10
9
|
}, new Set());
|
|
11
10
|
}
|
|
12
11
|
else {
|
|
13
|
-
this.voidTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'].reduce(
|
|
12
|
+
this.voidTags = ['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr'].reduce((set, tag) => {
|
|
14
13
|
return set.add(tag.toLowerCase()).add(tag.toUpperCase()).add(tag);
|
|
15
14
|
}, new Set());
|
|
16
15
|
}
|
|
17
16
|
}
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
return this.isVoidElement(tag.toLowerCase()) ?
|
|
23
|
-
}
|
|
24
|
-
|
|
17
|
+
formatNode(tag, attrs, innerHTML) {
|
|
18
|
+
const addClosingSlash = this.addClosingSlash;
|
|
19
|
+
const closingSpace = (addClosingSlash && attrs && !attrs.endsWith(' ')) ? ' ' : '';
|
|
20
|
+
const closingSlash = addClosingSlash ? `${closingSpace}/` : '';
|
|
21
|
+
return this.isVoidElement(tag.toLowerCase()) ? `<${tag}${attrs}${closingSlash}>` : `<${tag}${attrs}>${innerHTML}</${tag}>`;
|
|
22
|
+
}
|
|
23
|
+
isVoidElement(tag) {
|
|
25
24
|
return this.voidTags.has(tag);
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
}());
|
|
25
|
+
}
|
|
26
|
+
}
|
|
29
27
|
exports.default = VoidTag;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "7.0.2",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -87,7 +87,8 @@
|
|
|
87
87
|
"standard-version": "^9.5.0",
|
|
88
88
|
"travis-cov": "latest",
|
|
89
89
|
"ts-node": "^10.9.1",
|
|
90
|
-
"typescript": "latest"
|
|
90
|
+
"typescript": "latest",
|
|
91
|
+
"yarn": "^1.22.22"
|
|
91
92
|
},
|
|
92
93
|
"config": {
|
|
93
94
|
"blanket": {
|