node-html-parser 4.1.4 → 5.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.md +34 -25
- package/dist/main.js +177 -271
- package/dist/nodes/html.d.ts +12 -5
- package/dist/nodes/html.js +177 -271
- package/esm/index.js +11 -0
- package/esm/package.json +3 -0
- package/package.json +46 -17
- package/.eslintignore +0 -3
- package/.eslintrc.json +0 -226
- package/.mocharc.yaml +0 -1
- package/dist/esm/back.js +0 -3
- package/dist/esm/index.js +0 -7
- package/dist/esm/matcher.js +0 -101
- package/dist/esm/nodes/comment.js +0 -23
- package/dist/esm/nodes/html.js +0 -1102
- package/dist/esm/nodes/node.js +0 -25
- package/dist/esm/nodes/text.js +0 -95
- package/dist/esm/nodes/type.js +0 -7
- package/dist/esm/parse.js +0 -1
- package/dist/esm/valid.js +0 -9
package/dist/esm/nodes/node.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import { decode, encode } from 'he';
|
|
2
|
-
/**
|
|
3
|
-
* Node Class as base class for TextNode and HTMLElement.
|
|
4
|
-
*/
|
|
5
|
-
export default class Node {
|
|
6
|
-
constructor(parentNode = null, range) {
|
|
7
|
-
this.parentNode = parentNode;
|
|
8
|
-
this.childNodes = [];
|
|
9
|
-
Object.defineProperty(this, 'range', {
|
|
10
|
-
enumerable: false,
|
|
11
|
-
writable: true,
|
|
12
|
-
configurable: true,
|
|
13
|
-
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
14
|
-
});
|
|
15
|
-
}
|
|
16
|
-
get innerText() {
|
|
17
|
-
return this.rawText;
|
|
18
|
-
}
|
|
19
|
-
get textContent() {
|
|
20
|
-
return decode(this.rawText);
|
|
21
|
-
}
|
|
22
|
-
set textContent(val) {
|
|
23
|
-
this.rawText = encode(val);
|
|
24
|
-
}
|
|
25
|
-
}
|
package/dist/esm/nodes/text.js
DELETED
|
@@ -1,95 +0,0 @@
|
|
|
1
|
-
import { decode } from 'he';
|
|
2
|
-
import Node from './node';
|
|
3
|
-
import NodeType from './type';
|
|
4
|
-
/**
|
|
5
|
-
* TextNode to contain a text element in DOM tree.
|
|
6
|
-
* @param {string} value [description]
|
|
7
|
-
*/
|
|
8
|
-
export default class TextNode extends Node {
|
|
9
|
-
constructor(rawText, parentNode, range) {
|
|
10
|
-
super(parentNode, range);
|
|
11
|
-
/**
|
|
12
|
-
* Node Type declaration.
|
|
13
|
-
* @type {Number}
|
|
14
|
-
*/
|
|
15
|
-
this.nodeType = NodeType.TEXT_NODE;
|
|
16
|
-
this._rawText = rawText;
|
|
17
|
-
}
|
|
18
|
-
get rawText() {
|
|
19
|
-
return this._rawText;
|
|
20
|
-
}
|
|
21
|
-
/**
|
|
22
|
-
* Set rawText and invalidate trimmed caches
|
|
23
|
-
*/
|
|
24
|
-
set rawText(text) {
|
|
25
|
-
this._rawText = text;
|
|
26
|
-
this._trimmedRawText = void 0;
|
|
27
|
-
this._trimmedText = void 0;
|
|
28
|
-
}
|
|
29
|
-
/**
|
|
30
|
-
* Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
31
|
-
*/
|
|
32
|
-
get trimmedRawText() {
|
|
33
|
-
if (this._trimmedRawText !== undefined)
|
|
34
|
-
return this._trimmedRawText;
|
|
35
|
-
this._trimmedRawText = trimText(this.rawText);
|
|
36
|
-
return this._trimmedRawText;
|
|
37
|
-
}
|
|
38
|
-
/**
|
|
39
|
-
* Returns text with all whitespace trimmed except single leading/trailing non-breaking space
|
|
40
|
-
*/
|
|
41
|
-
get trimmedText() {
|
|
42
|
-
if (this._trimmedText !== undefined)
|
|
43
|
-
return this._trimmedText;
|
|
44
|
-
this._trimmedText = trimText(this.text);
|
|
45
|
-
return this._trimmedText;
|
|
46
|
-
}
|
|
47
|
-
/**
|
|
48
|
-
* Get unescaped text value of current node and its children.
|
|
49
|
-
* @return {string} text content
|
|
50
|
-
*/
|
|
51
|
-
get text() {
|
|
52
|
-
return decode(this.rawText);
|
|
53
|
-
}
|
|
54
|
-
/**
|
|
55
|
-
* Detect if the node contains only white space.
|
|
56
|
-
* @return {boolean}
|
|
57
|
-
*/
|
|
58
|
-
get isWhitespace() {
|
|
59
|
-
return /^(\s|&nbsp;)*$/.test(this.rawText);
|
|
60
|
-
}
|
|
61
|
-
toString() {
|
|
62
|
-
return this.rawText;
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
/**
|
|
66
|
-
* Trim whitespace except single leading/trailing non-breaking space
|
|
67
|
-
*/
|
|
68
|
-
function trimText(text) {
|
|
69
|
-
let i = 0;
|
|
70
|
-
let startPos;
|
|
71
|
-
let endPos;
|
|
72
|
-
while (i >= 0 && i < text.length) {
|
|
73
|
-
if (/\S/.test(text[i])) {
|
|
74
|
-
if (startPos === undefined) {
|
|
75
|
-
startPos = i;
|
|
76
|
-
i = text.length;
|
|
77
|
-
}
|
|
78
|
-
else {
|
|
79
|
-
endPos = i;
|
|
80
|
-
i = void 0;
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
if (startPos === undefined)
|
|
84
|
-
i++;
|
|
85
|
-
else
|
|
86
|
-
i--;
|
|
87
|
-
}
|
|
88
|
-
if (startPos === undefined)
|
|
89
|
-
startPos = 0;
|
|
90
|
-
if (endPos === undefined)
|
|
91
|
-
endPos = text.length - 1;
|
|
92
|
-
const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
|
|
93
|
-
const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
|
|
94
|
-
return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
|
|
95
|
-
}
|
package/dist/esm/nodes/type.js
DELETED
|
@@ -1,7 +0,0 @@
|
|
|
1
|
-
var NodeType;
|
|
2
|
-
(function (NodeType) {
|
|
3
|
-
NodeType[NodeType["ELEMENT_NODE"] = 1] = "ELEMENT_NODE";
|
|
4
|
-
NodeType[NodeType["TEXT_NODE"] = 3] = "TEXT_NODE";
|
|
5
|
-
NodeType[NodeType["COMMENT_NODE"] = 8] = "COMMENT_NODE";
|
|
6
|
-
})(NodeType || (NodeType = {}));
|
|
7
|
-
export default NodeType;
|
package/dist/esm/parse.js
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
export { parse as default } from './nodes/html';
|
package/dist/esm/valid.js
DELETED
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { base_parse } from './nodes/html';
|
|
2
|
-
/**
|
|
3
|
-
* Parses HTML and returns a root element
|
|
4
|
-
* Parse a chuck of HTML source.
|
|
5
|
-
*/
|
|
6
|
-
export default function valid(data, options = { lowerCaseTagName: false, comment: false }) {
|
|
7
|
-
const stack = base_parse(data, options);
|
|
8
|
-
return Boolean(stack.length === 1);
|
|
9
|
-
}
|