node-html-parser 4.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +3 -0
- package/.eslintrc.json +226 -0
- package/.mocharc.yaml +1 -0
- package/.prettierrc +7 -0
- package/LICENSE +7 -0
- package/README.md +255 -0
- package/dist/back.d.ts +1 -0
- package/dist/back.js +6 -0
- package/dist/esm/back.js +3 -0
- package/dist/esm/index.js +7 -0
- package/dist/esm/matcher.js +101 -0
- package/dist/esm/nodes/comment.js +23 -0
- package/dist/esm/nodes/html.js +1048 -0
- package/dist/esm/nodes/node.js +25 -0
- package/dist/esm/nodes/text.js +95 -0
- package/dist/esm/nodes/type.js +7 -0
- package/dist/esm/parse.js +1 -0
- package/dist/esm/valid.js +9 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +21 -0
- package/dist/main.js +1542 -0
- package/dist/matcher.d.ts +6 -0
- package/dist/matcher.js +106 -0
- package/dist/nodes/comment.d.ts +18 -0
- package/dist/nodes/comment.js +51 -0
- package/dist/nodes/html.d.ts +206 -0
- package/dist/nodes/html.js +1188 -0
- package/dist/nodes/node.d.ts +18 -0
- package/dist/nodes/node.js +38 -0
- package/dist/nodes/text.d.ts +42 -0
- package/dist/nodes/text.js +139 -0
- package/dist/nodes/type.d.ts +6 -0
- package/dist/nodes/type.js +9 -0
- package/dist/parse.d.ts +1 -0
- package/dist/parse.js +5 -0
- package/dist/valid.d.ts +6 -0
- package/dist/valid.js +13 -0
- package/package.json +88 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import NodeType from './type';
|
|
2
|
+
import HTMLElement from './html';
|
|
3
|
+
/**
 * Abstract base shared by the node kinds of the tree (TextNode and HTMLElement).
 */
export default abstract class Node {
    /** Node kind; every concrete subclass supplies its own value. */
    abstract nodeType: NodeType;
    /** Text of the node; how it is derived is left to the subclass. */
    abstract text: string;
    /** Text of the node in its raw, still-encoded form. */
    abstract rawText: string;
    /** Element this node is attached to. */
    parentNode: HTMLElement;
    /** Nodes held directly by this node. */
    childNodes: Node[];
    /**
     * Pair of numbers recorded for the node.
     * NOTE(review): presumably [start, end] offsets in the parsed source - confirm against the parser.
     */
    range: readonly [number, number];
    /**
     * @param parentNode element the new node is attached to
     * @param range number pair stored as `range`
     */
    constructor(parentNode?: HTMLElement, range?: [number, number]);
    /** Serialises the node to a string; implemented by each subclass. */
    abstract toString(): string;
    /** Read-only text accessor. */
    get innerText(): string;
    /** Text accessor that can also be assigned. */
    get textContent(): string;
    set textContent(val: string);
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
var he_1 = require("he");
|
|
4
|
+
/**
 * Base constructor shared by the node kinds of the tree (TextNode and HTMLElement).
 */
var Node = /** @class */ (function () {
    /**
     * @param parentNode owner of the new node; stored as `null` when omitted
     * @param range value stored in `range`; `[-1, -1]` is used when it is null or undefined
     */
    function Node(parentNode, range) {
        this.parentNode = parentNode === undefined ? null : parentNode;
        this.childNodes = [];
        // `range` is deliberately non-enumerable, so key listings of a node do not include it.
        Object.defineProperty(this, 'range', {
            value: range == null ? [-1, -1] : range,
            writable: true,
            configurable: true,
            enumerable: false
        });
    }
    Object.defineProperties(Node.prototype, {
        // innerText hands back rawText untouched.
        innerText: {
            get: function () {
                return this.rawText;
            },
            enumerable: false,
            configurable: true
        },
        // textContent decodes rawText on read and stores the encoded value on write.
        textContent: {
            get: function () {
                return (0, he_1.decode)(this.rawText);
            },
            set: function (val) {
                this.rawText = (0, he_1.encode)(val);
            },
            enumerable: false,
            configurable: true
        }
    });
    return Node;
}());
|
|
38
|
+
exports.default = Node;
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import HTMLElement from './html';
|
|
2
|
+
import Node from './node';
|
|
3
|
+
import NodeType from './type';
|
|
4
|
+
/**
 * Node of the tree that carries a run of text.
 */
export default class TextNode extends Node {
    /** Backing field behind `rawText`. */
    private _rawText;
    /** Cached value of `trimmedRawText`; cleared whenever `rawText` is assigned. */
    private _trimmedRawText?;
    /** Cached value of `trimmedText`; cleared whenever `rawText` is assigned. */
    private _trimmedText?;
    /** Node kind carried by every TextNode. */
    nodeType: NodeType;
    /**
     * @param rawText text of the node in raw (still-encoded) form
     * @param parentNode element the node is attached to
     * @param range number pair forwarded to the base class
     */
    constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
    /** The text exactly as stored. */
    get rawText(): string;
    /** Replaces the stored text and discards both trimmed caches. */
    set rawText(text: string);
    /**
     * `rawText` without its surrounding whitespace; a side keeps one plain
     * space when the whitespace touching the content was not a line break.
     */
    get trimmedRawText(): string;
    /**
     * `text` without its surrounding whitespace; a side keeps one plain
     * space when the whitespace touching the content was not a line break.
     */
    get trimmedText(): string;
    /** Decoded (unescaped) form of `rawText`. */
    get text(): string;
    /** Whether the raw text holds nothing but whitespace. */
    get isWhitespace(): boolean;
    /** Serialises the node as its raw text. */
    toString(): string;
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Class-inheritance helper emitted by the TypeScript compiler for ES5 output.
// An already-installed `this.__extends` is reused; otherwise a fresh one is built.
var __extends = (this && this.__extends) || (function () {
    // Fallback used when prototypes cannot be re-pointed: copy own enumerable statics.
    function copyOwnStatics(target, source) {
        for (var key in source) {
            if (Object.prototype.hasOwnProperty.call(source, key))
                target[key] = source[key];
        }
    }
    // Fallback for engines that honour a writable `__proto__`.
    function assignProto(target, source) {
        target.__proto__ = source;
    }
    // Picks a strategy on first use, then replaces itself with the chosen one.
    var inheritStatics = function (derived, base) {
        if (Object.setPrototypeOf) {
            inheritStatics = Object.setPrototypeOf;
        }
        else if ({ __proto__: [] } instanceof Array) {
            inheritStatics = assignProto;
        }
        else {
            inheritStatics = copyOwnStatics;
        }
        return inheritStatics(derived, base);
    };
    return function (d, b) {
        var baseIsFunction = typeof b === "function";
        if (!baseIsFunction && b !== null) {
            throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
        }
        inheritStatics(d, b);
        if (b === null) {
            d.prototype = Object.create(b);
            return;
        }
        // A throwaway constructor builds the prototype object without running `b` itself.
        function Surrogate() { this.constructor = d; }
        Surrogate.prototype = b.prototype;
        d.prototype = new Surrogate();
    };
})();
|
|
17
|
+
// Default-import interop helper emitted by the TypeScript compiler: a module
// flagged `__esModule` is passed through, anything else is wrapped as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
20
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
+
var he_1 = require("he");
|
|
22
|
+
var node_1 = __importDefault(require("./node"));
|
|
23
|
+
var type_1 = __importDefault(require("./type"));
|
|
24
|
+
/**
 * Node of the tree that carries a run of text.
 *
 * The text is kept in raw (still-encoded) form in `_rawText`; decoded and
 * trimmed views are derived from it on demand.
 */
var TextNode = /** @class */ (function (_super) {
    __extends(TextNode, _super);
    /**
     * @param rawText text of the node in raw form
     * @param parentNode forwarded to the base constructor
     * @param range forwarded to the base constructor
     */
    function TextNode(rawText, parentNode, range) {
        var _this = _super.call(this, parentNode, range) || this;
        /**
         * Node kind carried by every TextNode.
         */
        _this.nodeType = type_1.default.TEXT_NODE;
        _this._rawText = rawText;
        return _this;
    }
    Object.defineProperty(TextNode.prototype, "rawText", {
        /**
         * The text exactly as stored.
         */
        get: function () {
            return this._rawText;
        },
        /**
         * Replace the stored text and drop both trimmed caches so they are
         * recomputed from the new value on next access.
         */
        set: function (text) {
            this._rawText = text;
            this._trimmedRawText = void 0;
            this._trimmedText = void 0;
        },
        enumerable: false,
        configurable: true
    });
    Object.defineProperty(TextNode.prototype, "trimmedRawText", {
        /**
         * `rawText` passed through trimText; computed once and cached until
         * `rawText` is assigned again.
         */
        get: function () {
            if (this._trimmedRawText !== undefined)
                return this._trimmedRawText;
            this._trimmedRawText = trimText(this.rawText);
            return this._trimmedRawText;
        },
        enumerable: false,
        configurable: true
    });
    Object.defineProperty(TextNode.prototype, "trimmedText", {
        /**
         * `text` (the decoded form) passed through trimText; computed once and
         * cached until `rawText` is assigned again.
         */
        get: function () {
            if (this._trimmedText !== undefined)
                return this._trimmedText;
            this._trimmedText = trimText(this.text);
            return this._trimmedText;
        },
        enumerable: false,
        configurable: true
    });
    Object.defineProperty(TextNode.prototype, "text", {
        /**
         * Decoded (unescaped) form of `rawText`.
         * @return {string} text content
         */
        get: function () {
            return (0, he_1.decode)(this.rawText);
        },
        enumerable: false,
        configurable: true
    });
    Object.defineProperty(TextNode.prototype, "isWhitespace", {
        /**
         * Detect if the node contains only white space.
         *
         * `rawText` is still encoded, so the `&nbsp;` entity has to be matched
         * by name; `\s` on its own only covers literal whitespace characters.
         * @return {boolean}
         */
        get: function () {
            return /^(\s|&nbsp;)*$/.test(this.rawText);
        },
        enumerable: false,
        configurable: true
    });
    /**
     * Serialise the node as its raw text.
     */
    TextNode.prototype.toString = function () {
        return this.rawText;
    };
    return TextNode;
}(node_1.default));
|
|
108
|
+
exports.default = TextNode;
|
|
109
|
+
/**
 * Trim the whitespace surrounding the content of `text`, keeping at most one
 * plain space on each side.
 *
 * A side keeps a single ' ' when the whitespace character directly touching
 * the content is anything other than `\r` or `\n`; otherwise that side is
 * trimmed completely. Whitespace inside the content is left alone. Input with
 * no non-whitespace character at all (including the empty string) is returned
 * unchanged.
 *
 * @param {string} text text to trim
 * @return {string} trimmed text
 */
function trimText(text) {
    // Built once per call rather than once per scanned character.
    var visible = /\S/;
    var inlineSpace = /[^\S\r\n]/;
    var length = text.length;
    // Forward scan for the first non-whitespace character.
    var first = 0;
    while (first < length && !visible.test(text[first])) {
        first++;
    }
    if (first === length) {
        // Nothing but whitespace: there is no content to trim around.
        return text;
    }
    // Backward scan for the last non-whitespace character; it stops at `first` at the latest.
    var last = length - 1;
    while (!visible.test(text[last])) {
        last--;
    }
    var lead = first > 0 && inlineSpace.test(text[first - 1]) ? ' ' : '';
    var trail = last < length - 1 && inlineSpace.test(text[last + 1]) ? ' ' : '';
    return lead + text.slice(first, last + 1) + trail;
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
/**
 * Node kind codes. Each name maps to its number and each number maps back to
 * its name, the way a compiled TypeScript numeric enum does.
 */
var NodeType = {};
var nodeTypeEntries = [
    ["ELEMENT_NODE", 1],
    ["TEXT_NODE", 3],
    ["COMMENT_NODE", 8]
];
for (var entryIndex = 0; entryIndex < nodeTypeEntries.length; entryIndex++) {
    var label = nodeTypeEntries[entryIndex][0];
    var code = nodeTypeEntries[entryIndex][1];
    // Forward (name -> number) first, then reverse (number -> name).
    NodeType[label] = code;
    NodeType[code] = label;
}
|
|
9
|
+
exports.default = NodeType;
|
package/dist/parse.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { parse as default } from './nodes/html';
|
package/dist/parse.js
ADDED
package/dist/valid.d.ts
ADDED
package/dist/valid.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
var html_1 = require("./nodes/html");
|
|
4
|
+
/**
 * Check a chunk of HTML source and report the outcome as a boolean.
 *
 * Hands `data` to `base_parse` and returns whether exactly one entry is left
 * on the stack it gives back.
 * NOTE(review): presumably the single remaining entry is the root, and extra
 * entries are elements that were never closed - confirm against base_parse.
 *
 * @param data HTML source passed to base_parse
 * @param options parser options; `{ lowerCaseTagName: false, comment: false }` when omitted
 * @return {boolean} `true` when the returned stack has length 1, otherwise `false`
 */
function valid(data, options) {
    if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
    var stack = (0, html_1.base_parse)(data, options);
    // The comparison is already a boolean; no Boolean() wrapper is needed.
    return stack.length === 1;
}
|
|
13
|
+
exports.default = valid;
|
package/package.json
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "node-html-parser",
|
|
3
|
+
"version": "4.1.5",
|
|
4
|
+
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"module": "dist/esm/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"test": "mocha",
|
|
10
|
+
"lint": "eslint ./src/*.ts ./src/**/*.ts",
|
|
11
|
+
"clean": "del-cli ./dist/",
|
|
12
|
+
"ts:cjs": "tsc -m commonjs",
|
|
13
|
+
"ts:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
|
|
14
|
+
"ts:esm": "tsc -t es2019 -m esnext -d false --outDir ./dist/esm/",
|
|
15
|
+
"build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
|
|
16
|
+
"dev": "tsc -w & mocha -w ./test/*.js",
|
|
17
|
+
"pretest": "tsc -m commonjs",
|
|
18
|
+
"release": "yarn build && np",
|
|
19
|
+
"prepare": "npm run build"
|
|
20
|
+
},
|
|
21
|
+
"keywords": [
|
|
22
|
+
"parser",
|
|
23
|
+
"html",
|
|
24
|
+
"nodejs",
|
|
25
|
+
"typescript"
|
|
26
|
+
],
|
|
27
|
+
"author": "Xiaoyi Shi <ashi009@gmail.com>",
|
|
28
|
+
"contributors": [
|
|
29
|
+
"taoqf<tao_qiufeng@126.com>"
|
|
30
|
+
],
|
|
31
|
+
"license": "MIT",
|
|
32
|
+
"publishConfig": {
|
|
33
|
+
"registry": "https://registry.npmjs.org"
|
|
34
|
+
},
|
|
35
|
+
"dependencies": {
|
|
36
|
+
"css-select": "^4.1.3",
|
|
37
|
+
"he": "1.2.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/entities": "latest",
|
|
41
|
+
"@types/he": "latest",
|
|
42
|
+
"@types/node": "latest",
|
|
43
|
+
"@typescript-eslint/eslint-plugin": "latest",
|
|
44
|
+
"@typescript-eslint/eslint-plugin-tslint": "latest",
|
|
45
|
+
"@typescript-eslint/parser": "latest",
|
|
46
|
+
"blanket": "latest",
|
|
47
|
+
"cheerio": "^1.0.0-rc.5",
|
|
48
|
+
"del-cli": "latest",
|
|
49
|
+
"eslint": "latest",
|
|
50
|
+
"eslint-config-prettier": "latest",
|
|
51
|
+
"eslint-plugin-import": "latest",
|
|
52
|
+
"high5": "^1.0.0",
|
|
53
|
+
"htmlparser": "^1.7.7",
|
|
54
|
+
"htmlparser-benchmark": "^1.1.3",
|
|
55
|
+
"htmlparser2": "^6.0.0",
|
|
56
|
+
"mocha": "latest",
|
|
57
|
+
"mocha-each": "^2.0.1",
|
|
58
|
+
"np": "latest",
|
|
59
|
+
"parse5": "^6.0.1",
|
|
60
|
+
"should": "latest",
|
|
61
|
+
"spec": "latest",
|
|
62
|
+
"travis-cov": "latest",
|
|
63
|
+
"typescript": "next"
|
|
64
|
+
},
|
|
65
|
+
"config": {
|
|
66
|
+
"blanket": {
|
|
67
|
+
"pattern": "./dist/index.js",
|
|
68
|
+
"data-cover-never": [
|
|
69
|
+
"node_modules"
|
|
70
|
+
]
|
|
71
|
+
},
|
|
72
|
+
"travis-cov": {
|
|
73
|
+
"threshold": 70
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
"directories": {
|
|
77
|
+
"test": "test"
|
|
78
|
+
},
|
|
79
|
+
"repository": {
|
|
80
|
+
"type": "git",
|
|
81
|
+
"url": "https://github.com/taoqf/node-fast-html-parser.git"
|
|
82
|
+
},
|
|
83
|
+
"bugs": {
|
|
84
|
+
"url": "https://github.com/taoqf/node-fast-html-parser/issues"
|
|
85
|
+
},
|
|
86
|
+
"homepage": "https://github.com/taoqf/node-fast-html-parser",
|
|
87
|
+
"sideEffects": false
|
|
88
|
+
}
|