node-html-parser 4.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ import NodeType from './type';
2
+ import HTMLElement from './html';
3
+ /**
4
+ * Node Class as base class for TextNode and HTMLElement.
5
+ */
6
+ export default abstract class Node {
7
+ parentNode: HTMLElement;
8
+ abstract nodeType: NodeType;
9
+ childNodes: Node[];
10
+ range: readonly [number, number];
11
+ abstract text: string;
12
+ abstract rawText: string;
13
+ abstract toString(): string;
14
+ constructor(parentNode?: HTMLElement, range?: [number, number]);
15
+ get innerText(): string;
16
+ get textContent(): string;
17
+ set textContent(val: string);
18
+ }
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ var he_1 = require("he");
4
+ /**
5
+ * Node Class as base class for TextNode and HTMLElement.
6
+ */
7
+ var Node = /** @class */ (function () {
8
+ function Node(parentNode, range) {
9
+ if (parentNode === void 0) { parentNode = null; }
10
+ this.parentNode = parentNode;
11
+ this.childNodes = [];
12
+ Object.defineProperty(this, 'range', {
13
+ enumerable: false,
14
+ writable: true,
15
+ configurable: true,
16
+ value: range !== null && range !== void 0 ? range : [-1, -1]
17
+ });
18
+ }
19
+ Object.defineProperty(Node.prototype, "innerText", {
20
+ get: function () {
21
+ return this.rawText;
22
+ },
23
+ enumerable: false,
24
+ configurable: true
25
+ });
26
+ Object.defineProperty(Node.prototype, "textContent", {
27
+ get: function () {
28
+ return (0, he_1.decode)(this.rawText);
29
+ },
30
+ set: function (val) {
31
+ this.rawText = (0, he_1.encode)(val);
32
+ },
33
+ enumerable: false,
34
+ configurable: true
35
+ });
36
+ return Node;
37
+ }());
38
+ exports.default = Node;
@@ -0,0 +1,42 @@
1
+ import HTMLElement from './html';
2
+ import Node from './node';
3
+ import NodeType from './type';
4
+ /**
5
+ * TextNode to contain a text element in DOM tree.
6
+ * @param {string} rawText raw text content of the node
7
+ */
8
+ export default class TextNode extends Node {
9
+ constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
10
+ /**
11
+ * Node Type declaration.
12
+ * @type {Number}
13
+ */
14
+ nodeType: NodeType;
15
+ private _rawText;
16
+ private _trimmedRawText?;
17
+ private _trimmedText?;
18
+ get rawText(): string;
19
+ /**
20
+ * Set rawText and invalidate trimmed caches
21
+ */
22
+ set rawText(text: string);
23
+ /**
24
+ * Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
25
+ */
26
+ get trimmedRawText(): string;
27
+ /**
28
+ * Returns text with all whitespace trimmed except single leading/trailing non-breaking space
29
+ */
30
+ get trimmedText(): string;
31
+ /**
32
+ * Get unescaped text value of current node and its children.
33
+ * @return {string} text content
34
+ */
35
+ get text(): string;
36
+ /**
37
+ * Detect if the node contains only white space.
38
+ * @return {boolean}
39
+ */
40
+ get isWhitespace(): boolean;
41
+ toString(): string;
42
+ }
@@ -0,0 +1,139 @@
1
+ "use strict";
2
+ var __extends = (this && this.__extends) || (function () {
3
+ var extendStatics = function (d, b) {
4
+ extendStatics = Object.setPrototypeOf ||
5
+ ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
6
+ function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
7
+ return extendStatics(d, b);
8
+ };
9
+ return function (d, b) {
10
+ if (typeof b !== "function" && b !== null)
11
+ throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
12
+ extendStatics(d, b);
13
+ function __() { this.constructor = d; }
14
+ d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
15
+ };
16
+ })();
17
+ var __importDefault = (this && this.__importDefault) || function (mod) {
18
+ return (mod && mod.__esModule) ? mod : { "default": mod };
19
+ };
20
+ Object.defineProperty(exports, "__esModule", { value: true });
21
+ var he_1 = require("he");
22
+ var node_1 = __importDefault(require("./node"));
23
+ var type_1 = __importDefault(require("./type"));
24
+ /**
25
+ * TextNode to contain a text element in DOM tree.
26
+ * @param {string} rawText raw text content of the node
27
+ */
28
+ var TextNode = /** @class */ (function (_super) {
29
+ __extends(TextNode, _super);
30
+ function TextNode(rawText, parentNode, range) {
31
+ var _this = _super.call(this, parentNode, range) || this;
32
+ /**
33
+ * Node Type declaration.
34
+ * @type {Number}
35
+ */
36
+ _this.nodeType = type_1.default.TEXT_NODE;
37
+ _this._rawText = rawText;
38
+ return _this;
39
+ }
40
+ Object.defineProperty(TextNode.prototype, "rawText", {
41
+ get: function () {
42
+ return this._rawText;
43
+ },
44
+ /**
45
+ * Set rawText and invalidate trimmed caches
46
+ */
47
+ set: function (text) {
48
+ this._rawText = text;
49
+ this._trimmedRawText = void 0;
50
+ this._trimmedText = void 0;
51
+ },
52
+ enumerable: false,
53
+ configurable: true
54
+ });
55
+ Object.defineProperty(TextNode.prototype, "trimmedRawText", {
56
+ /**
57
+ * Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
58
+ */
59
+ get: function () {
60
+ if (this._trimmedRawText !== undefined)
61
+ return this._trimmedRawText;
62
+ this._trimmedRawText = trimText(this.rawText);
63
+ return this._trimmedRawText;
64
+ },
65
+ enumerable: false,
66
+ configurable: true
67
+ });
68
+ Object.defineProperty(TextNode.prototype, "trimmedText", {
69
+ /**
70
+ * Returns text with all whitespace trimmed except single leading/trailing non-breaking space
71
+ */
72
+ get: function () {
73
+ if (this._trimmedText !== undefined)
74
+ return this._trimmedText;
75
+ this._trimmedText = trimText(this.text);
76
+ return this._trimmedText;
77
+ },
78
+ enumerable: false,
79
+ configurable: true
80
+ });
81
+ Object.defineProperty(TextNode.prototype, "text", {
82
+ /**
83
+ * Get unescaped text value of current node and its children.
84
+ * @return {string} text content
85
+ */
86
+ get: function () {
87
+ return (0, he_1.decode)(this.rawText);
88
+ },
89
+ enumerable: false,
90
+ configurable: true
91
+ });
92
+ Object.defineProperty(TextNode.prototype, "isWhitespace", {
93
+ /**
94
+ * Detect if the node contains only white space.
95
+ * @return {boolean}
96
+ */
97
+ get: function () {
98
+ return /^(\s| )*$/.test(this.rawText);
99
+ },
100
+ enumerable: false,
101
+ configurable: true
102
+ });
103
+ TextNode.prototype.toString = function () {
104
+ return this.rawText;
105
+ };
106
+ return TextNode;
107
+ }(node_1.default));
108
+ exports.default = TextNode;
109
+ /**
110
+ * Trim whitespace except single leading/trailing non-breaking space
111
+ */
112
+ function trimText(text) {
113
+ var i = 0;
114
+ var startPos;
115
+ var endPos;
116
+ while (i >= 0 && i < text.length) {
117
+ if (/\S/.test(text[i])) {
118
+ if (startPos === undefined) {
119
+ startPos = i;
120
+ i = text.length;
121
+ }
122
+ else {
123
+ endPos = i;
124
+ i = void 0;
125
+ }
126
+ }
127
+ if (startPos === undefined)
128
+ i++;
129
+ else
130
+ i--;
131
+ }
132
+ if (startPos === undefined)
133
+ startPos = 0;
134
+ if (endPos === undefined)
135
+ endPos = text.length - 1;
136
+ var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
137
+ var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
138
+ return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
139
+ }
@@ -0,0 +1,6 @@
1
+ declare enum NodeType {
2
+ ELEMENT_NODE = 1,
3
+ TEXT_NODE = 3,
4
+ COMMENT_NODE = 8
5
+ }
6
+ export default NodeType;
@@ -0,0 +1,9 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ var NodeType;
4
+ (function (NodeType) {
5
+ NodeType[NodeType["ELEMENT_NODE"] = 1] = "ELEMENT_NODE";
6
+ NodeType[NodeType["TEXT_NODE"] = 3] = "TEXT_NODE";
7
+ NodeType[NodeType["COMMENT_NODE"] = 8] = "COMMENT_NODE";
8
+ })(NodeType || (NodeType = {}));
9
+ exports.default = NodeType;
@@ -0,0 +1 @@
1
+ export { parse as default } from './nodes/html';
package/dist/parse.js ADDED
@@ -0,0 +1,5 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.default = void 0;
4
+ var html_1 = require("./nodes/html");
5
+ Object.defineProperty(exports, "default", { enumerable: true, get: function () { return html_1.parse; } });
@@ -0,0 +1,6 @@
1
+ import { Options } from './nodes/html';
2
+ /**
3
+ * Parses a chunk of HTML source and reports whether it is valid,
4
+ * i.e. whether the stack returned by base_parse has exactly one entry.
5
+ */
6
+ export default function valid(data: string, options?: Partial<Options>): boolean;
package/dist/valid.js ADDED
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ var html_1 = require("./nodes/html");
4
+ /**
5
+ * Parses a chunk of HTML source and reports whether it is valid,
6
+ * i.e. whether the stack returned by base_parse has exactly one entry.
7
+ */
8
+ function valid(data, options) {
9
+ if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
10
+ var stack = (0, html_1.base_parse)(data, options);
11
+ return Boolean(stack.length === 1);
12
+ }
13
+ exports.default = valid;
package/package.json ADDED
@@ -0,0 +1,88 @@
1
+ {
2
+ "name": "node-html-parser",
3
+ "version": "4.1.5",
4
+ "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
+ "main": "dist/index.js",
6
+ "module": "dist/esm/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "scripts": {
9
+ "test": "mocha",
10
+ "lint": "eslint ./src/*.ts ./src/**/*.ts",
11
+ "clean": "del-cli ./dist/",
12
+ "ts:cjs": "tsc -m commonjs",
13
+ "ts:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
14
+ "ts:esm": "tsc -t es2019 -m esnext -d false --outDir ./dist/esm/",
15
+ "build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
16
+ "dev": "tsc -w & mocha -w ./test/*.js",
17
+ "pretest": "tsc -m commonjs",
18
+ "release": "yarn build && np",
19
+ "prepare": "npm run build"
20
+ },
21
+ "keywords": [
22
+ "parser",
23
+ "html",
24
+ "nodejs",
25
+ "typescript"
26
+ ],
27
+ "author": "Xiaoyi Shi <ashi009@gmail.com>",
28
+ "contributors": [
29
+ "taoqf<tao_qiufeng@126.com>"
30
+ ],
31
+ "license": "MIT",
32
+ "publishConfig": {
33
+ "registry": "https://registry.npmjs.org"
34
+ },
35
+ "dependencies": {
36
+ "css-select": "^4.1.3",
37
+ "he": "1.2.0"
38
+ },
39
+ "devDependencies": {
40
+ "@types/entities": "latest",
41
+ "@types/he": "latest",
42
+ "@types/node": "latest",
43
+ "@typescript-eslint/eslint-plugin": "latest",
44
+ "@typescript-eslint/eslint-plugin-tslint": "latest",
45
+ "@typescript-eslint/parser": "latest",
46
+ "blanket": "latest",
47
+ "cheerio": "^1.0.0-rc.5",
48
+ "del-cli": "latest",
49
+ "eslint": "latest",
50
+ "eslint-config-prettier": "latest",
51
+ "eslint-plugin-import": "latest",
52
+ "high5": "^1.0.0",
53
+ "htmlparser": "^1.7.7",
54
+ "htmlparser-benchmark": "^1.1.3",
55
+ "htmlparser2": "^6.0.0",
56
+ "mocha": "latest",
57
+ "mocha-each": "^2.0.1",
58
+ "np": "latest",
59
+ "parse5": "^6.0.1",
60
+ "should": "latest",
61
+ "spec": "latest",
62
+ "travis-cov": "latest",
63
+ "typescript": "next"
64
+ },
65
+ "config": {
66
+ "blanket": {
67
+ "pattern": "./dist/index.js",
68
+ "data-cover-never": [
69
+ "node_modules"
70
+ ]
71
+ },
72
+ "travis-cov": {
73
+ "threshold": 70
74
+ }
75
+ },
76
+ "directories": {
77
+ "test": "test"
78
+ },
79
+ "repository": {
80
+ "type": "git",
81
+ "url": "https://github.com/taoqf/node-fast-html-parser.git"
82
+ },
83
+ "bugs": {
84
+ "url": "https://github.com/taoqf/node-fast-html-parser/issues"
85
+ },
86
+ "homepage": "https://github.com/taoqf/node-fast-html-parser",
87
+ "sideEffects": false
88
+ }