node-html-parser 4.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintignore ADDED
@@ -0,0 +1,3 @@
1
+ test/
2
+ dist/
3
+ node_modules/
package/.eslintrc.json ADDED
@@ -0,0 +1,226 @@
1
+ {
2
+ "env": {
3
+ "es6": true,
4
+ "node": true
5
+ },
6
+ "extends": [
7
+ "eslint:recommended",
8
+ "plugin:@typescript-eslint/recommended",
9
+ "plugin:@typescript-eslint/recommended-requiring-type-checking",
10
+ "plugin:@typescript-eslint/eslint-recommended",
11
+ "plugin:import/errors",
12
+ "plugin:import/warnings",
13
+ "plugin:import/typescript",
14
+ "prettier"
15
+ ],
16
+ "parser": "@typescript-eslint/parser",
17
+ "parserOptions": {
18
+ "ecmaVersion": 2018,
19
+ "sourceType": "module",
20
+ "ecmaFeatures": {
21
+ "impliedStrict": true,
22
+ "jsx": true
23
+ },
24
+ "project": "tsconfig.json",
25
+ "extraFileExtensions": [
26
+ ".ts",
27
+ ".tsx"
28
+ ]
29
+ },
30
+ "plugins": [
31
+ "import",
32
+ "@typescript-eslint"
33
+ ],
34
+ "rules": {
35
+ "no-prototype-builtins": "off",
36
+ "@typescript-eslint/explicit-module-boundary-types": "off",
37
+ "arrow-parens": [
38
+ "off"
39
+ ],
40
+ "brace-style": [
41
+ "off",
42
+ "stroustrup"
43
+ ],
44
+ "consistent-return": "off",
45
+ "camelcase": "off",
46
+ "@typescript-eslint/camelcase": "off",
47
+ "curly": [
48
+ "error",
49
+ "multi-line",
50
+ "consistent"
51
+ ],
52
+ "eol-last": "error",
53
+ "linebreak-style": [
54
+ "error",
55
+ "unix"
56
+ ],
57
+ "new-parens": "error",
58
+ "no-console": "off",
59
+ "no-constant-condition": [
60
+ "warn",
61
+ {
62
+ "checkLoops": false
63
+ }
64
+ ],
65
+ "no-caller": "error",
66
+ "no-debugger": "warn",
67
+ "no-dupe-class-members": "off",
68
+ "no-duplicate-imports": "error",
69
+ "no-else-return": "warn",
70
+ "no-empty": [
71
+ "warn",
72
+ {
73
+ "allowEmptyCatch": true
74
+ }
75
+ ],
76
+ "no-eval": "error",
77
+ "no-ex-assign": "warn",
78
+ "no-extend-native": "error",
79
+ "no-extra-bind": "error",
80
+ "no-floating-decimal": "error",
81
+ "no-implicit-coercion": "error",
82
+ "no-implied-eval": "error",
83
+ // Turn off until fix for: https://github.com/typescript-eslint/typescript-eslint/issues/239
84
+ "no-inner-declarations": "off",
85
+ "no-lone-blocks": "error",
86
+ "no-lonely-if": "error",
87
+ "no-loop-func": "error",
88
+ "no-multi-spaces": "error",
89
+ "no-return-assign": "error",
90
+ "no-return-await": "warn",
91
+ "no-self-compare": "error",
92
+ "no-sequences": "error",
93
+ "no-template-curly-in-string": "warn",
94
+ "no-throw-literal": "error",
95
+ "no-unmodified-loop-condition": "warn",
96
+ "no-unneeded-ternary": "error",
97
+ "no-unused-expressions": [
98
+ "warn",
99
+ {
100
+ "allowShortCircuit": true
101
+ }
102
+ ],
103
+ "no-use-before-define": "off",
104
+ "no-useless-call": "error",
105
+ "no-useless-catch": "error",
106
+ "no-useless-computed-key": "error",
107
+ "no-useless-concat": "error",
108
+ "no-useless-rename": "error",
109
+ "no-useless-return": "error",
110
+ "no-var": "error",
111
+ "no-with": "error",
112
+ "object-shorthand": "off",
113
+ "one-var": [
114
+ "error",
115
+ "never"
116
+ ],
117
+ "prefer-arrow-callback": "error",
118
+ "prefer-const": "error",
119
+ "prefer-numeric-literals": "error",
120
+ "prefer-object-spread": "error",
121
+ "prefer-rest-params": "error",
122
+ "prefer-spread": "error",
123
+ "prefer-template": "error",
124
+ "quotes": "off",
125
+ // Turn off until fix for: https://github.com/eslint/eslint/issues/11899
126
+ "require-atomic-updates": "off",
127
+ "semi": [
128
+ "error",
129
+ "always"
130
+ ],
131
+ "semi-style": [
132
+ "error",
133
+ "last"
134
+ ],
135
+ "sort-imports": [
136
+ "error",
137
+ {
138
+ "ignoreCase": true,
139
+ "ignoreDeclarationSort": true,
140
+ "ignoreMemberSort": false,
141
+ "memberSyntaxSortOrder": [
142
+ "none",
143
+ "all",
144
+ "multiple",
145
+ "single"
146
+ ]
147
+ }
148
+ ],
149
+ "yoda": "error",
150
+ "import/export": "off",
151
+ "import/extensions": [
152
+ "error",
153
+ "never"
154
+ ],
155
+ "import/named": "off",
156
+ "import/namespace": "off",
157
+ "import/newline-after-import": "warn",
158
+ "import/no-cycle": "off",
159
+ "import/no-dynamic-require": "error",
160
+ "import/no-default-export": "off",
161
+ "import/no-duplicates": "error",
162
+ "import/no-self-import": "error",
163
+ "import/no-unresolved": [
164
+ "warn",
165
+ {
166
+ "ignore": [
167
+ "vscode"
168
+ ]
169
+ }
170
+ ],
171
+ "import/order": [
172
+ "warn",
173
+ {
174
+ "groups": [
175
+ "builtin",
176
+ "external",
177
+ "internal",
178
+ [
179
+ "index",
180
+ "sibling",
181
+ "parent"
182
+ ]
183
+ ]
184
+ }
185
+ ],
186
+ "@typescript-eslint/require-await": "off",
187
+ "@typescript-eslint/consistent-type-assertions": "off",
188
+ "@typescript-eslint/explicit-function-return-type": "off",
189
+ "@typescript-eslint/explicit-member-accessibility": "error",
190
+ "@typescript-eslint/interface-name-prefix": "off",
191
+ "@typescript-eslint/no-empty-function": [
192
+ "warn",
193
+ {
194
+ "allow": [
195
+ "constructors"
196
+ ]
197
+ }
198
+ ],
199
+ "@typescript-eslint/no-empty-interface": "off",
200
+ "@typescript-eslint/no-explicit-any": "error",
201
+ "@typescript-eslint/no-inferrable-types": [
202
+ "warn",
203
+ {
204
+ "ignoreParameters": true,
205
+ "ignoreProperties": true
206
+ }
207
+ ],
208
+ "@typescript-eslint/no-misused-promises": [
209
+ "error",
210
+ {
211
+ "checksVoidReturn": false
212
+ }
213
+ ],
214
+ "@typescript-eslint/no-namespace": "off",
215
+ "@typescript-eslint/no-non-null-assertion": "off",
216
+ "@typescript-eslint/no-parameter-properties": "off",
217
+ "@typescript-eslint/no-unused-vars": [
218
+ "warn",
219
+ {
220
+ "args": "none"
221
+ }
222
+ ],
223
+ "@typescript-eslint/no-use-before-define": "off",
224
+ "@typescript-eslint/unbound-method": "off" // Too many bugs right now: https://github.com/typescript-eslint/typescript-eslint/issues?utf8=%E2%9C%93&q=is%3Aissue+is%3Aopen+unbound-method
225
+ }
226
+ }
package/.mocharc.yaml ADDED
@@ -0,0 +1 @@
1
+ require: blanket,should,spec
package/.prettierrc ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "trailingComma": "es5",
3
+ "useTabs": true,
4
+ "tabWidth": 4,
5
+ "printWidth": 140,
6
+ "singleQuote": true
7
+ }
package/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2019 Tao Qiufeng
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,255 @@
1
+ # Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://travis-ci.org/taoqf/node-html-parser.svg?branch=master)](https://travis-ci.org/taoqf/node-html-parser)
2
+
3
+ Fast HTML Parser is a _very fast_ HTML parser that generates a simplified
4
+ DOM tree with element query support.
5
+
6
+ By design, it aims to parse massive HTML files at the lowest possible cost, so
7
+ performance is the top priority. For this reason, some malformed HTML may not
8
+ be parsed correctly, but the most common errors are covered (e.g. HTML4-style
9
+ unclosed `<li>`, `<td>`, etc.).
10
+
11
+ ## Install
12
+
13
+
14
+ ```shell
15
+ npm install --save node-html-parser
16
+ ```
17
+
18
+ > Note: when using Fast HTML Parser in a Typescript project the minimum Typescript version supported is `^4.1.2`.
19
+
20
+ ## Performance
21
+
22
+ Faster than htmlparser2!
23
+
24
+ ```shell
25
+ htmlparser :26.7111 ms/file ± 170.066
26
+ cheerio :24.2480 ms/file ± 17.1711
27
+ parse5 :13.7239 ms/file ± 8.68561
28
+ high5 :7.75466 ms/file ± 5.33549
29
+ htmlparser2 :5.27376 ms/file ± 8.68456
30
+ node-html-parser:2.85768 ms/file ± 2.87784
31
+ ```
32
+
33
+ Tested with [htmlparser-benchmark](https://github.com/AndreasMadsen/htmlparser-benchmark).
34
+
35
+ ## Usage
36
+
37
+ ```ts
38
+ import { parse } from 'node-html-parser';
39
+
40
+ const root = parse('<ul id="list"><li>Hello World</li></ul>');
41
+
42
+ console.log(root.firstChild.structure);
43
+ // ul#list
44
+ // li
45
+ // #text
46
+
47
+ console.log(root.querySelector('#list'));
48
+ // { tagName: 'ul',
49
+ // rawAttrs: 'id="list"',
50
+ // childNodes:
51
+ // [ { tagName: 'li',
52
+ // rawAttrs: '',
53
+ // childNodes: [Object],
54
+ // classNames: [] } ],
55
+ // id: 'list',
56
+ // classNames: [] }
57
+ console.log(root.toString());
58
+ // <ul id="list"><li>Hello World</li></ul>
59
+ root.set_content('<li>Hello World</li>');
60
+ root.toString(); // <li>Hello World</li>
61
+ ```
62
+
63
+ ```js
64
+ var HTMLParser = require('node-html-parser');
65
+
66
+ var root = HTMLParser.parse('<ul id="list"><li>Hello World</li></ul>');
67
+ ```
68
+
69
+ ## Global Methods
70
+
71
+ ### parse(data[, options])
72
+
73
+ Parse given data, and return root of the generated DOM.
74
+
75
+ - **data**, data to parse
76
+ - **options**, parse options
77
+
78
+ ```js
79
+ {
80
+ lowerCaseTagName: false, // convert tag name to lower case (hurts performance heavily)
81
+ comment: false, // retrieve comments (hurts performance slightly)
82
+ blockTextElements: {
83
+ script: true, // keep text content when parsing
84
+ noscript: true, // keep text content when parsing
85
+ style: true, // keep text content when parsing
86
+ pre: true // keep text content when parsing
87
+ }
88
+ }
89
+ ```
90
+
91
+ ### valid(data[, options])
92
+
93
+ Parse the given data; return true if it is valid, and false otherwise.
94
+
95
+ ## HTMLElement Methods
96
+
97
+ ### HTMLElement#trimRight()
98
+
99
+ Trim element from right (in block) after seeing pattern in a TextNode.
100
+
101
+ ### HTMLElement#removeWhitespace()
102
+
103
+ Remove whitespaces in this sub tree.
104
+
105
+ ### HTMLElement#querySelectorAll(selector)
106
+
107
+ Query CSS selector to find matching nodes.
108
+
109
+ Note: Full css3 selector supported since v3.0.0.
110
+
111
+ ### HTMLElement#querySelector(selector)
112
+
113
+ Query CSS Selector to find matching node.
114
+
115
+ ### HTMLElement#closest(selector)
116
+
117
+ Query closest element by css selector.
118
+
119
+ ### HTMLElement#appendChild(node)
120
+
121
+ Append a child node to childNodes
122
+
123
+ ### HTMLElement#insertAdjacentHTML(where, html)
124
+
125
+ Parses the specified text as HTML and inserts the resulting nodes into the DOM tree at a specified position.
126
+
127
+ ### HTMLElement#setAttribute(key: string, value: string)
128
+
129
+ Set `value` to `key` attribute.
130
+
131
+ ### HTMLElement#setAttributes(attrs: Record<string, string>)
132
+
133
+ Set attributes of the element.
134
+
135
+ ### HTMLElement#removeAttribute(key: string)
136
+
137
+ Remove `key` attribute.
138
+
139
+ ### HTMLElement#getAttribute(key: string)
140
+
141
+ Get `key` attribute.
142
+
143
+ ### HTMLElement#exchangeChild(oldNode: Node, newNode: Node)
144
+
145
+ Exchanges given child with new child.
146
+
147
+ ### HTMLElement#removeChild(node: Node)
148
+
149
+ Remove child node.
150
+
151
+ ### HTMLElement#toString()
152
+
153
+ Same as [outerHTML](#htmlelementouterhtml)
154
+
155
+ ### HTMLElement#set_content(content: string | Node | Node[])
156
+
157
+ Set content. **Notice**: Do not set content of the **root** node.
158
+
159
+ ### HTMLElement#remove()
160
+
161
+ Remove current element.
162
+
163
+ ### HTMLElement#replaceWith(...nodes: (string | Node)[])
164
+
165
+ Replace current element with other node(s).
166
+
167
+ ### HTMLElement#classList
168
+
169
+ #### HTMLElement#classList.add
170
+
171
+ Add class name.
172
+
173
+ #### HTMLElement#classList.replace(old: string, new: string)
174
+
175
+ Replace class name with another one.
176
+
177
+ #### HTMLElement#classList.remove()
178
+
179
+ Remove class name.
180
+
181
+ #### HTMLElement#classList.toggle(className: string):void
182
+
183
+ Toggle class.
184
+
185
+ #### HTMLElement#classList.contains(className: string): boolean
186
+
187
+ Check whether the class list contains the given class name.
188
+
189
+ #### HTMLElement#classList.values()
190
+
191
+ get class names
192
+
193
+ ## HTMLElement Properties
194
+
195
+ ### HTMLElement#text
196
+
197
+ Get unescaped text value of current node and its children. Like `innerText`.
198
+ (slow for the first time)
199
+
200
+ ### HTMLElement#rawText
201
+
202
+ Get escaped (as-is) text value of current node and its children. May have
203
+ `&amp;` in it. (fast)
204
+
205
+ ### HTMLElement#tagName
206
+
207
+ Get tag name of HTMLElement. Notice: the returned value would be an uppercase string.
208
+
209
+ ### HTMLElement#structuredText
210
+
211
+ Get structured Text
212
+
213
+ ### HTMLElement#structure
214
+
215
+ Get DOM structure
216
+
217
+ ### HTMLElement#firstChild
218
+
219
+ Get first child node
220
+
221
+ ### HTMLElement#lastChild
222
+
223
+ Get last child node
224
+
225
+ ### HTMLElement#innerHTML
226
+
227
+ Set or Get innerHTML.
228
+
229
+ ### HTMLElement#outerHTML
230
+
231
+ Get outerHTML.
232
+
233
+ ### HTMLElement#nextSibling
234
+
235
+ Returns a reference to the next child node of the current element's parent.
236
+
237
+ ### HTMLElement#nextElementSibling
238
+
239
+ Returns a reference to the next child element of the current element's parent.
240
+
241
+ ### HTMLElement#textContent
242
+
243
+ Get or Set textContent of current element, more efficient than [set_content](#htmlelementset_contentcontent-string--node--node).
244
+
245
+ ### HTMLElement#attributes
246
+
247
+ Get all attributes of current element. **Notice: do not try to change the returned value.**
248
+
249
+ ### HTMLElement#classList
250
+
251
+ Get the class list of current element. **Notice: do not try to change the returned value.**
252
+
253
+ ### HTMLElement#range
254
+
255
+ Corresponding source code start and end indexes (ie [ 0, 40 ])
package/dist/back.d.ts ADDED
@@ -0,0 +1 @@
1
+ export default function arr_back<T>(arr: T[]): T;
package/dist/back.js ADDED
@@ -0,0 +1,6 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
/**
 * Return the last element of an array.
 * @param {Array} arr array to read from
 * @return {*} the element at index `arr.length - 1`; `undefined` when `arr` is empty
 */
function arr_back(arr) {
    const lastIndex = arr.length - 1;
    return arr[lastIndex];
}
6
+ exports.default = arr_back;
@@ -0,0 +1,3 @@
1
/**
 * Return the last element of an array.
 * @param {Array} arr array to read from
 * @return {*} the element at index `arr.length - 1`; `undefined` when `arr` is empty
 */
export default function arr_back(arr) {
    const lastIndex = arr.length - 1;
    return arr[lastIndex];
}
@@ -0,0 +1,7 @@
1
+ export { default as CommentNode } from './nodes/comment';
2
+ export { default as HTMLElement } from './nodes/html';
3
+ export { default as parse, default } from './parse';
4
+ export { default as valid } from './valid';
5
+ export { default as Node } from './nodes/node';
6
+ export { default as TextNode } from './nodes/text';
7
+ export { default as NodeType } from './nodes/type';
@@ -0,0 +1,101 @@
1
+ import NodeType from './nodes/type';
2
/**
 * Tell whether a node is an element node.
 * @param {*} node node to inspect; may be null or undefined
 * @return {*} the falsy `node` itself when no node is given, otherwise
 *   `true` when `node.nodeType` equals `NodeType.ELEMENT_NODE`, else `false`
 */
function isTag(node) {
    if (!node) {
        // Mirror short-circuit semantics: hand back the falsy input unchanged.
        return node;
    }
    return node.nodeType === NodeType.ELEMENT_NODE;
}
5
/**
 * Read an attribute from a node, but only when the node is an element.
 * @param {*} elem node to read from
 * @param {string} name attribute name passed to `elem.getAttribute`
 * @return {*} result of `elem.getAttribute(name)` for element nodes,
 *   `undefined` for anything else
 */
function getAttributeValue(elem, name) {
    if (isTag(elem)) {
        return elem.getAttribute(name);
    }
    return undefined;
}
8
/**
 * Get the lower-cased tag name of a node.
 * @param {*} elem node to inspect; may be null or undefined
 * @return {string} `elem.rawTagName` in lower case, or `''` when the node
 *   or its `rawTagName` is missing/empty
 */
function getName(elem) {
    const rawName = (elem && elem.rawTagName) || '';
    return rawName.toLowerCase();
}
11
/**
 * Get the child nodes of a node.
 * @param {*} node node to inspect; may be null or undefined
 * @return {*} `node.childNodes` when a node is given, otherwise the falsy
 *   `node` value itself
 */
function getChildren(node) {
    return node ? node.childNodes : node;
}
14
/**
 * Get the parent of a node.
 * @param {*} node node to inspect; may be null or undefined
 * @return {*} `node.parentNode` when a node is given, otherwise `null`
 */
function getParent(node) {
    if (!node) {
        return null;
    }
    return node.parentNode;
}
17
/**
 * Get the text of a node.
 * @param {*} node node to read; must not be null or undefined
 * @return {*} the value of the node's `text` property
 */
function getText(node) {
    const { text } = node;
    return text;
}
20
/**
 * Drop every node that is a duplicate of, or a descendant of, another node
 * in the same array. The array is modified in place.
 * @param {Array} nodes nodes to filter; mutated by this call
 * @return {Array} the same `nodes` array after filtering
 */
function removeSubsets(nodes) {
    // Walk backwards so that removing the current slot never shifts
    // entries that are still waiting to be examined.
    for (let pos = nodes.length - 1; pos >= 0; pos--) {
        const candidate = nodes[pos];
        // Blank the slot so the candidate cannot match itself during the lookup.
        nodes[pos] = null;
        let covered = false;
        // Check the candidate and then each of its ancestors in turn.
        for (let current = candidate; current; current = getParent(current)) {
            if (nodes.includes(current)) {
                covered = true;
                break;
            }
        }
        if (covered) {
            // Already represented elsewhere in the array: remove the slot.
            nodes.splice(pos, 1);
        }
        else {
            // Unique: put the candidate back where it was.
            nodes[pos] = candidate;
        }
    }
    return nodes;
}
47
/**
 * Check whether any element in the given nodes, or in their descendants,
 * satisfies a predicate. Non-element nodes are ignored and not descended into.
 * @param {Function} test predicate called with each element node
 * @param {Array} elems nodes to search
 * @return {boolean} `true` as soon as one element passes `test`, else `false`
 */
function existsOne(test, elems) {
    return elems.some((candidate) => {
        if (!isTag(candidate)) {
            return false;
        }
        return test(candidate) || existsOne(test, getChildren(candidate));
    });
}
52
/**
 * Get all children of a node's parent (the node itself included).
 * @param {*} node node whose siblings are wanted
 * @return {*} the parent's children, or the falsy parent value when the
 *   node has no parent
 */
function getSiblings(node) {
    const parentNode = getParent(node);
    if (!parentNode) {
        return parentNode;
    }
    return getChildren(parentNode);
}
56
/**
 * Check whether a node carries a given attribute.
 * @param {*} elem node to inspect
 * @param {string} name attribute name
 * @return {boolean} `true` unless `getAttributeValue(elem, name)` yields `undefined`
 */
function hasAttrib(elem, name) {
    const value = getAttributeValue(elem, name);
    return typeof value !== 'undefined';
}
59
/**
 * Depth-first search for the first node that satisfies a predicate.
 * Each node is tested before its children are searched.
 * @param {Function} test predicate called with each visited node
 * @param {Array} elems nodes to search
 * @return {*} the first matching node, or `null` when nothing matches
 */
function findOne(test, elems) {
    let found = null;
    const total = elems.length;
    let index = 0;
    // Stop as soon as a truthy match has been recorded.
    while (index < total && !found) {
        const current = elems[index];
        index += 1;
        if (test(current)) {
            found = current;
            continue;
        }
        const kids = getChildren(current);
        if (kids && kids.length > 0) {
            found = findOne(test, kids);
        }
    }
    return found;
}
75
/**
 * Collect every element node, in document (pre-order) order, that satisfies
 * a predicate. Non-element nodes are skipped and not descended into.
 *
 * All matches are appended to one shared array instead of concatenating the
 * result of every recursive call, which avoids re-copying the accumulated
 * matches at each level of the tree.
 * @param {Function} test predicate called with each element node
 * @param {Array} nodes nodes to search
 * @return {Array} a new array holding the matching elements
 */
function findAll(test, nodes) {
    const matches = [];
    // Recursive walker that appends into the shared `matches` array.
    const visit = (level) => {
        for (let i = 0, count = level.length; i < count; i++) {
            const node = level[i];
            if (!isTag(node)) {
                continue;
            }
            if (test(node)) {
                matches.push(node);
            }
            const children = getChildren(node);
            if (children) {
                visit(children);
            }
        }
    };
    visit(nodes);
    return matches;
}
88
/**
 * Default export: the node helper functions of this module gathered into a
 * single object, each under its own function name.
 */
export default {
    isTag: isTag,
    getAttributeValue: getAttributeValue,
    getName: getName,
    getChildren: getChildren,
    getParent: getParent,
    getText: getText,
    removeSubsets: removeSubsets,
    existsOne: existsOne,
    getSiblings: getSiblings,
    hasAttrib: hasAttrib,
    findOne: findOne,
    findAll: findAll
};
@@ -0,0 +1,23 @@
1
+ import Node from './node';
2
+ import NodeType from './type';
3
/**
 * Node holding the content of an HTML comment.
 */
export default class CommentNode extends Node {
    /**
     * @param {string} rawText comment content, stored as-is on the instance
     * @param {*} parentNode forwarded unchanged to the `Node` constructor
     * @param {*} range forwarded unchanged to the `Node` constructor
     */
    constructor(rawText, parentNode, range) {
        super(parentNode, range);
        this.rawText = rawText;
        /**
         * Node type marker for comment nodes.
         * @type {Number}
         */
        this.nodeType = NodeType.COMMENT_NODE;
    }
    /**
     * Text of the comment; identical to `rawText`.
     * @return {string} text content
     */
    get text() {
        const { rawText } = this;
        return rawText;
    }
    /**
     * Serialize the comment back to markup.
     * @return {string} `rawText` wrapped in `<!--` and `-->`
     */
    toString() {
        const { rawText } = this;
        return `<!--${rawText}-->`;
    }
}