node-html-parser 4.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +3 -0
- package/.eslintrc.json +226 -0
- package/.mocharc.yaml +1 -0
- package/.prettierrc +7 -0
- package/LICENSE +7 -0
- package/README.md +255 -0
- package/dist/back.d.ts +1 -0
- package/dist/back.js +6 -0
- package/dist/esm/back.js +3 -0
- package/dist/esm/index.js +7 -0
- package/dist/esm/matcher.js +101 -0
- package/dist/esm/nodes/comment.js +23 -0
- package/dist/esm/nodes/html.js +1048 -0
- package/dist/esm/nodes/node.js +25 -0
- package/dist/esm/nodes/text.js +95 -0
- package/dist/esm/nodes/type.js +7 -0
- package/dist/esm/parse.js +1 -0
- package/dist/esm/valid.js +9 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.js +21 -0
- package/dist/main.js +1542 -0
- package/dist/matcher.d.ts +6 -0
- package/dist/matcher.js +106 -0
- package/dist/nodes/comment.d.ts +18 -0
- package/dist/nodes/comment.js +51 -0
- package/dist/nodes/html.d.ts +206 -0
- package/dist/nodes/html.js +1188 -0
- package/dist/nodes/node.d.ts +18 -0
- package/dist/nodes/node.js +38 -0
- package/dist/nodes/text.d.ts +42 -0
- package/dist/nodes/text.js +139 -0
- package/dist/nodes/type.d.ts +6 -0
- package/dist/nodes/type.js +9 -0
- package/dist/parse.d.ts +1 -0
- package/dist/parse.js +5 -0
- package/dist/valid.d.ts +6 -0
- package/dist/valid.js +13 -0
- package/package.json +88 -0
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { Adapter } from 'css-select/lib/types';
|
|
2
|
+
import HTMLElement from './nodes/html';
|
|
3
|
+
import Node from './nodes/node';
|
|
4
|
+
/**
 * Type-guard signature: a function that reports whether the given `Node`
 * is an `HTMLElement`.
 */
export declare type Predicate = (node: Node) => node is HTMLElement;
|
|
5
|
+
/**
 * Default export of this module: a css-select `Adapter` specialised for this
 * package's `Node` / `HTMLElement` types.
 */
declare const _default: Adapter<Node, HTMLElement>;
export default _default;
|
package/dist/matcher.js
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
var type_1 = __importDefault(require("./nodes/type"));
|
|
7
|
+
/**
 * Tells whether `node` is an element node.
 *
 * Always returns a real boolean: a missing node (`null` / `undefined`)
 * yields `false` rather than the falsy input itself.
 *
 * @param node candidate node, may be null or undefined
 * @returns `true` only when `node.nodeType` equals `ELEMENT_NODE`
 */
function isTag(node) {
    return Boolean(node) && node.nodeType === type_1.default.ELEMENT_NODE;
}
|
|
10
|
+
/**
 * Reads an attribute from an element node.
 *
 * @param elem node to read from
 * @param name attribute name, passed to `elem.getAttribute`
 * @returns the result of `elem.getAttribute(name)` for element nodes,
 *          `undefined` for anything `isTag` rejects
 */
function getAttributeValue(elem, name) {
    if (!isTag(elem)) {
        return undefined;
    }
    return elem.getAttribute(name);
}
|
|
13
|
+
/**
 * Lower-cased tag name of a node.
 *
 * @param elem node whose `rawTagName` is read; may be null or undefined
 * @returns `rawTagName` in lower case, or `''` when the node or the name is missing
 */
function getName(elem) {
    var rawName = elem ? elem.rawTagName : '';
    return (rawName || '').toLowerCase();
}
|
|
16
|
+
/**
 * Child list of a node.
 *
 * @param node node to inspect
 * @returns `node.childNodes`; a falsy `node` is handed back unchanged
 */
function getChildren(node) {
    if (!node) {
        return node;
    }
    return node.childNodes;
}
|
|
19
|
+
/**
 * Parent of a node.
 *
 * @param node node to inspect
 * @returns `node.parentNode`, or `null` when `node` itself is falsy
 */
function getParent(node) {
    if (node) {
        return node.parentNode;
    }
    return null;
}
|
|
22
|
+
/**
 * Text of a node.
 *
 * Tolerates a missing node like the other accessors in this module
 * (`getName`, `getChildren`, `getParent`) instead of throwing a TypeError.
 *
 * @param node node whose `text` property is read; may be null or undefined
 * @returns `node.text`, or `''` when `node` is falsy
 */
function getText(node) {
    return node ? node.text : '';
}
|
|
25
|
+
/**
 * Drops every entry that is a duplicate of, or a descendant of, another
 * entry in the same array.
 *
 * The array is modified in place and also returned. Entries are visited from
 * the last index to the first so that `splice` never shifts an index that is
 * still to be visited.
 *
 * @param nodes array of nodes to prune (mutated)
 * @returns the same `nodes` array
 */
function removeSubsets(nodes) {
    for (var i = nodes.length - 1; i >= 0; i--) {
        var candidate = nodes[i];
        // Blank the slot so the candidate cannot match itself below;
        // a hit on the candidate therefore means a duplicate elsewhere.
        nodes[i] = null;
        var covered = false;
        // Walk candidate -> parent -> grandparent ... looking for any of them in the array.
        for (var cursor = candidate; cursor; cursor = getParent(cursor)) {
            if (nodes.indexOf(cursor) !== -1) {
                covered = true;
                break;
            }
        }
        if (covered) {
            nodes.splice(i, 1);
        }
        else {
            // Nothing else in the array covers it: put it back.
            nodes[i] = candidate;
        }
    }
    return nodes;
}
|
|
52
|
+
/**
 * Checks whether any element in `elems`, or any element below one of them,
 * satisfies `test`. Nodes rejected by `isTag` are skipped together with
 * whatever they contain.
 *
 * @param test predicate applied to element nodes
 * @param elems nodes to search
 * @returns `true` as soon as one element passes, otherwise `false`
 */
function existsOne(test, elems) {
    return elems.some(function (candidate) {
        if (!isTag(candidate)) {
            return false;
        }
        return test(candidate) || existsOne(test, getChildren(candidate));
    });
}
|
|
57
|
+
/**
 * Children of the node's parent (which includes the node itself).
 *
 * @param node node whose siblings are wanted
 * @returns `getChildren(parent)`; when there is no parent, the falsy parent
 *          value is returned as-is
 */
function getSiblings(node) {
    var container = getParent(node);
    if (!container) {
        return container;
    }
    return getChildren(container);
}
|
|
61
|
+
/**
 * Tells whether an attribute is present.
 *
 * @param elem node to inspect
 * @param name attribute name
 * @returns `true` unless `getAttributeValue(elem, name)` is `undefined`
 *          (so an empty-string value still counts as present)
 */
function hasAttrib(elem, name) {
    var value = getAttributeValue(elem, name);
    return typeof value !== 'undefined';
}
|
|
64
|
+
/**
 * Finds the first element, in document order, that satisfies `test`.
 *
 * Only element nodes are tested or descended into, matching `findAll` and
 * `existsOne`. Without this guard a predicate that accepts everything would
 * hand back a text or comment node, and `test` could be invoked on a
 * null entry.
 *
 * @param test predicate applied to element nodes
 * @param elems nodes to search, each followed by its descendants
 * @returns the first matching element, or `null` when none matches
 */
function findOne(test, elems) {
    for (var i = 0, l = elems.length; i < l; i++) {
        var el = elems[i];
        if (!isTag(el)) {
            continue;
        }
        if (test(el)) {
            return el;
        }
        var childs = getChildren(el);
        if (childs && childs.length > 0) {
            var found = findOne(test, childs);
            if (found) {
                return found;
            }
        }
    }
    return null;
}
|
|
80
|
+
/**
 * Collects every element, in document order, that satisfies `test`.
 *
 * Nodes rejected by `isTag` are skipped together with whatever they contain.
 * Matches are pushed into one shared array, so results are not re-copied with
 * `concat` at every level of the tree.
 *
 * @param test predicate applied to element nodes
 * @param nodes nodes to search, each followed by its descendants
 * @returns a new array of matching elements (empty when nothing matches)
 */
function findAll(test, nodes) {
    var result = [];
    (function walk(list) {
        for (var i = 0, j = list.length; i < j; i++) {
            var node = list[i];
            if (!isTag(node)) {
                continue;
            }
            if (test(node)) {
                result.push(node);
            }
            var childs = getChildren(node);
            if (childs) {
                walk(childs);
            }
        }
    })(nodes);
    return result;
}
|
|
93
|
+
// Bundle the functions defined in this module into the object that is
// published as the module's default export.
var adapter = {
    isTag: isTag,
    getAttributeValue: getAttributeValue,
    getName: getName,
    getChildren: getChildren,
    getParent: getParent,
    getText: getText,
    removeSubsets: removeSubsets,
    existsOne: existsOne,
    getSiblings: getSiblings,
    hasAttrib: hasAttrib,
    findOne: findOne,
    findAll: findAll
};
exports.default = adapter;
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import Node from './node';
|
|
2
|
+
import NodeType from './type';
|
|
3
|
+
import HTMLElement from './html';
|
|
4
|
+
/**
 * A comment node. It stores the text of the comment and serialises itself
 * back to `<!--` + text + `-->`.
 */
export default class CommentNode extends Node {
    /** Comment text as passed to the constructor, without the `<!--` / `-->` delimiters. */
    rawText: string;
    /**
     * @param rawText text of the comment
     * @param parentNode element this comment belongs to
     * @param range optional pair of numbers forwarded to the base `Node`
     *              constructor — presumably source offsets; confirm against `Node`
     */
    constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
    /**
     * Node type declaration (set to `NodeType.COMMENT_NODE` by the constructor).
     */
    nodeType: NodeType;
    /**
     * Get the text of this comment. The value is `rawText` returned as-is;
     * a comment has no children to aggregate.
     * @return text content
     */
    get text(): string;
    /** Serialise the comment, wrapping `rawText` in `<!--` and `-->`. */
    toString(): string;
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __extends = (this && this.__extends) || (function () {
|
|
3
|
+
var extendStatics = function (d, b) {
|
|
4
|
+
extendStatics = Object.setPrototypeOf ||
|
|
5
|
+
({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
|
|
6
|
+
function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
|
|
7
|
+
return extendStatics(d, b);
|
|
8
|
+
};
|
|
9
|
+
return function (d, b) {
|
|
10
|
+
if (typeof b !== "function" && b !== null)
|
|
11
|
+
throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
|
|
12
|
+
extendStatics(d, b);
|
|
13
|
+
function __() { this.constructor = d; }
|
|
14
|
+
d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
|
|
15
|
+
};
|
|
16
|
+
})();
|
|
17
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
18
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
19
|
+
};
|
|
20
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
21
|
+
var node_1 = __importDefault(require("./node"));
|
|
22
|
+
var type_1 = __importDefault(require("./type"));
|
|
23
|
+
var CommentNode = /** @class */ (function (_super) {
    __extends(CommentNode, _super);
    /**
     * Creates a comment node.
     * @param rawText text of the comment (stored on the instance unchanged)
     * @param parentNode forwarded to the base constructor
     * @param range forwarded to the base constructor
     */
    function CommentNode(rawText, parentNode, range) {
        // The base constructor may return a replacement object; otherwise use `this`.
        var self = _super.call(this, parentNode, range) || this;
        self.rawText = rawText;
        /**
         * Node Type declaration.
         */
        self.nodeType = type_1.default.COMMENT_NODE;
        return self;
    }
    // Read-only `text` accessor on the prototype: the comment text, exactly as stored.
    Object.defineProperty(CommentNode.prototype, "text", {
        configurable: true,
        enumerable: false,
        get: function () {
            return this.rawText;
        }
    });
    /**
     * Serialises the comment back to markup.
     * @return {string} the stored text wrapped in comment delimiters
     */
    CommentNode.prototype.toString = function () {
        var body = this.rawText;
        return "<!--" + body + "-->";
    };
    return CommentNode;
}(node_1.default));
exports.default = CommentNode;
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import Node from './node';
|
|
2
|
+
import NodeType from './type';
|
|
3
|
+
/**
 * The `id` and `class` attribute values handed to the `HTMLElement`
 * constructor as `keyAttrs`. Both are optional.
 */
export interface KeyAttributes {
    id?: string;
    class?: string;
}
|
|
7
|
+
/**
 * String-keyed map of attribute values. Returned by `HTMLElement.attrs`
 * and accepted by `HTMLElement.setAttributes`.
 */
export interface Attributes {
    [key: string]: string;
}
|
|
10
|
+
/**
 * String-keyed map of attribute values as returned by
 * `HTMLElement.rawAttributes` (documented there as the escaped, as-is form).
 */
export interface RawAttributes {
    [key: string]: string;
}
|
|
13
|
+
/** Position keywords accepted as the `where` argument of `HTMLElement.insertAdjacentHTML`. */
export declare type InsertPosition = 'beforebegin' | 'afterbegin' | 'beforeend' | 'afterend';
|
|
14
|
+
/**
 * Token list type used for `HTMLElement.classList`. It is module-private:
 * declared here but not exported.
 *
 * NOTE(review): member names mirror the browser's DOMTokenList; the exact
 * behaviour lives in the implementation file — confirm there before relying
 * on details.
 */
declare class DOMTokenList {
    private _set;
    private _afterUpdate;
    private _validate;
    /**
     * @param valuesInit optional initial tokens
     * @param afterUpdate optional callback that receives this list —
     *                    presumably invoked after a mutation; confirm
     */
    constructor(valuesInit?: string[], afterUpdate?: (t: DOMTokenList) => void);
    add(c: string): void;
    replace(c1: string, c2: string): void;
    remove(c: string): void;
    toggle(c: string): void;
    contains(c: string): boolean;
    get length(): number;
    values(): IterableIterator<string>;
    get value(): string[];
    toString(): string;
}
|
|
29
|
+
/**
 * HTMLElement, which contains a set of children.
 *
 * Note: this is a minimalist implementation, not a complete DOM tree API.
 * Only the members declared on this class and on its base `Node` are
 * available (for example `nextSibling` and `nextElementSibling` below).
 * @class HTMLElement
 * @extends {Node}
 */
export default class HTMLElement extends Node {
    private rawAttrs;
    private _attrs;
    private _rawAttrs;
    rawTagName: string;
    id: string;
    classList: DOMTokenList;
    /**
     * Node Type declaration.
     */
    nodeType: NodeType;
    /**
     * Quote attribute values
     * @param attr attribute value
     * @returns quoted value
     */
    private quoteAttribute;
    /**
     * Creates an instance of HTMLElement.
     * @param tagName tag name of the element
     * @param keyAttrs id and class attribute
     * @param rawAttrs attributes in string
     * @param parentNode parent element, or `null` when there is none
     * @param range optional pair of numbers — presumably source offsets; confirm against `Node`
     *
     * @memberof HTMLElement
     */
    constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range?: [number, number]);
    /**
     * Remove current element
     */
    remove(): void;
    /**
     * Remove child node from childNodes array
     * @param node node to remove
     */
    removeChild(node: Node): void;
    /**
     * Exchanges given child with new child
     * @param oldNode node to exchange
     * @param newNode new node
     */
    exchangeChild(oldNode: Node, newNode: Node): void;
    get tagName(): string;
    get localName(): string;
    /**
     * Get escaped (as-is) text value of current node and its children.
     * @return text content
     */
    get rawText(): string;
    get textContent(): string;
    set textContent(val: string);
    /**
     * Get unescaped text value of current node and its children.
     * @return text content
     */
    get text(): string;
    /**
     * Get structured Text (with '\n' etc.)
     * @return structured text
     */
    get structuredText(): string;
    toString(): string;
    get innerHTML(): string;
    set innerHTML(content: string);
    set_content(content: string | Node | Node[], options?: Options): void;
    replaceWith(...nodes: (string | Node)[]): void;
    get outerHTML(): string;
    /**
     * Trim element from right (in block) after seeing pattern in a TextNode.
     * @param pattern pattern to find
     * @return reference to current node
     */
    trimRight(pattern: RegExp): this;
    /**
     * Get DOM structure
     * @return structure
     */
    get structure(): string;
    /**
     * Remove whitespaces in this sub tree.
     * @return reference to this element
     */
    removeWhitespace(): this;
    /**
     * Query CSS selector to find matching nodes.
     * @param selector Simplified CSS selector
     * @return matching elements
     */
    querySelectorAll(selector: string): HTMLElement[];
    /**
     * Query CSS Selector to find matching node.
     * @param selector Simplified CSS selector
     * @return matching node
     * NOTE(review): presumably nothing (null) is returned when no element
     * matches, although the declared type does not say so — confirm.
     */
    querySelector(selector: string): HTMLElement;
    /**
     * Traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
     * NOTE(review): the declared return type does not include `null`; callers should still check the result.
     * @param selector a DOMString containing a selector list
     */
    closest(selector: string): Node;
    /**
     * Append a child node to childNodes
     * @param node node to append
     * @return node appended
     */
    appendChild<T extends Node = Node>(node: T): T;
    /**
     * Get first child node
     * @return first child node
     */
    get firstChild(): Node;
    /**
     * Get last child node
     * @return last child node
     */
    get lastChild(): Node;
    /**
     * Get attributes
     * @access private
     * @return parsed and unescaped attributes
     */
    get attrs(): Attributes;
    get attributes(): Record<string, string>;
    /**
     * Get escaped (as-is) attributes
     * @return parsed attributes
     */
    get rawAttributes(): RawAttributes;
    removeAttribute(key: string): void;
    hasAttribute(key: string): boolean;
    /**
     * Get an attribute
     * @param key The attribute name
     * @return value of the attribute, or `undefined` when there is none
     */
    getAttribute(key: string): string | undefined;
    /**
     * Set an attribute value to the HTMLElement
     * @param key The attribute name
     * @param value The value to set, or null / undefined to remove an attribute
     * NOTE(review): the declared parameter type is `string` only, so passing
     * null / undefined needs a cast under strict null checks.
     */
    setAttribute(key: string, value: string): void;
    /**
     * Replace all the attributes of the HTMLElement by the provided attributes
     * @param attributes the new attribute set
     */
    setAttributes(attributes: Attributes): void;
    insertAdjacentHTML(where: InsertPosition, html: string): void;
    get nextSibling(): Node;
    get nextElementSibling(): HTMLElement;
    get classNames(): string;
}
|
|
187
|
+
/**
 * Parser options. `parse` and `base_parse` accept a `Partial<Options>`, so
 * every field may be omitted by callers.
 */
export interface Options {
    /** NOTE(review): presumably makes the parser lower-case tag names — confirm in the implementation. */
    lowerCaseTagName: boolean;
    /** NOTE(review): presumably controls whether comment nodes are kept — confirm in the implementation. */
    comment: boolean;
    /** Per-tag boolean flags, keyed by tag name. NOTE(review): exact meaning of the flag is defined by the parser — confirm. */
    blockTextElements: {
        [tag: string]: boolean;
    };
}
|
|
194
|
+
/**
 * Parse a chunk of HTML source and return an array of elements.
 *
 * Unlike `parse`, which returns a single `HTMLElement`, this function returns
 * `HTMLElement[]`.
 * NOTE(review): what the array contains (e.g. the parser's element stack) is
 * not visible from this declaration — confirm in the implementation.
 * @param data html
 * @param options optional parser options
 * @return array of elements produced by the parser
 */
export declare function base_parse(data: string, options?: Partial<Options>): HTMLElement[];
|
|
201
|
+
/**
 * Parse a chunk of HTML source and return a root element.
 * @param data html
 * @param options optional parser options
 * @return root element
 */
export declare function parse(data: string, options?: Partial<Options>): HTMLElement;
|
|
206
|
+
export {};
|