node-html-parser 4.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ import { Adapter } from 'css-select/lib/types';
2
+ import HTMLElement from './nodes/html';
3
+ import Node from './nodes/node';
4
/**
 * Type-guard callback used by the adapter: receives any `Node` and, when it
 * returns true, narrows that node to `HTMLElement`.
 */
export type Predicate = (node: Node) => node is HTMLElement;
5
/**
 * Default export of this module: a css-select `Adapter` whose generic node
 * type is `Node` and whose element type is `HTMLElement`.
 */
declare const adapter: Adapter<Node, HTMLElement>;
export default adapter;
@@ -0,0 +1,106 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ var type_1 = __importDefault(require("./nodes/type"));
7
/**
 * Reports whether `node` is an element node.
 *
 * @param node candidate node; may be null or undefined
 * @returns the falsy `node` itself when nothing was passed, otherwise whether
 *          `node.nodeType` equals `ELEMENT_NODE` from `./nodes/type`
 */
function isTag(node) {
    if (!node) {
        // Keep the short-circuit result (null/undefined/...) rather than coercing to false.
        return node;
    }
    return node.nodeType === type_1.default.ELEMENT_NODE;
}
10
/**
 * Reads attribute `name` from `elem`.
 *
 * @param elem node to read from
 * @param name attribute name
 * @returns `elem.getAttribute(name)` when `elem` is an element node (per
 *          `isTag`), otherwise `undefined`
 */
function getAttributeValue(elem, name) {
    if (!isTag(elem)) {
        return undefined;
    }
    return elem.getAttribute(name);
}
13
/**
 * Returns the lower-cased tag name of `elem`.
 *
 * @param elem node whose `rawTagName` is read; may be null or undefined
 * @returns `rawTagName` in lower case, or `''` when `elem` or its
 *          `rawTagName` is falsy
 */
function getName(elem) {
    var raw = elem ? elem.rawTagName : '';
    if (!raw) {
        raw = '';
    }
    return raw.toLowerCase();
}
16
+ function getChildren(node) {
17
+ return node && node.childNodes;
18
+ }
19
+ function getParent(node) {
20
+ return node ? node.parentNode : null;
21
+ }
22
/**
 * Returns the `text` property of `node`.
 *
 * @param node node to read; must not be null or undefined (the property
 *             access is unguarded)
 * @returns the value of `node.text`
 */
function getText(node) {
    var content = node.text;
    return content;
}
25
/**
 * Drops every entry of `nodes` that is already represented in the array,
 * either as a duplicate of itself or through one of its ancestors.
 *
 * The array is modified in place and the same array is returned.
 *
 * @param nodes array of nodes to reduce
 * @returns `nodes`, with covered entries spliced out
 */
function removeSubsets(nodes) {
    // Walk backwards so splicing never shifts an index still to be visited.
    for (var i = nodes.length - 1; i >= 0; i--) {
        var candidate = nodes[i];
        // Blank the slot so the candidate cannot match itself in indexOf.
        nodes[i] = null;
        var covered = false;
        for (var cursor = candidate; cursor; cursor = getParent(cursor)) {
            if (nodes.indexOf(cursor) !== -1) {
                covered = true;
                break;
            }
        }
        if (covered) {
            nodes.splice(i, 1);
        }
        else {
            // Nothing else in the array covers it: restore the entry.
            nodes[i] = candidate;
        }
    }
    return nodes;
}
52
/**
 * Checks whether any element in `elems`, or any element below one of them,
 * passes `test`.
 *
 * Non-element nodes (per `isTag`) are neither tested nor descended into.
 *
 * @param test predicate applied to element nodes
 * @param elems array of nodes to search
 * @returns true as soon as one element passes, false otherwise
 */
function existsOne(test, elems) {
    return elems.some(function (candidate) {
        if (!isTag(candidate)) {
            return false;
        }
        if (test(candidate)) {
            return true;
        }
        return existsOne(test, getChildren(candidate));
    });
}
57
/**
 * Returns the children of `node`'s parent.
 *
 * @param node node whose siblings are wanted
 * @returns the result of `getChildren` on the parent, or the falsy parent
 *          value itself when `getParent(node)` yields nothing
 */
function getSiblings(node) {
    var container = getParent(node);
    if (!container) {
        return container;
    }
    return getChildren(container);
}
61
/**
 * Reports whether `elem` carries attribute `name`.
 *
 * @param elem node to inspect
 * @param name attribute name
 * @returns true when `getAttributeValue(elem, name)` is anything other than
 *          `undefined`
 */
function hasAttrib(elem, name) {
    var value = getAttributeValue(elem, name);
    return value !== undefined;
}
64
/**
 * Depth-first search for the first element that passes `test`.
 *
 * Each entry of `elems` is checked before its children are searched. Only
 * element nodes (per `isTag`) are handed to `test`, matching `findAll` and
 * `existsOne` in this module; non-element nodes are skipped so a predicate
 * that accepts everything cannot yield a text or comment node.
 *
 * @param test predicate applied to element nodes
 * @param elems array of nodes to search
 * @returns the first matching element, or `null` when none matches
 */
function findOne(test, elems) {
    for (var i = 0, l = elems.length; i < l; i++) {
        var el = elems[i];
        if (!isTag(el)) {
            continue;
        }
        if (test(el)) {
            return el;
        }
        var childs = getChildren(el);
        if (childs && childs.length > 0) {
            var found = findOne(test, childs);
            if (found) {
                return found;
            }
        }
    }
    return null;
}
80
/**
 * Collects every element in `nodes`, and every element below them, that
 * passes `test`.
 *
 * Non-element nodes (per `isTag`) are skipped together with their children.
 * An element is listed before the matches found among its descendants.
 *
 * @param test predicate applied to element nodes
 * @param nodes array of nodes to search
 * @returns a new array of matching elements
 */
function findAll(test, nodes) {
    var matches = [];
    for (var i = 0, count = nodes.length; i < count; i++) {
        var current = nodes[i];
        if (isTag(current)) {
            if (test(current)) {
                matches.push(current);
            }
            var kids = getChildren(current);
            if (kids) {
                matches = matches.concat(findAll(test, kids));
            }
        }
    }
    return matches;
}
93
/**
 * Adapter object published as this module's default export; it bundles the
 * node-access and traversal helpers defined above under their own names.
 */
var adapter = {
    isTag: isTag,
    getAttributeValue: getAttributeValue,
    getName: getName,
    getChildren: getChildren,
    getParent: getParent,
    getText: getText,
    removeSubsets: removeSubsets,
    existsOne: existsOne,
    getSiblings: getSiblings,
    hasAttrib: hasAttrib,
    findOne: findOne,
    findAll: findAll
};
exports.default = adapter;
@@ -0,0 +1,18 @@
1
+ import Node from './node';
2
+ import NodeType from './type';
3
+ import HTMLElement from './html';
4
/**
 * Declaration of the comment node class, a `Node` subclass that carries the
 * raw text of a comment.
 */
export default class CommentNode extends Node {
    /**
     * Node type marker for this node.
     */
    nodeType: NodeType;
    /**
     * Comment text as passed to the constructor.
     */
    rawText: string;
    /**
     * @param rawText text of the comment
     * @param parentNode element the comment is attached to
     * @param range optional pair of numbers forwarded to the `Node` base class
     */
    constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
    /**
     * Text value of this node.
     * @returns text content
     */
    get text(): string;
    toString(): string;
}
@@ -0,0 +1,51 @@
1
+ "use strict";
2
+ var __extends = (this && this.__extends) || (function () {
3
+ var extendStatics = function (d, b) {
4
+ extendStatics = Object.setPrototypeOf ||
5
+ ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||
6
+ function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };
7
+ return extendStatics(d, b);
8
+ };
9
+ return function (d, b) {
10
+ if (typeof b !== "function" && b !== null)
11
+ throw new TypeError("Class extends value " + String(b) + " is not a constructor or null");
12
+ extendStatics(d, b);
13
+ function __() { this.constructor = d; }
14
+ d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
15
+ };
16
+ })();
17
+ var __importDefault = (this && this.__importDefault) || function (mod) {
18
+ return (mod && mod.__esModule) ? mod : { "default": mod };
19
+ };
20
+ Object.defineProperty(exports, "__esModule", { value: true });
21
+ var node_1 = __importDefault(require("./node"));
22
+ var type_1 = __importDefault(require("./type"));
23
/**
 * Comment node: a subclass of the `./node` base class that stores the raw
 * comment text and serialises as `<!--rawText-->`.
 */
var CommentNode = /** @class */ (function (_super) {
    __extends(CommentNode, _super);
    /**
     * @param rawText text of the comment
     * @param parentNode forwarded to the base constructor
     * @param range forwarded to the base constructor
     */
    function CommentNode(rawText, parentNode, range) {
        var self = _super.call(this, parentNode, range) || this;
        self.rawText = rawText;
        // Node type marker: this is a comment node.
        self.nodeType = type_1.default.COMMENT_NODE;
        return self;
    }
    Object.defineProperty(CommentNode.prototype, "text", {
        /**
         * Text value of this node.
         * @return {string} the stored `rawText`
         */
        get: function () {
            return this.rawText;
        },
        enumerable: false,
        configurable: true
    });
    /**
     * Serialises the node as an HTML comment.
     * @return {string} `rawText` wrapped in `<!--` and `-->`
     */
    CommentNode.prototype.toString = function () {
        return "<!--" + this.rawText + "-->";
    };
    return CommentNode;
}(node_1.default));
exports.default = CommentNode;
@@ -0,0 +1,206 @@
1
+ import Node from './node';
2
+ import NodeType from './type';
3
/**
 * The `id` and `class` attributes of an element; both are optional.
 * Accepted as `keyAttrs` by the `HTMLElement` constructor.
 */
export interface KeyAttributes {
    class?: string;
    id?: string;
}
7
/**
 * String-to-string attribute map. `HTMLElement.attrs` returns this type and
 * describes it as the parsed and unescaped attributes.
 */
export interface Attributes {
    [name: string]: string;
}
10
/**
 * String-to-string attribute map. `HTMLElement.rawAttributes` returns this
 * type and describes it as the escaped (as-is) attributes.
 */
export interface RawAttributes {
    [name: string]: string;
}
13
/**
 * Position keywords accepted by `HTMLElement.insertAdjacentHTML`.
 */
export type InsertPosition =
    | 'beforebegin'
    | 'afterbegin'
    | 'beforeend'
    | 'afterend';
14
/**
 * Token collection used as the type of `HTMLElement.classList`.
 *
 * NOTE(review): member names mirror the DOM `DOMTokenList` interface, so the
 * semantics presumably match; confirm against the implementation. Unlike the
 * DOM interface, `value` is declared as `string[]` and `toggle` returns void.
 */
declare class DOMTokenList {
    private _set;
    private _afterUpdate;
    private _validate;
    /**
     * @param valuesInit optional initial tokens
     * @param afterUpdate optional callback that receives this list
     *                    (presumably invoked after a change; confirm)
     */
    constructor(valuesInit?: string[], afterUpdate?: (t: DOMTokenList) => void);
    add(c: string): void;
    replace(c1: string, c2: string): void;
    remove(c: string): void;
    toggle(c: string): void;
    contains(c: string): boolean;
    get length(): number;
    values(): IterableIterator<string>;
    get value(): string[];
    toString(): string;
}
29
/**
 * HTMLElement: a node that contains a set of children.
 *
 * NOTE(review): the original note describes this as a minimalist
 * implementation with no complete tree structure (no parentNode,
 * nextSibling, previousSibling etc.), yet `nextSibling` and
 * `nextElementSibling` are declared on this class and the constructor takes a
 * `parentNode`; the note looks stale.
 *
 * @class HTMLElement
 * @extends {Node}
 */
export default class HTMLElement extends Node {
    private rawAttrs;
    private _attrs;
    private _rawAttrs;
    rawTagName: string;
    id: string;
    classList: DOMTokenList;
    /**
     * Node type marker for this node.
     */
    nodeType: NodeType;
    /**
     * Quotes an attribute value.
     * @param attr attribute value
     * @returns quoted value
     */
    private quoteAttribute;
    /**
     * Creates an instance of HTMLElement.
     * @param tagName tag name of the element
     * @param keyAttrs id and class attribute
     * @param rawAttrs attributes as a string
     * @param parentNode parent element, or null
     * @param range optional pair of numbers
     */
    constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range?: [number, number]);
    /**
     * Removes the current element.
     */
    remove(): void;
    /**
     * Removes a child from the childNodes array.
     * @param node node to remove
     */
    removeChild(node: Node): void;
    /**
     * Exchanges the given child with a new child.
     * @param oldNode node to exchange
     * @param newNode new node
     */
    exchangeChild(oldNode: Node, newNode: Node): void;
    get tagName(): string;
    get localName(): string;
    /**
     * Escaped (as-is) text value of the current node and its children.
     * @returns text content
     */
    get rawText(): string;
    get textContent(): string;
    set textContent(val: string);
    /**
     * Unescaped text value of the current node and its children.
     * @returns text content
     */
    get text(): string;
    /**
     * Structured text (with '\n' etc.).
     * @returns structured text
     */
    get structuredText(): string;
    toString(): string;
    get innerHTML(): string;
    set innerHTML(content: string);
    set_content(content: string | Node | Array<Node>, options?: Options): void;
    replaceWith(...nodes: Array<string | Node>): void;
    get outerHTML(): string;
    /**
     * Trims the element from the right (in block) after seeing the pattern in
     * a TextNode.
     * @param pattern pattern to find
     * @returns reference to the current node
     */
    trimRight(pattern: RegExp): this;
    /**
     * DOM structure of this element.
     * @returns structure
     */
    get structure(): string;
    /**
     * Removes whitespace in this subtree.
     * @returns this element
     */
    removeWhitespace(): this;
    /**
     * Queries a CSS selector to find matching nodes.
     * @param selector simplified CSS selector
     * @returns matching elements
     */
    querySelectorAll(selector: string): HTMLElement[];
    /**
     * Queries a CSS selector to find a matching node.
     * @param selector simplified CSS selector
     * @returns matching node
     */
    querySelector(selector: string): HTMLElement;
    /**
     * Traverses the element and its parents (heading toward the document
     * root) until it finds a node that matches the provided selector string.
     * Returns the element itself or the matching ancestor; if no such element
     * exists, it returns null (the declared return type does not include
     * null, so callers should check for it).
     * @param selector a DOMString containing a selector list
     */
    closest(selector: string): Node;
    /**
     * Appends a child node to childNodes.
     * @param node node to append
     * @returns the appended node
     */
    appendChild<T extends Node = Node>(node: T): T;
    /**
     * First child node.
     * @returns first child node
     */
    get firstChild(): Node;
    /**
     * Last child node.
     * @returns last child node
     */
    get lastChild(): Node;
    /**
     * Attributes of this element.
     * @access private
     * @returns parsed and unescaped attributes
     */
    get attrs(): Attributes;
    get attributes(): Record<string, string>;
    /**
     * Escaped (as-is) attributes of this element.
     * @returns parsed attributes
     */
    get rawAttributes(): RawAttributes;
    removeAttribute(key: string): void;
    hasAttribute(key: string): boolean;
    /**
     * Gets an attribute.
     * @param key the attribute name
     * @returns value of the attribute, or undefined
     */
    getAttribute(key: string): string | undefined;
    /**
     * Sets an attribute value on the HTMLElement.
     * @param key the attribute name
     * @param value the value to set, or null / undefined to remove an attribute
     */
    setAttribute(key: string, value: string): void;
    /**
     * Replaces all the attributes of the HTMLElement with the provided ones.
     * @param attributes the new attribute set
     */
    setAttributes(attributes: Attributes): void;
    insertAdjacentHTML(where: InsertPosition, html: string): void;
    get nextSibling(): Node;
    get nextElementSibling(): HTMLElement;
    get classNames(): string;
}
187
/**
 * Parser options. `base_parse` and `parse` accept a `Partial<Options>`, and
 * `HTMLElement.set_content` accepts a full `Options`.
 *
 * NOTE(review): the meaning of each flag is not visible in this declaration
 * file; the per-field notes below are read off the names and should be
 * confirmed against the parser implementation.
 */
export interface Options {
    /** Presumably lower-cases tag names while parsing; confirm. */
    lowerCaseTagName: boolean;
    /** Presumably keeps comment nodes in the parsed tree; confirm. */
    comment: boolean;
    /** Per-tag boolean flags, keyed by tag name. */
    blockTextElements: Record<string, boolean>;
}
194
/**
 * Parses a chunk of HTML source.
 *
 * NOTE(review): the original comment says this returns a root element, but
 * the declared return type is an array of `HTMLElement`; what the array holds
 * is not visible in this declaration file.
 *
 * @param data HTML source
 * @param options optional parser options
 * @returns array of elements
 */
export declare function base_parse(data: string, options?: Partial<Options>): Array<HTMLElement>;
201
/**
 * Parses a chunk of HTML source and returns a root element.
 *
 * @param data HTML source
 * @param options optional parser options
 * @returns root element
 */
export declare function parse(data: string, options?: Partial<Options>): HTMLElement;
206
+ export {};