node-html-parser 5.4.1 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/index.d.ts +20 -7
- package/dist/index.js +30 -16
- package/dist/main.js +35 -14
- package/dist/nodes/html.d.ts +4 -0
- package/dist/nodes/html.js +4 -4
- package/package.json +1 -7
- package/esm/index.js +0 -11
- package/esm/package.json +0 -3
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
|
|
4
4
|
|
|
5
|
+
## [6.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v6.0.0) (2022-09-08)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
### Bug Fixes
|
|
9
|
+
|
|
10
|
+
* Preserve invalid nested A tags in AST (see [#215](https://github.com/taoqf/node-fast-html-parser/issues/215) for detail) ([374188f](https://github.com/taoqf/node-fast-html-parser/commit/374188f1c6d6c6d0567348b8e8d20957f5a93fb8))
|
|
11
|
+
|
|
12
|
+
### [5.4.2](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v5.4.2) (2022-08-30)
|
|
13
|
+
|
|
5
14
|
## [5.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.1.0) (2021-10-28)
|
|
6
15
|
|
|
7
16
|
### Features
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,20 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
import CommentNode from './nodes/comment';
|
|
2
|
+
import HTMLElement, { Options } from './nodes/html';
|
|
3
|
+
import Node from './nodes/node';
|
|
4
|
+
import TextNode from './nodes/text';
|
|
5
|
+
import NodeType from './nodes/type';
|
|
6
|
+
import baseParse from './parse';
|
|
7
|
+
import valid from './valid';
|
|
8
|
+
export { Options } from './nodes/html';
|
|
9
|
+
export { parse, HTMLElement, CommentNode, valid, Node, TextNode, NodeType };
|
|
10
|
+
declare function parse(data: string, options?: Partial<Options>): HTMLElement;
|
|
11
|
+
declare namespace parse {
|
|
12
|
+
var parse: typeof baseParse;
|
|
13
|
+
var HTMLElement: typeof import("./nodes/html").default;
|
|
14
|
+
var CommentNode: typeof import("./nodes/comment").default;
|
|
15
|
+
var valid: typeof import("./valid").default;
|
|
16
|
+
var Node: typeof import("./nodes/node").default;
|
|
17
|
+
var TextNode: typeof import("./nodes/text").default;
|
|
18
|
+
var NodeType: typeof import("./nodes/type").default;
|
|
19
|
+
}
|
|
20
|
+
export default parse;
|
package/dist/index.js
CHANGED
|
@@ -3,19 +3,33 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
3
3
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
4
|
};
|
|
5
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
-
exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.
|
|
7
|
-
var comment_1 = require("./nodes/comment");
|
|
8
|
-
|
|
9
|
-
var html_1 = require("./nodes/html");
|
|
10
|
-
|
|
11
|
-
var
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
var
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
6
|
+
exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.CommentNode = exports.HTMLElement = exports.parse = void 0;
|
|
7
|
+
var comment_1 = __importDefault(require("./nodes/comment"));
|
|
8
|
+
exports.CommentNode = comment_1.default;
|
|
9
|
+
var html_1 = __importDefault(require("./nodes/html"));
|
|
10
|
+
exports.HTMLElement = html_1.default;
|
|
11
|
+
var node_1 = __importDefault(require("./nodes/node"));
|
|
12
|
+
exports.Node = node_1.default;
|
|
13
|
+
var text_1 = __importDefault(require("./nodes/text"));
|
|
14
|
+
exports.TextNode = text_1.default;
|
|
15
|
+
var type_1 = __importDefault(require("./nodes/type"));
|
|
16
|
+
exports.NodeType = type_1.default;
|
|
17
|
+
var parse_1 = __importDefault(require("./parse"));
|
|
18
|
+
var valid_1 = __importDefault(require("./valid"));
|
|
19
|
+
exports.valid = valid_1.default;
|
|
20
|
+
function parse(data, options) {
|
|
21
|
+
if (options === void 0) { options = {
|
|
22
|
+
lowerCaseTagName: false,
|
|
23
|
+
comment: false
|
|
24
|
+
}; }
|
|
25
|
+
return (0, parse_1.default)(data, options);
|
|
26
|
+
}
|
|
27
|
+
exports.default = parse;
|
|
28
|
+
exports.parse = parse;
|
|
29
|
+
parse.parse = parse_1.default;
|
|
30
|
+
parse.HTMLElement = html_1.default;
|
|
31
|
+
parse.CommentNode = comment_1.default;
|
|
32
|
+
parse.valid = valid_1.default;
|
|
33
|
+
parse.Node = node_1.default;
|
|
34
|
+
parse.TextNode = text_1.default;
|
|
35
|
+
parse.NodeType = type_1.default;
|
package/dist/main.js
CHANGED
|
@@ -1042,7 +1042,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1042
1042
|
}
|
|
1043
1043
|
var attrs = {};
|
|
1044
1044
|
if (this.rawAttrs) {
|
|
1045
|
-
var re = /([a-zA-Z()[\]
|
|
1045
|
+
var re = /([a-zA-Z()[\]#@][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
1046
1046
|
var match = void 0;
|
|
1047
1047
|
while ((match = re.exec(this.rawAttrs))) {
|
|
1048
1048
|
var key = match[1];
|
|
@@ -1393,7 +1393,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1393
1393
|
var match;
|
|
1394
1394
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1395
1395
|
data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
|
|
1396
|
-
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1396
|
+
var lowerCaseTagName = options.lowerCaseTagName, fixNestedATags = options.fixNestedATags;
|
|
1397
1397
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1398
1398
|
var frameFlagOffset = frameflag.length + 2;
|
|
1399
1399
|
while ((match = kMarkupPattern.exec(data))) {
|
|
@@ -1445,7 +1445,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1445
1445
|
}
|
|
1446
1446
|
}
|
|
1447
1447
|
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
|
|
1448
|
-
if (tagName === 'a' || tagName === 'A') {
|
|
1448
|
+
if (fixNestedATags && (tagName === 'a' || tagName === 'A')) {
|
|
1449
1449
|
if (noNestedTagIndex !== undefined) {
|
|
1450
1450
|
stack.splice(noNestedTagIndex);
|
|
1451
1451
|
currentParent = (0, back_1.default)(stack);
|
|
@@ -1484,7 +1484,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
|
|
|
1484
1484
|
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1485
1485
|
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
|
|
1486
1486
|
while (true) {
|
|
1487
|
-
if (tagName === 'a' || tagName === 'A')
|
|
1487
|
+
if (noNestedTagIndex != null && (tagName === 'a' || tagName === 'A'))
|
|
1488
1488
|
noNestedTagIndex = undefined;
|
|
1489
1489
|
if (currentParent.rawTagName === tagName) {
|
|
1490
1490
|
// Update range end for closed tag
|
|
@@ -1628,16 +1628,37 @@ define("valid", ["require", "exports", "nodes/html"], function (require, exports
|
|
|
1628
1628
|
}
|
|
1629
1629
|
exports.default = valid;
|
|
1630
1630
|
});
|
|
1631
|
-
define("index", ["require", "exports", "nodes/comment", "nodes/html", "
|
|
1631
|
+
define("index", ["require", "exports", "nodes/comment", "nodes/html", "nodes/node", "nodes/text", "nodes/type", "parse", "valid"], function (require, exports, comment_2, html_3, node_4, text_2, type_5, parse_1, valid_1) {
|
|
1632
1632
|
"use strict";
|
|
1633
1633
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
1634
|
-
exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1638
|
-
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1634
|
+
exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.CommentNode = exports.HTMLElement = exports.parse = void 0;
|
|
1635
|
+
comment_2 = __importDefault(comment_2);
|
|
1636
|
+
html_3 = __importDefault(html_3);
|
|
1637
|
+
node_4 = __importDefault(node_4);
|
|
1638
|
+
text_2 = __importDefault(text_2);
|
|
1639
|
+
type_5 = __importDefault(type_5);
|
|
1640
|
+
parse_1 = __importDefault(parse_1);
|
|
1641
|
+
valid_1 = __importDefault(valid_1);
|
|
1642
|
+
exports.CommentNode = comment_2.default;
|
|
1643
|
+
exports.HTMLElement = html_3.default;
|
|
1644
|
+
exports.Node = node_4.default;
|
|
1645
|
+
exports.TextNode = text_2.default;
|
|
1646
|
+
exports.NodeType = type_5.default;
|
|
1647
|
+
exports.valid = valid_1.default;
|
|
1648
|
+
function parse(data, options) {
|
|
1649
|
+
if (options === void 0) { options = {
|
|
1650
|
+
lowerCaseTagName: false,
|
|
1651
|
+
comment: false
|
|
1652
|
+
}; }
|
|
1653
|
+
return (0, parse_1.default)(data, options);
|
|
1654
|
+
}
|
|
1655
|
+
exports.default = parse;
|
|
1656
|
+
exports.parse = parse;
|
|
1657
|
+
parse.parse = parse_1.default;
|
|
1658
|
+
parse.HTMLElement = html_3.default;
|
|
1659
|
+
parse.CommentNode = comment_2.default;
|
|
1660
|
+
parse.valid = valid_1.default;
|
|
1661
|
+
parse.Node = node_4.default;
|
|
1662
|
+
parse.TextNode = text_2.default;
|
|
1663
|
+
parse.NodeType = type_5.default;
|
|
1643
1664
|
});
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -203,6 +203,10 @@ export default class HTMLElement extends Node {
|
|
|
203
203
|
export interface Options {
|
|
204
204
|
lowerCaseTagName: boolean;
|
|
205
205
|
comment: boolean;
|
|
206
|
+
/**
|
|
207
|
+
* @see PR #215 for explanation
|
|
208
|
+
*/
|
|
209
|
+
fixNestedATags?: boolean;
|
|
206
210
|
parseNoneClosedTags?: boolean;
|
|
207
211
|
blockTextElements: {
|
|
208
212
|
[tag: string]: boolean;
|
package/dist/nodes/html.js
CHANGED
|
@@ -709,7 +709,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
709
709
|
}
|
|
710
710
|
var attrs = {};
|
|
711
711
|
if (this.rawAttrs) {
|
|
712
|
-
var re = /([a-zA-Z()[\]
|
|
712
|
+
var re = /([a-zA-Z()[\]#@][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
|
|
713
713
|
var match = void 0;
|
|
714
714
|
while ((match = re.exec(this.rawAttrs))) {
|
|
715
715
|
var key = match[1];
|
|
@@ -1060,7 +1060,7 @@ function base_parse(data, options) {
|
|
|
1060
1060
|
var match;
|
|
1061
1061
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1062
1062
|
data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
|
|
1063
|
-
var lowerCaseTagName = options.lowerCaseTagName;
|
|
1063
|
+
var lowerCaseTagName = options.lowerCaseTagName, fixNestedATags = options.fixNestedATags;
|
|
1064
1064
|
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1065
1065
|
var frameFlagOffset = frameflag.length + 2;
|
|
1066
1066
|
while ((match = kMarkupPattern.exec(data))) {
|
|
@@ -1112,7 +1112,7 @@ function base_parse(data, options) {
|
|
|
1112
1112
|
}
|
|
1113
1113
|
}
|
|
1114
1114
|
// Prevent nested A tags by terminating the last A and starting a new one : see issue #144
|
|
1115
|
-
if (tagName === 'a' || tagName === 'A') {
|
|
1115
|
+
if (fixNestedATags && (tagName === 'a' || tagName === 'A')) {
|
|
1116
1116
|
if (noNestedTagIndex !== undefined) {
|
|
1117
1117
|
stack.splice(noNestedTagIndex);
|
|
1118
1118
|
currentParent = (0, back_1.default)(stack);
|
|
@@ -1151,7 +1151,7 @@ function base_parse(data, options) {
|
|
|
1151
1151
|
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1152
1152
|
if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
|
|
1153
1153
|
while (true) {
|
|
1154
|
-
if (tagName === 'a' || tagName === 'A')
|
|
1154
|
+
if (noNestedTagIndex != null && (tagName === 'a' || tagName === 'A'))
|
|
1155
1155
|
noNestedTagIndex = undefined;
|
|
1156
1156
|
if (currentParent.rawTagName === tagName) {
|
|
1157
1157
|
// Update range end for closed tag
|
package/package.json
CHANGED
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "5.4.1",
|
|
3
|
+
"version": "6.0.0",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
|
-
"exports": {
|
|
8
|
-
"require": "./dist/index.js",
|
|
9
|
-
"import": "./esm/index.js",
|
|
10
|
-
"types": "./dist/index.d.ts"
|
|
11
|
-
},
|
|
12
7
|
"scripts": {
|
|
13
8
|
"compile": "tsc",
|
|
14
9
|
"build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
|
|
@@ -38,7 +33,6 @@
|
|
|
38
33
|
],
|
|
39
34
|
"files": [
|
|
40
35
|
"dist",
|
|
41
|
-
"esm",
|
|
42
36
|
"README.md",
|
|
43
37
|
"LICENSE",
|
|
44
38
|
"CHANGELOG.md"
|
package/esm/index.js
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import nhp from '../dist/index.js'
|
|
2
|
-
|
|
3
|
-
export const CommentNode = nhp.CommentNode;
|
|
4
|
-
export const HTMLElement = nhp.HTMLElement;
|
|
5
|
-
export const parse = nhp.parse;
|
|
6
|
-
export const valid = nhp.valid;
|
|
7
|
-
export const Node = nhp.Node;
|
|
8
|
-
export const TextNode = nhp.TextNode;
|
|
9
|
-
export const NodeType = nhp.NodeType;
|
|
10
|
-
|
|
11
|
-
export default nhp;
|
package/esm/package.json
DELETED