node-html-parser 5.4.1 → 6.0.0

This diff compares the contents of two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ## [6.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v6.0.0) (2022-09-08)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * Preserve invalid nested A tags in AST (see [#215](https://github.com/taoqf/node-fast-html-parser/issues/215) for detail) ([374188f](https://github.com/taoqf/node-fast-html-parser/commit/374188f1c6d6c6d0567348b8e8d20957f5a93fb8))
11
+
12
+ ### [5.4.2](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v5.4.2) (2022-08-30)
13
+
5
14
  ## [5.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.1.0) (2021-10-28)
6
15
 
7
16
  ### Features
package/dist/index.d.ts CHANGED
@@ -1,7 +1,20 @@
1
- export { default as CommentNode } from './nodes/comment';
2
- export { default as HTMLElement, Options } from './nodes/html';
3
- export { default as parse, default } from './parse';
4
- export { default as valid } from './valid';
5
- export { default as Node } from './nodes/node';
6
- export { default as TextNode } from './nodes/text';
7
- export { default as NodeType } from './nodes/type';
1
+ import CommentNode from './nodes/comment';
2
+ import HTMLElement, { Options } from './nodes/html';
3
+ import Node from './nodes/node';
4
+ import TextNode from './nodes/text';
5
+ import NodeType from './nodes/type';
6
+ import baseParse from './parse';
7
+ import valid from './valid';
8
+ export { Options } from './nodes/html';
9
+ export { parse, HTMLElement, CommentNode, valid, Node, TextNode, NodeType };
10
+ declare function parse(data: string, options?: Partial<Options>): HTMLElement;
11
+ declare namespace parse {
12
+ var parse: typeof baseParse;
13
+ var HTMLElement: typeof import("./nodes/html").default;
14
+ var CommentNode: typeof import("./nodes/comment").default;
15
+ var valid: typeof import("./valid").default;
16
+ var Node: typeof import("./nodes/node").default;
17
+ var TextNode: typeof import("./nodes/text").default;
18
+ var NodeType: typeof import("./nodes/type").default;
19
+ }
20
+ export default parse;
package/dist/index.js CHANGED
@@ -3,19 +3,33 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
3
3
  return (mod && mod.__esModule) ? mod : { "default": mod };
4
4
  };
5
5
  Object.defineProperty(exports, "__esModule", { value: true });
6
- exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.default = exports.parse = exports.HTMLElement = exports.CommentNode = void 0;
7
- var comment_1 = require("./nodes/comment");
8
- Object.defineProperty(exports, "CommentNode", { enumerable: true, get: function () { return __importDefault(comment_1).default; } });
9
- var html_1 = require("./nodes/html");
10
- Object.defineProperty(exports, "HTMLElement", { enumerable: true, get: function () { return __importDefault(html_1).default; } });
11
- var parse_1 = require("./parse");
12
- Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return __importDefault(parse_1).default; } });
13
- Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(parse_1).default; } });
14
- var valid_1 = require("./valid");
15
- Object.defineProperty(exports, "valid", { enumerable: true, get: function () { return __importDefault(valid_1).default; } });
16
- var node_1 = require("./nodes/node");
17
- Object.defineProperty(exports, "Node", { enumerable: true, get: function () { return __importDefault(node_1).default; } });
18
- var text_1 = require("./nodes/text");
19
- Object.defineProperty(exports, "TextNode", { enumerable: true, get: function () { return __importDefault(text_1).default; } });
20
- var type_1 = require("./nodes/type");
21
- Object.defineProperty(exports, "NodeType", { enumerable: true, get: function () { return __importDefault(type_1).default; } });
6
+ exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.CommentNode = exports.HTMLElement = exports.parse = void 0;
7
+ var comment_1 = __importDefault(require("./nodes/comment"));
8
+ exports.CommentNode = comment_1.default;
9
+ var html_1 = __importDefault(require("./nodes/html"));
10
+ exports.HTMLElement = html_1.default;
11
+ var node_1 = __importDefault(require("./nodes/node"));
12
+ exports.Node = node_1.default;
13
+ var text_1 = __importDefault(require("./nodes/text"));
14
+ exports.TextNode = text_1.default;
15
+ var type_1 = __importDefault(require("./nodes/type"));
16
+ exports.NodeType = type_1.default;
17
+ var parse_1 = __importDefault(require("./parse"));
18
+ var valid_1 = __importDefault(require("./valid"));
19
+ exports.valid = valid_1.default;
20
+ function parse(data, options) {
21
+ if (options === void 0) { options = {
22
+ lowerCaseTagName: false,
23
+ comment: false
24
+ }; }
25
+ return (0, parse_1.default)(data, options);
26
+ }
27
+ exports.default = parse;
28
+ exports.parse = parse;
29
+ parse.parse = parse_1.default;
30
+ parse.HTMLElement = html_1.default;
31
+ parse.CommentNode = comment_1.default;
32
+ parse.valid = valid_1.default;
33
+ parse.Node = node_1.default;
34
+ parse.TextNode = text_1.default;
35
+ parse.NodeType = type_1.default;
package/dist/main.js CHANGED
@@ -1042,7 +1042,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1042
1042
  }
1043
1043
  var attrs = {};
1044
1044
  if (this.rawAttrs) {
1045
- var re = /([a-zA-Z()[\]#][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1045
+ var re = /([a-zA-Z()[\]#@][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
1046
1046
  var match = void 0;
1047
1047
  while ((match = re.exec(this.rawAttrs))) {
1048
1048
  var key = match[1];
@@ -1393,7 +1393,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1393
1393
  var match;
1394
1394
  // https://github.com/taoqf/node-html-parser/issues/38
1395
1395
  data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
1396
- var lowerCaseTagName = options.lowerCaseTagName;
1396
+ var lowerCaseTagName = options.lowerCaseTagName, fixNestedATags = options.fixNestedATags;
1397
1397
  var dataEndPos = data.length - (frameflag.length + 2);
1398
1398
  var frameFlagOffset = frameflag.length + 2;
1399
1399
  while ((match = kMarkupPattern.exec(data))) {
@@ -1445,7 +1445,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1445
1445
  }
1446
1446
  }
1447
1447
  // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1448
- if (tagName === 'a' || tagName === 'A') {
1448
+ if (fixNestedATags && (tagName === 'a' || tagName === 'A')) {
1449
1449
  if (noNestedTagIndex !== undefined) {
1450
1450
  stack.splice(noNestedTagIndex);
1451
1451
  currentParent = (0, back_1.default)(stack);
@@ -1484,7 +1484,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1484
1484
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1485
1485
  if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1486
1486
  while (true) {
1487
- if (tagName === 'a' || tagName === 'A')
1487
+ if (noNestedTagIndex != null && (tagName === 'a' || tagName === 'A'))
1488
1488
  noNestedTagIndex = undefined;
1489
1489
  if (currentParent.rawTagName === tagName) {
1490
1490
  // Update range end for closed tag
@@ -1628,16 +1628,37 @@ define("valid", ["require", "exports", "nodes/html"], function (require, exports
1628
1628
  }
1629
1629
  exports.default = valid;
1630
1630
  });
1631
- define("index", ["require", "exports", "nodes/comment", "nodes/html", "parse", "valid", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, comment_2, html_3, parse_1, valid_1, node_4, text_2, type_5) {
1631
+ define("index", ["require", "exports", "nodes/comment", "nodes/html", "nodes/node", "nodes/text", "nodes/type", "parse", "valid"], function (require, exports, comment_2, html_3, node_4, text_2, type_5, parse_1, valid_1) {
1632
1632
  "use strict";
1633
1633
  Object.defineProperty(exports, "__esModule", { value: true });
1634
- exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.default = exports.parse = exports.HTMLElement = exports.CommentNode = void 0;
1635
- Object.defineProperty(exports, "CommentNode", { enumerable: true, get: function () { return __importDefault(comment_2).default; } });
1636
- Object.defineProperty(exports, "HTMLElement", { enumerable: true, get: function () { return __importDefault(html_3).default; } });
1637
- Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return __importDefault(parse_1).default; } });
1638
- Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(parse_1).default; } });
1639
- Object.defineProperty(exports, "valid", { enumerable: true, get: function () { return __importDefault(valid_1).default; } });
1640
- Object.defineProperty(exports, "Node", { enumerable: true, get: function () { return __importDefault(node_4).default; } });
1641
- Object.defineProperty(exports, "TextNode", { enumerable: true, get: function () { return __importDefault(text_2).default; } });
1642
- Object.defineProperty(exports, "NodeType", { enumerable: true, get: function () { return __importDefault(type_5).default; } });
1634
+ exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.CommentNode = exports.HTMLElement = exports.parse = void 0;
1635
+ comment_2 = __importDefault(comment_2);
1636
+ html_3 = __importDefault(html_3);
1637
+ node_4 = __importDefault(node_4);
1638
+ text_2 = __importDefault(text_2);
1639
+ type_5 = __importDefault(type_5);
1640
+ parse_1 = __importDefault(parse_1);
1641
+ valid_1 = __importDefault(valid_1);
1642
+ exports.CommentNode = comment_2.default;
1643
+ exports.HTMLElement = html_3.default;
1644
+ exports.Node = node_4.default;
1645
+ exports.TextNode = text_2.default;
1646
+ exports.NodeType = type_5.default;
1647
+ exports.valid = valid_1.default;
1648
+ function parse(data, options) {
1649
+ if (options === void 0) { options = {
1650
+ lowerCaseTagName: false,
1651
+ comment: false
1652
+ }; }
1653
+ return (0, parse_1.default)(data, options);
1654
+ }
1655
+ exports.default = parse;
1656
+ exports.parse = parse;
1657
+ parse.parse = parse_1.default;
1658
+ parse.HTMLElement = html_3.default;
1659
+ parse.CommentNode = comment_2.default;
1660
+ parse.valid = valid_1.default;
1661
+ parse.Node = node_4.default;
1662
+ parse.TextNode = text_2.default;
1663
+ parse.NodeType = type_5.default;
1643
1664
  });
package/dist/nodes/html.d.ts CHANGED
@@ -203,6 +203,10 @@ export default class HTMLElement extends Node {
203
203
  export interface Options {
204
204
  lowerCaseTagName: boolean;
205
205
  comment: boolean;
206
+ /**
207
+ * @see PR #215 for explanation
208
+ */
209
+ fixNestedATags?: boolean;
206
210
  parseNoneClosedTags?: boolean;
207
211
  blockTextElements: {
208
212
  [tag: string]: boolean;
package/dist/nodes/html.js CHANGED
@@ -709,7 +709,7 @@ var HTMLElement = /** @class */ (function (_super) {
709
709
  }
710
710
  var attrs = {};
711
711
  if (this.rawAttrs) {
712
- var re = /([a-zA-Z()[\]#][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
712
+ var re = /([a-zA-Z()[\]#@][a-zA-Z0-9-_:()[\]#]*)(?:\s*=\s*((?:'[^']*')|(?:"[^"]*")|\S+))?/g;
713
713
  var match = void 0;
714
714
  while ((match = re.exec(this.rawAttrs))) {
715
715
  var key = match[1];
@@ -1060,7 +1060,7 @@ function base_parse(data, options) {
1060
1060
  var match;
1061
1061
  // https://github.com/taoqf/node-html-parser/issues/38
1062
1062
  data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
1063
- var lowerCaseTagName = options.lowerCaseTagName;
1063
+ var lowerCaseTagName = options.lowerCaseTagName, fixNestedATags = options.fixNestedATags;
1064
1064
  var dataEndPos = data.length - (frameflag.length + 2);
1065
1065
  var frameFlagOffset = frameflag.length + 2;
1066
1066
  while ((match = kMarkupPattern.exec(data))) {
@@ -1112,7 +1112,7 @@ function base_parse(data, options) {
1112
1112
  }
1113
1113
  }
1114
1114
  // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1115
- if (tagName === 'a' || tagName === 'A') {
1115
+ if (fixNestedATags && (tagName === 'a' || tagName === 'A')) {
1116
1116
  if (noNestedTagIndex !== undefined) {
1117
1117
  stack.splice(noNestedTagIndex);
1118
1118
  currentParent = (0, back_1.default)(stack);
@@ -1151,7 +1151,7 @@ function base_parse(data, options) {
1151
1151
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1152
1152
  if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1153
1153
  while (true) {
1154
- if (tagName === 'a' || tagName === 'A')
1154
+ if (noNestedTagIndex != null && (tagName === 'a' || tagName === 'A'))
1155
1155
  noNestedTagIndex = undefined;
1156
1156
  if (currentParent.rawTagName === tagName) {
1157
1157
  // Update range end for closed tag
package/package.json CHANGED
@@ -1,14 +1,9 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "5.4.1",
3
+ "version": "6.0.0",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
- "exports": {
8
- "require": "./dist/index.js",
9
- "import": "./esm/index.js",
10
- "types": "./dist/index.d.ts"
11
- },
12
7
  "scripts": {
13
8
  "compile": "tsc",
14
9
  "build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
@@ -38,7 +33,6 @@
38
33
  ],
39
34
  "files": [
40
35
  "dist",
41
- "esm",
42
36
  "README.md",
43
37
  "LICENSE",
44
38
  "CHANGELOG.md"
package/esm/index.js DELETED
@@ -1,11 +0,0 @@
1
- import nhp from '../dist/index.js'
2
-
3
- export const CommentNode = nhp.CommentNode;
4
- export const HTMLElement = nhp.HTMLElement;
5
- export const parse = nhp.parse;
6
- export const valid = nhp.valid;
7
- export const Node = nhp.Node;
8
- export const TextNode = nhp.TextNode;
9
- export const NodeType = nhp.NodeType;
10
-
11
- export default nhp;
package/esm/package.json DELETED
@@ -1,3 +0,0 @@
1
- {
2
- "type": "module"
3
- }