node-html-parser 5.4.2 → 6.0.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4
4
 
5
+ ## [6.0.0](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v6.0.0) (2022-09-08)
6
+
7
+
8
+ ### Bug Fixes
9
+
10
+ * Preserve invalid nested A tags in AST (see [#215](https://github.com/taoqf/node-fast-html-parser/issues/215) for detail) ([374188f](https://github.com/taoqf/node-fast-html-parser/commit/374188f1c6d6c6d0567348b8e8d20957f5a93fb8))
11
+
5
12
  ### [5.4.2](https://github.com/taoqf/node-fast-html-parser/compare/v5.4.2-0...v5.4.2) (2022-08-30)
6
13
 
7
14
  ## [5.1.0](https://github.com/taoqf/node-fast-html-parser/compare/v4.1.5...v5.1.0) (2021-10-28)
package/dist/main.js CHANGED
@@ -1393,7 +1393,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1393
1393
  var match;
1394
1394
  // https://github.com/taoqf/node-html-parser/issues/38
1395
1395
  data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
1396
- var lowerCaseTagName = options.lowerCaseTagName;
1396
+ var lowerCaseTagName = options.lowerCaseTagName, fixNestedATags = options.fixNestedATags;
1397
1397
  var dataEndPos = data.length - (frameflag.length + 2);
1398
1398
  var frameFlagOffset = frameflag.length + 2;
1399
1399
  while ((match = kMarkupPattern.exec(data))) {
@@ -1445,7 +1445,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1445
1445
  }
1446
1446
  }
1447
1447
  // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1448
- if (tagName === 'a' || tagName === 'A') {
1448
+ if (fixNestedATags && (tagName === 'a' || tagName === 'A')) {
1449
1449
  if (noNestedTagIndex !== undefined) {
1450
1450
  stack.splice(noNestedTagIndex);
1451
1451
  currentParent = (0, back_1.default)(stack);
@@ -1484,7 +1484,7 @@ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher
1484
1484
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1485
1485
  if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1486
1486
  while (true) {
1487
- if (tagName === 'a' || tagName === 'A')
1487
+ if (noNestedTagIndex != null && (tagName === 'a' || tagName === 'A'))
1488
1488
  noNestedTagIndex = undefined;
1489
1489
  if (currentParent.rawTagName === tagName) {
1490
1490
  // Update range end for closed tag
@@ -203,6 +203,10 @@ export default class HTMLElement extends Node {
203
203
  export interface Options {
204
204
  lowerCaseTagName: boolean;
205
205
  comment: boolean;
206
+ /**
207
+ * @see PR #215 for explanation
208
+ */
209
+ fixNestedATags?: boolean;
206
210
  parseNoneClosedTags?: boolean;
207
211
  blockTextElements: {
208
212
  [tag: string]: boolean;
@@ -1060,7 +1060,7 @@ function base_parse(data, options) {
1060
1060
  var match;
1061
1061
  // https://github.com/taoqf/node-html-parser/issues/38
1062
1062
  data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
1063
- var lowerCaseTagName = options.lowerCaseTagName;
1063
+ var lowerCaseTagName = options.lowerCaseTagName, fixNestedATags = options.fixNestedATags;
1064
1064
  var dataEndPos = data.length - (frameflag.length + 2);
1065
1065
  var frameFlagOffset = frameflag.length + 2;
1066
1066
  while ((match = kMarkupPattern.exec(data))) {
@@ -1112,7 +1112,7 @@ function base_parse(data, options) {
1112
1112
  }
1113
1113
  }
1114
1114
  // Prevent nested A tags by terminating the last A and starting a new one : see issue #144
1115
- if (tagName === 'a' || tagName === 'A') {
1115
+ if (fixNestedATags && (tagName === 'a' || tagName === 'A')) {
1116
1116
  if (noNestedTagIndex !== undefined) {
1117
1117
  stack.splice(noNestedTagIndex);
1118
1118
  currentParent = (0, back_1.default)(stack);
@@ -1151,7 +1151,7 @@ function base_parse(data, options) {
1151
1151
  // Handle closing tags or self-closed elements (ie </tag> or <br>)
1152
1152
  if (leadingSlash || closingSlash || kSelfClosingElements[tagName]) {
1153
1153
  while (true) {
1154
- if (tagName === 'a' || tagName === 'A')
1154
+ if (noNestedTagIndex != null && (tagName === 'a' || tagName === 'A'))
1155
1155
  noNestedTagIndex = undefined;
1156
1156
  if (currentParent.rawTagName === tagName) {
1157
1157
  // Update range end for closed tag
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "5.4.2",
3
+ "version": "6.0.0",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",