node-html-parser 4.0.0 → 4.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/esm/nodes/comment.js +2 -2
- package/dist/esm/nodes/html.js +42 -38
- package/dist/esm/nodes/node.js +7 -1
- package/dist/esm/nodes/text.js +2 -2
- package/dist/main.js +86 -76
- package/dist/nodes/comment.d.ts +2 -2
- package/dist/nodes/comment.js +2 -2
- package/dist/nodes/html.d.ts +1 -1
- package/dist/nodes/html.js +71 -67
- package/dist/nodes/node.d.ts +2 -1
- package/dist/nodes/node.js +9 -3
- package/dist/nodes/text.d.ts +1 -1
- package/dist/nodes/text.js +3 -3
- package/dist/valid.js +1 -1
- package/package.json +3 -2
package/README.md
CHANGED
|
@@ -249,3 +249,7 @@ Get all attributes of current element. **Notice: do not try to change the return
|
|
|
249
249
|
### HTMLElement#classList
|
|
250
250
|
|
|
251
251
|
Get all attributes of current element. **Notice: do not try to change the returned value.**
|
|
252
|
+
|
|
253
|
+
### HTMLElement#range
|
|
254
|
+
|
|
255
|
+
Corresponding source code start and end indexes (ie [ 0, 40 ])
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import Node from './node';
|
|
2
2
|
import NodeType from './type';
|
|
3
3
|
export default class CommentNode extends Node {
|
|
4
|
-
constructor(rawText, parentNode) {
|
|
5
|
-
super(parentNode);
|
|
4
|
+
constructor(rawText, parentNode, range) {
|
|
5
|
+
super(parentNode, range);
|
|
6
6
|
this.rawText = rawText;
|
|
7
7
|
/**
|
|
8
8
|
* Node Type declaration.
|
package/dist/esm/nodes/html.js
CHANGED
|
@@ -151,8 +151,8 @@ export default class HTMLElement extends Node {
|
|
|
151
151
|
*
|
|
152
152
|
* @memberof HTMLElement
|
|
153
153
|
*/
|
|
154
|
-
constructor(tagName, keyAttrs, rawAttrs = '', parentNode) {
|
|
155
|
-
super(parentNode);
|
|
154
|
+
constructor(tagName, keyAttrs, rawAttrs = '', parentNode, range) {
|
|
155
|
+
super(parentNode, range);
|
|
156
156
|
this.rawAttrs = rawAttrs;
|
|
157
157
|
/**
|
|
158
158
|
* Node Type declaration.
|
|
@@ -946,39 +946,47 @@ export function base_parse(data, options = { lowerCaseTagName: false, comment: f
|
|
|
946
946
|
return it.test(tag);
|
|
947
947
|
});
|
|
948
948
|
}
|
|
949
|
-
const
|
|
949
|
+
const createRange = (startPos, endPos) => [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
950
|
+
const root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
950
951
|
let currentParent = root;
|
|
951
952
|
const stack = [root];
|
|
952
953
|
let lastTextPos = -1;
|
|
953
954
|
let match;
|
|
954
955
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
955
956
|
data = `<${frameflag}>${data}</${frameflag}>`;
|
|
957
|
+
const dataEndPos = data.length - (frameflag.length + 2);
|
|
958
|
+
const frameFlagOffset = frameflag.length + 2;
|
|
956
959
|
while ((match = kMarkupPattern.exec(data))) {
|
|
960
|
+
const tagStartPos = kMarkupPattern.lastIndex - match[0].length;
|
|
961
|
+
const tagEndPos = kMarkupPattern.lastIndex;
|
|
962
|
+
// Add TextNode if content
|
|
957
963
|
if (lastTextPos > -1) {
|
|
958
|
-
if (lastTextPos + match[0].length <
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
currentParent.appendChild(new TextNode(text, currentParent));
|
|
964
|
+
if (lastTextPos + match[0].length < tagEndPos) {
|
|
965
|
+
const text = data.substring(lastTextPos, tagStartPos);
|
|
966
|
+
currentParent.appendChild(new TextNode(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
962
967
|
}
|
|
963
968
|
}
|
|
964
969
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
965
|
-
|
|
970
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
971
|
+
// Skip frameflag node
|
|
972
|
+
if (match[2] === frameflag)
|
|
966
973
|
continue;
|
|
967
|
-
|
|
974
|
+
// Handle comments
|
|
968
975
|
if (match[0][1] === '!') {
|
|
969
|
-
// this is a comment
|
|
970
976
|
if (options.comment) {
|
|
971
977
|
// Only keep what is in between <!-- and -->
|
|
972
|
-
const text = data.substring(
|
|
973
|
-
currentParent.appendChild(new CommentNode(text, currentParent));
|
|
978
|
+
const text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
979
|
+
currentParent.appendChild(new CommentNode(text, currentParent, createRange(tagStartPos, tagEndPos)));
|
|
974
980
|
}
|
|
975
981
|
continue;
|
|
976
982
|
}
|
|
977
|
-
|
|
983
|
+
/* -- Handle tag matching -- */
|
|
984
|
+
// Fix tag casing if necessary
|
|
985
|
+
if (options.lowerCaseTagName)
|
|
978
986
|
match[2] = match[2].toLowerCase();
|
|
979
|
-
|
|
987
|
+
// Handle opening tags (ie. <this> not </that>)
|
|
980
988
|
if (!match[1]) {
|
|
981
|
-
|
|
989
|
+
/* Populate attributes */
|
|
982
990
|
const attrs = {};
|
|
983
991
|
for (let attMatch; (attMatch = kAttributePattern.exec(match[3]));) {
|
|
984
992
|
attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
|
|
@@ -990,45 +998,41 @@ export function base_parse(data, options = { lowerCaseTagName: false, comment: f
|
|
|
990
998
|
currentParent = arr_back(stack);
|
|
991
999
|
}
|
|
992
1000
|
}
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
currentParent = currentParent.appendChild(
|
|
1001
|
+
const tagEndPos = kMarkupPattern.lastIndex;
|
|
1002
|
+
const tagStartPos = tagEndPos - match[0].length;
|
|
1003
|
+
currentParent = currentParent.appendChild(
|
|
1004
|
+
// Initialize range (end position updated later for closed tags)
|
|
1005
|
+
new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos, tagEndPos)));
|
|
996
1006
|
stack.push(currentParent);
|
|
997
1007
|
if (is_block_text_element(match[2])) {
|
|
998
|
-
//
|
|
1008
|
+
// Find closing tag
|
|
999
1009
|
const closeMarkup = `</${match[2]}>`;
|
|
1000
|
-
const
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
return data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1005
|
-
})();
|
|
1010
|
+
const closeIndex = options.lowerCaseTagName
|
|
1011
|
+
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1012
|
+
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1013
|
+
const textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1006
1014
|
if (element_should_be_ignore(match[2])) {
|
|
1007
|
-
|
|
1008
|
-
if (
|
|
1009
|
-
|
|
1010
|
-
text = data.substr(kMarkupPattern.lastIndex);
|
|
1011
|
-
}
|
|
1012
|
-
else {
|
|
1013
|
-
text = data.substring(kMarkupPattern.lastIndex, index);
|
|
1014
|
-
}
|
|
1015
|
-
if (text.length > 0) {
|
|
1016
|
-
currentParent.appendChild(new TextNode(text, currentParent));
|
|
1015
|
+
const text = data.substring(tagEndPos, textEndPos);
|
|
1016
|
+
if (text.length > 0 && /\S/.test(text)) {
|
|
1017
|
+
currentParent.appendChild(new TextNode(text, currentParent, createRange(tagEndPos, textEndPos)));
|
|
1017
1018
|
}
|
|
1018
1019
|
}
|
|
1019
|
-
if (
|
|
1020
|
+
if (closeIndex === -1) {
|
|
1020
1021
|
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
|
|
1021
1022
|
}
|
|
1022
1023
|
else {
|
|
1023
|
-
lastTextPos = kMarkupPattern.lastIndex =
|
|
1024
|
+
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1025
|
+
// Cause to be treated as self-closing, because no close found
|
|
1024
1026
|
match[1] = 'true';
|
|
1025
1027
|
}
|
|
1026
1028
|
}
|
|
1027
1029
|
}
|
|
1030
|
+
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1028
1031
|
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
|
|
1029
|
-
// </ or /> or <br> etc.
|
|
1030
1032
|
while (true) {
|
|
1031
1033
|
if (currentParent.rawTagName === match[2]) {
|
|
1034
|
+
// Update range end for closed tag
|
|
1035
|
+
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1032
1036
|
stack.pop();
|
|
1033
1037
|
currentParent = arr_back(stack);
|
|
1034
1038
|
break;
|
package/dist/esm/nodes/node.js
CHANGED
|
@@ -3,9 +3,15 @@ import { decode, encode } from 'he';
|
|
|
3
3
|
* Node Class as base class for TextNode and HTMLElement.
|
|
4
4
|
*/
|
|
5
5
|
export default class Node {
|
|
6
|
-
constructor(parentNode = null) {
|
|
6
|
+
constructor(parentNode = null, range) {
|
|
7
7
|
this.parentNode = parentNode;
|
|
8
8
|
this.childNodes = [];
|
|
9
|
+
Object.defineProperty(this, 'range', {
|
|
10
|
+
enumerable: false,
|
|
11
|
+
writable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
14
|
+
});
|
|
9
15
|
}
|
|
10
16
|
get innerText() {
|
|
11
17
|
return this.rawText;
|
package/dist/esm/nodes/text.js
CHANGED
|
@@ -6,8 +6,8 @@ import NodeType from './type';
|
|
|
6
6
|
* @param {string} value [description]
|
|
7
7
|
*/
|
|
8
8
|
export default class TextNode extends Node {
|
|
9
|
-
constructor(rawText, parentNode) {
|
|
10
|
-
super(parentNode);
|
|
9
|
+
constructor(rawText, parentNode, range) {
|
|
10
|
+
super(parentNode, range);
|
|
11
11
|
/**
|
|
12
12
|
* Node Type declaration.
|
|
13
13
|
* @type {Number}
|
package/dist/main.js
CHANGED
|
@@ -27,10 +27,14 @@ var __assign = (this && this.__assign) || function () {
|
|
|
27
27
|
};
|
|
28
28
|
return __assign.apply(this, arguments);
|
|
29
29
|
};
|
|
30
|
-
var __spreadArray = (this && this.__spreadArray) || function (to, from) {
|
|
31
|
-
for (var i = 0,
|
|
32
|
-
|
|
33
|
-
|
|
30
|
+
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
31
|
+
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
|
|
32
|
+
if (ar || !(i in from)) {
|
|
33
|
+
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
|
|
34
|
+
ar[i] = from[i];
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
return to.concat(ar || from);
|
|
34
38
|
};
|
|
35
39
|
define("back", ["require", "exports"], function (require, exports) {
|
|
36
40
|
"use strict";
|
|
@@ -62,8 +66,8 @@ define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], f
|
|
|
62
66
|
*/
|
|
63
67
|
var TextNode = /** @class */ (function (_super) {
|
|
64
68
|
__extends(TextNode, _super);
|
|
65
|
-
function TextNode(rawText, parentNode) {
|
|
66
|
-
var _this = _super.call(this, parentNode) || this;
|
|
69
|
+
function TextNode(rawText, parentNode, range) {
|
|
70
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
67
71
|
/**
|
|
68
72
|
* Node Type declaration.
|
|
69
73
|
* @type {Number}
|
|
@@ -119,7 +123,7 @@ define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], f
|
|
|
119
123
|
* @return {string} text content
|
|
120
124
|
*/
|
|
121
125
|
get: function () {
|
|
122
|
-
return he_1.decode(this.rawText);
|
|
126
|
+
return (0, he_1.decode)(this.rawText);
|
|
123
127
|
},
|
|
124
128
|
enumerable: false,
|
|
125
129
|
configurable: true
|
|
@@ -446,9 +450,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
446
450
|
*
|
|
447
451
|
* @memberof HTMLElement
|
|
448
452
|
*/
|
|
449
|
-
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode) {
|
|
453
|
+
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range) {
|
|
450
454
|
if (rawAttrs === void 0) { rawAttrs = ''; }
|
|
451
|
-
var _this = _super.call(this, parentNode) || this;
|
|
455
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
452
456
|
_this.rawAttrs = rawAttrs;
|
|
453
457
|
/**
|
|
454
458
|
* Node Type declaration.
|
|
@@ -680,7 +684,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
680
684
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
681
685
|
return child === _this;
|
|
682
686
|
});
|
|
683
|
-
this.parentNode.childNodes = __spreadArray(__spreadArray(__spreadArray([], this.parentNode.childNodes.slice(0, idx)), content), this.parentNode.childNodes.slice(idx + 1));
|
|
687
|
+
this.parentNode.childNodes = __spreadArray(__spreadArray(__spreadArray([], this.parentNode.childNodes.slice(0, idx), true), content, true), this.parentNode.childNodes.slice(idx + 1), true);
|
|
684
688
|
};
|
|
685
689
|
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
|
|
686
690
|
get: function () {
|
|
@@ -773,7 +777,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
773
777
|
* @return {HTMLElement[]} matching elements
|
|
774
778
|
*/
|
|
775
779
|
HTMLElement.prototype.querySelectorAll = function (selector) {
|
|
776
|
-
return css_select_1.selectAll(selector, this, {
|
|
780
|
+
return (0, css_select_1.selectAll)(selector, this, {
|
|
777
781
|
xmlMode: true,
|
|
778
782
|
adapter: matcher_1.default
|
|
779
783
|
});
|
|
@@ -841,7 +845,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
841
845
|
* @return {HTMLElement} matching node
|
|
842
846
|
*/
|
|
843
847
|
HTMLElement.prototype.querySelector = function (selector) {
|
|
844
|
-
return css_select_1.selectOne(selector, this, {
|
|
848
|
+
return (0, css_select_1.selectOne)(selector, this, {
|
|
845
849
|
xmlMode: true,
|
|
846
850
|
adapter: matcher_1.default
|
|
847
851
|
});
|
|
@@ -914,17 +918,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
914
918
|
}
|
|
915
919
|
el = this;
|
|
916
920
|
while (el) {
|
|
917
|
-
var e = css_select_1.selectOne(selector, el, {
|
|
921
|
+
var e = (0, css_select_1.selectOne)(selector, el, {
|
|
918
922
|
xmlMode: true,
|
|
919
923
|
adapter: __assign(__assign({}, matcher_1.default), { getChildren: function (node) {
|
|
920
924
|
var child = mapChild.get(node);
|
|
921
925
|
return child && [child];
|
|
922
|
-
},
|
|
923
|
-
getSiblings: function (node) {
|
|
926
|
+
}, getSiblings: function (node) {
|
|
924
927
|
return [node];
|
|
925
|
-
},
|
|
926
|
-
findOne: findOne,
|
|
927
|
-
findAll: function () {
|
|
928
|
+
}, findOne: findOne, findAll: function () {
|
|
928
929
|
return [];
|
|
929
930
|
} })
|
|
930
931
|
});
|
|
@@ -963,7 +964,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
963
964
|
* @return {Node} last child node
|
|
964
965
|
*/
|
|
965
966
|
get: function () {
|
|
966
|
-
return back_1.default(this.childNodes);
|
|
967
|
+
return (0, back_1.default)(this.childNodes);
|
|
967
968
|
},
|
|
968
969
|
enumerable: false,
|
|
969
970
|
configurable: true
|
|
@@ -1125,7 +1126,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1125
1126
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
1126
1127
|
return child === _this;
|
|
1127
1128
|
});
|
|
1128
|
-
(_a = this.parentNode.childNodes).splice.apply(_a, __spreadArray([idx + 1, 0], p.childNodes));
|
|
1129
|
+
(_a = this.parentNode.childNodes).splice.apply(_a, __spreadArray([idx + 1, 0], p.childNodes, false));
|
|
1129
1130
|
p.childNodes.forEach(function (n) {
|
|
1130
1131
|
if (n instanceof HTMLElement) {
|
|
1131
1132
|
n.parentNode = _this.parentNode;
|
|
@@ -1144,7 +1145,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1144
1145
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
1145
1146
|
return child === _this;
|
|
1146
1147
|
});
|
|
1147
|
-
(_c = this.parentNode.childNodes).splice.apply(_c, __spreadArray([idx, 0], p.childNodes));
|
|
1148
|
+
(_c = this.parentNode.childNodes).splice.apply(_c, __spreadArray([idx, 0], p.childNodes, false));
|
|
1148
1149
|
p.childNodes.forEach(function (n) {
|
|
1149
1150
|
if (n instanceof HTMLElement) {
|
|
1150
1151
|
n.parentNode = _this.parentNode;
|
|
@@ -1319,39 +1320,49 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1319
1320
|
return it.test(tag);
|
|
1320
1321
|
});
|
|
1321
1322
|
}
|
|
1322
|
-
var
|
|
1323
|
+
var createRange = function (startPos, endPos) {
|
|
1324
|
+
return [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
1325
|
+
};
|
|
1326
|
+
var root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
1323
1327
|
var currentParent = root;
|
|
1324
1328
|
var stack = [root];
|
|
1325
1329
|
var lastTextPos = -1;
|
|
1326
1330
|
var match;
|
|
1327
1331
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1328
1332
|
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
1329
|
-
var
|
|
1333
|
+
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1334
|
+
var frameFlagOffset = frameflag.length + 2;
|
|
1335
|
+
while ((match = kMarkupPattern.exec(data))) {
|
|
1336
|
+
var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
|
|
1337
|
+
var tagEndPos = kMarkupPattern.lastIndex;
|
|
1338
|
+
// Add TextNode if content
|
|
1330
1339
|
if (lastTextPos > -1) {
|
|
1331
|
-
if (lastTextPos + match[0].length <
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
currentParent.appendChild(new text_1.default(text, currentParent));
|
|
1340
|
+
if (lastTextPos + match[0].length < tagEndPos) {
|
|
1341
|
+
var text = data.substring(lastTextPos, tagStartPos);
|
|
1342
|
+
currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
1335
1343
|
}
|
|
1336
1344
|
}
|
|
1337
1345
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1346
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1347
|
+
// Skip frameflag node
|
|
1348
|
+
if (match[2] === frameflag)
|
|
1349
|
+
continue;
|
|
1350
|
+
// Handle comments
|
|
1341
1351
|
if (match[0][1] === '!') {
|
|
1342
|
-
// this is a comment
|
|
1343
1352
|
if (options.comment) {
|
|
1344
1353
|
// Only keep what is in between <!-- and -->
|
|
1345
|
-
var text = data.substring(
|
|
1346
|
-
currentParent.appendChild(new comment_1.default(text, currentParent));
|
|
1354
|
+
var text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
1355
|
+
currentParent.appendChild(new comment_1.default(text, currentParent, createRange(tagStartPos, tagEndPos)));
|
|
1347
1356
|
}
|
|
1348
|
-
|
|
1357
|
+
continue;
|
|
1349
1358
|
}
|
|
1350
|
-
|
|
1359
|
+
/* -- Handle tag matching -- */
|
|
1360
|
+
// Fix tag casing if necessary
|
|
1361
|
+
if (options.lowerCaseTagName)
|
|
1351
1362
|
match[2] = match[2].toLowerCase();
|
|
1352
|
-
|
|
1363
|
+
// Handle opening tags (ie. <this> not </that>)
|
|
1353
1364
|
if (!match[1]) {
|
|
1354
|
-
|
|
1365
|
+
/* Populate attributes */
|
|
1355
1366
|
var attrs = {};
|
|
1356
1367
|
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
|
|
1357
1368
|
attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
|
|
@@ -1360,50 +1371,46 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1360
1371
|
if (!match[4] && kElementsClosedByOpening[tagName]) {
|
|
1361
1372
|
if (kElementsClosedByOpening[tagName][match[2]]) {
|
|
1362
1373
|
stack.pop();
|
|
1363
|
-
currentParent = back_1.default(stack);
|
|
1374
|
+
currentParent = (0, back_1.default)(stack);
|
|
1364
1375
|
}
|
|
1365
1376
|
}
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
currentParent = currentParent.appendChild(
|
|
1377
|
+
var tagEndPos_1 = kMarkupPattern.lastIndex;
|
|
1378
|
+
var tagStartPos_1 = tagEndPos_1 - match[0].length;
|
|
1379
|
+
currentParent = currentParent.appendChild(
|
|
1380
|
+
// Initialize range (end position updated later for closed tags)
|
|
1381
|
+
new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
|
|
1369
1382
|
stack.push(currentParent);
|
|
1370
1383
|
if (is_block_text_element(match[2])) {
|
|
1371
|
-
//
|
|
1372
|
-
var
|
|
1373
|
-
var
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex);
|
|
1378
|
-
})();
|
|
1384
|
+
// Find closing tag
|
|
1385
|
+
var closeMarkup = "</" + match[2] + ">";
|
|
1386
|
+
var closeIndex = options.lowerCaseTagName
|
|
1387
|
+
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1388
|
+
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1389
|
+
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1379
1390
|
if (element_should_be_ignore(match[2])) {
|
|
1380
|
-
var text =
|
|
1381
|
-
if (
|
|
1382
|
-
|
|
1383
|
-
text = data.substr(kMarkupPattern.lastIndex);
|
|
1384
|
-
}
|
|
1385
|
-
else {
|
|
1386
|
-
text = data.substring(kMarkupPattern.lastIndex, index);
|
|
1387
|
-
}
|
|
1388
|
-
if (text.length > 0) {
|
|
1389
|
-
currentParent.appendChild(new text_1.default(text, currentParent));
|
|
1391
|
+
var text = data.substring(tagEndPos_1, textEndPos);
|
|
1392
|
+
if (text.length > 0 && /\S/.test(text)) {
|
|
1393
|
+
currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
|
|
1390
1394
|
}
|
|
1391
1395
|
}
|
|
1392
|
-
if (
|
|
1396
|
+
if (closeIndex === -1) {
|
|
1393
1397
|
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
|
|
1394
1398
|
}
|
|
1395
1399
|
else {
|
|
1396
|
-
lastTextPos = kMarkupPattern.lastIndex =
|
|
1400
|
+
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1401
|
+
// Cause to be treated as self-closing, because no close found
|
|
1397
1402
|
match[1] = 'true';
|
|
1398
1403
|
}
|
|
1399
1404
|
}
|
|
1400
1405
|
}
|
|
1406
|
+
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1401
1407
|
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
|
|
1402
|
-
// </ or /> or <br> etc.
|
|
1403
1408
|
while (true) {
|
|
1404
1409
|
if (currentParent.rawTagName === match[2]) {
|
|
1410
|
+
// Update range end for closed tag
|
|
1411
|
+
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1405
1412
|
stack.pop();
|
|
1406
|
-
currentParent = back_1.default(stack);
|
|
1413
|
+
currentParent = (0, back_1.default)(stack);
|
|
1407
1414
|
break;
|
|
1408
1415
|
}
|
|
1409
1416
|
else {
|
|
@@ -1412,7 +1419,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1412
1419
|
if (kElementsClosedByClosing[tagName]) {
|
|
1413
1420
|
if (kElementsClosedByClosing[tagName][match[2]]) {
|
|
1414
1421
|
stack.pop();
|
|
1415
|
-
currentParent = back_1.default(stack);
|
|
1422
|
+
currentParent = (0, back_1.default)(stack);
|
|
1416
1423
|
continue;
|
|
1417
1424
|
}
|
|
1418
1425
|
}
|
|
@@ -1421,9 +1428,6 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1421
1428
|
}
|
|
1422
1429
|
}
|
|
1423
1430
|
}
|
|
1424
|
-
};
|
|
1425
|
-
while ((match = kMarkupPattern.exec(data))) {
|
|
1426
|
-
_loop_1();
|
|
1427
1431
|
}
|
|
1428
1432
|
return stack;
|
|
1429
1433
|
}
|
|
@@ -1436,10 +1440,10 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1436
1440
|
if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
|
|
1437
1441
|
var stack = base_parse(data, options);
|
|
1438
1442
|
var root = stack[0];
|
|
1439
|
-
var
|
|
1443
|
+
var _loop_1 = function () {
|
|
1440
1444
|
// Handle each error elements.
|
|
1441
1445
|
var last = stack.pop();
|
|
1442
|
-
var oneBefore = back_1.default(stack);
|
|
1446
|
+
var oneBefore = (0, back_1.default)(stack);
|
|
1443
1447
|
if (last.parentNode && last.parentNode.parentNode) {
|
|
1444
1448
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
|
1445
1449
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
|
@@ -1462,7 +1466,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
|
|
|
1462
1466
|
}
|
|
1463
1467
|
};
|
|
1464
1468
|
while (stack.length > 1) {
|
|
1465
|
-
|
|
1469
|
+
_loop_1();
|
|
1466
1470
|
}
|
|
1467
1471
|
// response.childNodes.forEach((node) => {
|
|
1468
1472
|
// if (node instanceof HTMLElement) {
|
|
@@ -1480,10 +1484,16 @@ define("nodes/node", ["require", "exports", "he"], function (require, exports, h
|
|
|
1480
1484
|
* Node Class as base class for TextNode and HTMLElement.
|
|
1481
1485
|
*/
|
|
1482
1486
|
var Node = /** @class */ (function () {
|
|
1483
|
-
function Node(parentNode) {
|
|
1487
|
+
function Node(parentNode, range) {
|
|
1484
1488
|
if (parentNode === void 0) { parentNode = null; }
|
|
1485
1489
|
this.parentNode = parentNode;
|
|
1486
1490
|
this.childNodes = [];
|
|
1491
|
+
Object.defineProperty(this, 'range', {
|
|
1492
|
+
enumerable: false,
|
|
1493
|
+
writable: true,
|
|
1494
|
+
configurable: true,
|
|
1495
|
+
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
1496
|
+
});
|
|
1487
1497
|
}
|
|
1488
1498
|
Object.defineProperty(Node.prototype, "innerText", {
|
|
1489
1499
|
get: function () {
|
|
@@ -1494,10 +1504,10 @@ define("nodes/node", ["require", "exports", "he"], function (require, exports, h
|
|
|
1494
1504
|
});
|
|
1495
1505
|
Object.defineProperty(Node.prototype, "textContent", {
|
|
1496
1506
|
get: function () {
|
|
1497
|
-
return he_3.decode(this.rawText);
|
|
1507
|
+
return (0, he_3.decode)(this.rawText);
|
|
1498
1508
|
},
|
|
1499
1509
|
set: function (val) {
|
|
1500
|
-
this.rawText = he_3.encode(val);
|
|
1510
|
+
this.rawText = (0, he_3.encode)(val);
|
|
1501
1511
|
},
|
|
1502
1512
|
enumerable: false,
|
|
1503
1513
|
configurable: true
|
|
@@ -1513,8 +1523,8 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
|
|
|
1513
1523
|
type_4 = __importDefault(type_4);
|
|
1514
1524
|
var CommentNode = /** @class */ (function (_super) {
|
|
1515
1525
|
__extends(CommentNode, _super);
|
|
1516
|
-
function CommentNode(rawText, parentNode) {
|
|
1517
|
-
var _this = _super.call(this, parentNode) || this;
|
|
1526
|
+
function CommentNode(rawText, parentNode, range) {
|
|
1527
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
1518
1528
|
_this.rawText = rawText;
|
|
1519
1529
|
/**
|
|
1520
1530
|
* Node Type declaration.
|
|
@@ -1556,7 +1566,7 @@ define("valid", ["require", "exports", "nodes/html"], function (require, exports
|
|
|
1556
1566
|
*/
|
|
1557
1567
|
function valid(data, options) {
|
|
1558
1568
|
if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
|
|
1559
|
-
var stack = html_2.base_parse(data, options);
|
|
1569
|
+
var stack = (0, html_2.base_parse)(data, options);
|
|
1560
1570
|
return Boolean(stack.length === 1);
|
|
1561
1571
|
}
|
|
1562
1572
|
exports.default = valid;
|
package/dist/nodes/comment.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ import NodeType from './type';
|
|
|
3
3
|
import HTMLElement from './html';
|
|
4
4
|
export default class CommentNode extends Node {
|
|
5
5
|
rawText: string;
|
|
6
|
-
constructor(rawText: string, parentNode: HTMLElement);
|
|
6
|
+
constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
|
|
7
7
|
/**
|
|
8
8
|
* Node Type declaration.
|
|
9
9
|
* @type {Number}
|
|
@@ -14,5 +14,5 @@ export default class CommentNode extends Node {
|
|
|
14
14
|
* @return {string} text content
|
|
15
15
|
*/
|
|
16
16
|
get text(): string;
|
|
17
|
-
toString():
|
|
17
|
+
toString(): string;
|
|
18
18
|
}
|
package/dist/nodes/comment.js
CHANGED
|
@@ -22,8 +22,8 @@ var node_1 = __importDefault(require("./node"));
|
|
|
22
22
|
var type_1 = __importDefault(require("./type"));
|
|
23
23
|
var CommentNode = /** @class */ (function (_super) {
|
|
24
24
|
__extends(CommentNode, _super);
|
|
25
|
-
function CommentNode(rawText, parentNode) {
|
|
26
|
-
var _this = _super.call(this, parentNode) || this;
|
|
25
|
+
function CommentNode(rawText, parentNode, range) {
|
|
26
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
27
27
|
_this.rawText = rawText;
|
|
28
28
|
/**
|
|
29
29
|
* Node Type declaration.
|
package/dist/nodes/html.d.ts
CHANGED
|
@@ -59,7 +59,7 @@ export default class HTMLElement extends Node {
|
|
|
59
59
|
*
|
|
60
60
|
* @memberof HTMLElement
|
|
61
61
|
*/
|
|
62
|
-
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null);
|
|
62
|
+
constructor(tagName: string, keyAttrs: KeyAttributes, rawAttrs: string, parentNode: HTMLElement | null, range?: [number, number]);
|
|
63
63
|
/**
|
|
64
64
|
* Remove current element
|
|
65
65
|
*/
|
package/dist/nodes/html.js
CHANGED
|
@@ -25,10 +25,14 @@ var __assign = (this && this.__assign) || function () {
|
|
|
25
25
|
};
|
|
26
26
|
return __assign.apply(this, arguments);
|
|
27
27
|
};
|
|
28
|
-
var __spreadArray = (this && this.__spreadArray) || function (to, from) {
|
|
29
|
-
for (var i = 0,
|
|
30
|
-
|
|
31
|
-
|
|
28
|
+
var __spreadArray = (this && this.__spreadArray) || function (to, from, pack) {
|
|
29
|
+
if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
|
|
30
|
+
if (ar || !(i in from)) {
|
|
31
|
+
if (!ar) ar = Array.prototype.slice.call(from, 0, i);
|
|
32
|
+
ar[i] = from[i];
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
return to.concat(ar || from);
|
|
32
36
|
};
|
|
33
37
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
34
38
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
@@ -200,9 +204,9 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
200
204
|
*
|
|
201
205
|
* @memberof HTMLElement
|
|
202
206
|
*/
|
|
203
|
-
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode) {
|
|
207
|
+
function HTMLElement(tagName, keyAttrs, rawAttrs, parentNode, range) {
|
|
204
208
|
if (rawAttrs === void 0) { rawAttrs = ''; }
|
|
205
|
-
var _this = _super.call(this, parentNode) || this;
|
|
209
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
206
210
|
_this.rawAttrs = rawAttrs;
|
|
207
211
|
/**
|
|
208
212
|
* Node Type declaration.
|
|
@@ -434,7 +438,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
434
438
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
435
439
|
return child === _this;
|
|
436
440
|
});
|
|
437
|
-
this.parentNode.childNodes = __spreadArray(__spreadArray(__spreadArray([], this.parentNode.childNodes.slice(0, idx)), content), this.parentNode.childNodes.slice(idx + 1));
|
|
441
|
+
this.parentNode.childNodes = __spreadArray(__spreadArray(__spreadArray([], this.parentNode.childNodes.slice(0, idx), true), content, true), this.parentNode.childNodes.slice(idx + 1), true);
|
|
438
442
|
};
|
|
439
443
|
Object.defineProperty(HTMLElement.prototype, "outerHTML", {
|
|
440
444
|
get: function () {
|
|
@@ -527,7 +531,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
527
531
|
* @return {HTMLElement[]} matching elements
|
|
528
532
|
*/
|
|
529
533
|
HTMLElement.prototype.querySelectorAll = function (selector) {
|
|
530
|
-
return css_select_1.selectAll(selector, this, {
|
|
534
|
+
return (0, css_select_1.selectAll)(selector, this, {
|
|
531
535
|
xmlMode: true,
|
|
532
536
|
adapter: matcher_1.default
|
|
533
537
|
});
|
|
@@ -595,7 +599,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
595
599
|
* @return {HTMLElement} matching node
|
|
596
600
|
*/
|
|
597
601
|
HTMLElement.prototype.querySelector = function (selector) {
|
|
598
|
-
return css_select_1.selectOne(selector, this, {
|
|
602
|
+
return (0, css_select_1.selectOne)(selector, this, {
|
|
599
603
|
xmlMode: true,
|
|
600
604
|
adapter: matcher_1.default
|
|
601
605
|
});
|
|
@@ -668,17 +672,14 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
668
672
|
}
|
|
669
673
|
el = this;
|
|
670
674
|
while (el) {
|
|
671
|
-
var e = css_select_1.selectOne(selector, el, {
|
|
675
|
+
var e = (0, css_select_1.selectOne)(selector, el, {
|
|
672
676
|
xmlMode: true,
|
|
673
677
|
adapter: __assign(__assign({}, matcher_1.default), { getChildren: function (node) {
|
|
674
678
|
var child = mapChild.get(node);
|
|
675
679
|
return child && [child];
|
|
676
|
-
},
|
|
677
|
-
getSiblings: function (node) {
|
|
680
|
+
}, getSiblings: function (node) {
|
|
678
681
|
return [node];
|
|
679
|
-
},
|
|
680
|
-
findOne: findOne,
|
|
681
|
-
findAll: function () {
|
|
682
|
+
}, findOne: findOne, findAll: function () {
|
|
682
683
|
return [];
|
|
683
684
|
} })
|
|
684
685
|
});
|
|
@@ -717,7 +718,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
717
718
|
* @return {Node} last child node
|
|
718
719
|
*/
|
|
719
720
|
get: function () {
|
|
720
|
-
return back_1.default(this.childNodes);
|
|
721
|
+
return (0, back_1.default)(this.childNodes);
|
|
721
722
|
},
|
|
722
723
|
enumerable: false,
|
|
723
724
|
configurable: true
|
|
@@ -879,7 +880,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
879
880
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
880
881
|
return child === _this;
|
|
881
882
|
});
|
|
882
|
-
(_a = this.parentNode.childNodes).splice.apply(_a, __spreadArray([idx + 1, 0], p.childNodes));
|
|
883
|
+
(_a = this.parentNode.childNodes).splice.apply(_a, __spreadArray([idx + 1, 0], p.childNodes, false));
|
|
883
884
|
p.childNodes.forEach(function (n) {
|
|
884
885
|
if (n instanceof HTMLElement) {
|
|
885
886
|
n.parentNode = _this.parentNode;
|
|
@@ -898,7 +899,7 @@ var HTMLElement = /** @class */ (function (_super) {
|
|
|
898
899
|
var idx = this.parentNode.childNodes.findIndex(function (child) {
|
|
899
900
|
return child === _this;
|
|
900
901
|
});
|
|
901
|
-
(_c = this.parentNode.childNodes).splice.apply(_c, __spreadArray([idx, 0], p.childNodes));
|
|
902
|
+
(_c = this.parentNode.childNodes).splice.apply(_c, __spreadArray([idx, 0], p.childNodes, false));
|
|
902
903
|
p.childNodes.forEach(function (n) {
|
|
903
904
|
if (n instanceof HTMLElement) {
|
|
904
905
|
n.parentNode = _this.parentNode;
|
|
@@ -1073,39 +1074,49 @@ function base_parse(data, options) {
|
|
|
1073
1074
|
return it.test(tag);
|
|
1074
1075
|
});
|
|
1075
1076
|
}
|
|
1076
|
-
var
|
|
1077
|
+
var createRange = function (startPos, endPos) {
|
|
1078
|
+
return [startPos - frameFlagOffset, endPos - frameFlagOffset];
|
|
1079
|
+
};
|
|
1080
|
+
var root = new HTMLElement(null, {}, '', null, [0, data.length]);
|
|
1077
1081
|
var currentParent = root;
|
|
1078
1082
|
var stack = [root];
|
|
1079
1083
|
var lastTextPos = -1;
|
|
1080
1084
|
var match;
|
|
1081
1085
|
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1082
1086
|
data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
|
|
1083
|
-
var _loop_1 = function () {
|
|
1087
|
+
var dataEndPos = data.length - (frameflag.length + 2);
|
|
1088
|
+
var frameFlagOffset = frameflag.length + 2;
|
|
1089
|
+
while ((match = kMarkupPattern.exec(data))) {
|
|
1090
|
+
var tagStartPos = kMarkupPattern.lastIndex - match[0].length;
|
|
1091
|
+
var tagEndPos = kMarkupPattern.lastIndex;
|
|
1092
|
+
// Add TextNode if content
|
|
1084
1093
|
if (lastTextPos > -1) {
|
|
1085
|
-
if (lastTextPos + match[0].length <
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
currentParent.appendChild(new text_1.default(text, currentParent));
|
|
1094
|
+
if (lastTextPos + match[0].length < tagEndPos) {
|
|
1095
|
+
var text = data.substring(lastTextPos, tagStartPos);
|
|
1096
|
+
currentParent.appendChild(new text_1.default(text, currentParent, createRange(lastTextPos, tagStartPos)));
|
|
1089
1097
|
}
|
|
1090
1098
|
}
|
|
1091
1099
|
lastTextPos = kMarkupPattern.lastIndex;
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1100
|
+
// https://github.com/taoqf/node-html-parser/issues/38
|
|
1101
|
+
// Skip frameflag node
|
|
1102
|
+
if (match[2] === frameflag)
|
|
1103
|
+
continue;
|
|
1104
|
+
// Handle comments
|
|
1095
1105
|
if (match[0][1] === '!') {
|
|
1096
|
-
// this is a comment
|
|
1097
1106
|
if (options.comment) {
|
|
1098
1107
|
// Only keep what is in between <!-- and -->
|
|
1099
|
-
var text = data.substring(
|
|
1100
|
-
currentParent.appendChild(new comment_1.default(text, currentParent));
|
|
1108
|
+
var text = data.substring(tagStartPos + 4, tagEndPos - 3);
|
|
1109
|
+
currentParent.appendChild(new comment_1.default(text, currentParent, createRange(tagStartPos, tagEndPos)));
|
|
1101
1110
|
}
|
|
1102
|
-
|
|
1111
|
+
continue;
|
|
1103
1112
|
}
|
|
1104
|
-
|
|
1113
|
+
/* -- Handle tag matching -- */
|
|
1114
|
+
// Fix tag casing if necessary
|
|
1115
|
+
if (options.lowerCaseTagName)
|
|
1105
1116
|
match[2] = match[2].toLowerCase();
|
|
1106
|
-
|
|
1117
|
+
// Handle opening tags (ie. <this> not </that>)
|
|
1107
1118
|
if (!match[1]) {
|
|
1108
|
-
|
|
1119
|
+
/* Populate attributes */
|
|
1109
1120
|
var attrs = {};
|
|
1110
1121
|
for (var attMatch = void 0; (attMatch = kAttributePattern.exec(match[3]));) {
|
|
1111
1122
|
attrs[attMatch[2].toLowerCase()] = attMatch[4] || attMatch[5] || attMatch[6];
|
|
@@ -1114,50 +1125,46 @@ function base_parse(data, options) {
|
|
|
1114
1125
|
if (!match[4] && kElementsClosedByOpening[tagName]) {
|
|
1115
1126
|
if (kElementsClosedByOpening[tagName][match[2]]) {
|
|
1116
1127
|
stack.pop();
|
|
1117
|
-
currentParent = back_1.default(stack);
|
|
1128
|
+
currentParent = (0, back_1.default)(stack);
|
|
1118
1129
|
}
|
|
1119
1130
|
}
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
currentParent = currentParent.appendChild(
|
|
1131
|
+
var tagEndPos_1 = kMarkupPattern.lastIndex;
|
|
1132
|
+
var tagStartPos_1 = tagEndPos_1 - match[0].length;
|
|
1133
|
+
currentParent = currentParent.appendChild(
|
|
1134
|
+
// Initialize range (end position updated later for closed tags)
|
|
1135
|
+
new HTMLElement(match[2], attrs, match[3], null, createRange(tagStartPos_1, tagEndPos_1)));
|
|
1123
1136
|
stack.push(currentParent);
|
|
1124
1137
|
if (is_block_text_element(match[2])) {
|
|
1125
|
-
//
|
|
1126
|
-
var
|
|
1127
|
-
var
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
return data.indexOf(closeMarkup_1, kMarkupPattern.lastIndex);
|
|
1132
|
-
})();
|
|
1138
|
+
// Find closing tag
|
|
1139
|
+
var closeMarkup = "</" + match[2] + ">";
|
|
1140
|
+
var closeIndex = options.lowerCaseTagName
|
|
1141
|
+
? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
|
|
1142
|
+
: data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
|
|
1143
|
+
var textEndPos = closeIndex === -1 ? dataEndPos : closeIndex;
|
|
1133
1144
|
if (element_should_be_ignore(match[2])) {
|
|
1134
|
-
var text =
|
|
1135
|
-
if (
|
|
1136
|
-
|
|
1137
|
-
text = data.substr(kMarkupPattern.lastIndex);
|
|
1138
|
-
}
|
|
1139
|
-
else {
|
|
1140
|
-
text = data.substring(kMarkupPattern.lastIndex, index);
|
|
1141
|
-
}
|
|
1142
|
-
if (text.length > 0) {
|
|
1143
|
-
currentParent.appendChild(new text_1.default(text, currentParent));
|
|
1145
|
+
var text = data.substring(tagEndPos_1, textEndPos);
|
|
1146
|
+
if (text.length > 0 && /\S/.test(text)) {
|
|
1147
|
+
currentParent.appendChild(new text_1.default(text, currentParent, createRange(tagEndPos_1, textEndPos)));
|
|
1144
1148
|
}
|
|
1145
1149
|
}
|
|
1146
|
-
if (
|
|
1150
|
+
if (closeIndex === -1) {
|
|
1147
1151
|
lastTextPos = kMarkupPattern.lastIndex = data.length + 1;
|
|
1148
1152
|
}
|
|
1149
1153
|
else {
|
|
1150
|
-
lastTextPos = kMarkupPattern.lastIndex =
|
|
1154
|
+
lastTextPos = kMarkupPattern.lastIndex = closeIndex + closeMarkup.length;
|
|
1155
|
+
// Cause to be treated as self-closing, because no close found
|
|
1151
1156
|
match[1] = 'true';
|
|
1152
1157
|
}
|
|
1153
1158
|
}
|
|
1154
1159
|
}
|
|
1160
|
+
// Handle closing tags or self-closed elements (ie </tag> or <br>)
|
|
1155
1161
|
if (match[1] || match[4] || kSelfClosingElements[match[2]]) {
|
|
1156
|
-
// </ or /> or <br> etc.
|
|
1157
1162
|
while (true) {
|
|
1158
1163
|
if (currentParent.rawTagName === match[2]) {
|
|
1164
|
+
// Update range end for closed tag
|
|
1165
|
+
currentParent.range[1] = createRange(-1, Math.max(lastTextPos, tagEndPos))[1];
|
|
1159
1166
|
stack.pop();
|
|
1160
|
-
currentParent = back_1.default(stack);
|
|
1167
|
+
currentParent = (0, back_1.default)(stack);
|
|
1161
1168
|
break;
|
|
1162
1169
|
}
|
|
1163
1170
|
else {
|
|
@@ -1166,7 +1173,7 @@ function base_parse(data, options) {
|
|
|
1166
1173
|
if (kElementsClosedByClosing[tagName]) {
|
|
1167
1174
|
if (kElementsClosedByClosing[tagName][match[2]]) {
|
|
1168
1175
|
stack.pop();
|
|
1169
|
-
currentParent = back_1.default(stack);
|
|
1176
|
+
currentParent = (0, back_1.default)(stack);
|
|
1170
1177
|
continue;
|
|
1171
1178
|
}
|
|
1172
1179
|
}
|
|
@@ -1175,9 +1182,6 @@ function base_parse(data, options) {
|
|
|
1175
1182
|
}
|
|
1176
1183
|
}
|
|
1177
1184
|
}
|
|
1178
|
-
};
|
|
1179
|
-
while ((match = kMarkupPattern.exec(data))) {
|
|
1180
|
-
_loop_1();
|
|
1181
1185
|
}
|
|
1182
1186
|
return stack;
|
|
1183
1187
|
}
|
|
@@ -1190,10 +1194,10 @@ function parse(data, options) {
|
|
|
1190
1194
|
if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
|
|
1191
1195
|
var stack = base_parse(data, options);
|
|
1192
1196
|
var root = stack[0];
|
|
1193
|
-
var _loop_2 = function () {
|
|
1197
|
+
var _loop_1 = function () {
|
|
1194
1198
|
// Handle each error elements.
|
|
1195
1199
|
var last = stack.pop();
|
|
1196
|
-
var oneBefore = back_1.default(stack);
|
|
1200
|
+
var oneBefore = (0, back_1.default)(stack);
|
|
1197
1201
|
if (last.parentNode && last.parentNode.parentNode) {
|
|
1198
1202
|
if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
|
|
1199
1203
|
// Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
|
|
@@ -1216,7 +1220,7 @@ function parse(data, options) {
|
|
|
1216
1220
|
}
|
|
1217
1221
|
};
|
|
1218
1222
|
while (stack.length > 1) {
|
|
1219
|
-
|
|
1223
|
+
_loop_1();
|
|
1220
1224
|
}
|
|
1221
1225
|
// response.childNodes.forEach((node) => {
|
|
1222
1226
|
// if (node instanceof HTMLElement) {
|
package/dist/nodes/node.d.ts
CHANGED
|
@@ -7,10 +7,11 @@ export default abstract class Node {
|
|
|
7
7
|
parentNode: HTMLElement;
|
|
8
8
|
abstract nodeType: NodeType;
|
|
9
9
|
childNodes: Node[];
|
|
10
|
+
range: readonly [number, number];
|
|
10
11
|
abstract text: string;
|
|
11
12
|
abstract rawText: string;
|
|
12
13
|
abstract toString(): string;
|
|
13
|
-
constructor(parentNode?: HTMLElement);
|
|
14
|
+
constructor(parentNode?: HTMLElement, range?: [number, number]);
|
|
14
15
|
get innerText(): string;
|
|
15
16
|
get textContent(): string;
|
|
16
17
|
set textContent(val: string);
|
package/dist/nodes/node.js
CHANGED
|
@@ -5,10 +5,16 @@ var he_1 = require("he");
|
|
|
5
5
|
* Node Class as base class for TextNode and HTMLElement.
|
|
6
6
|
*/
|
|
7
7
|
var Node = /** @class */ (function () {
|
|
8
|
-
function Node(parentNode) {
|
|
8
|
+
function Node(parentNode, range) {
|
|
9
9
|
if (parentNode === void 0) { parentNode = null; }
|
|
10
10
|
this.parentNode = parentNode;
|
|
11
11
|
this.childNodes = [];
|
|
12
|
+
Object.defineProperty(this, 'range', {
|
|
13
|
+
enumerable: false,
|
|
14
|
+
writable: true,
|
|
15
|
+
configurable: true,
|
|
16
|
+
value: range !== null && range !== void 0 ? range : [-1, -1]
|
|
17
|
+
});
|
|
12
18
|
}
|
|
13
19
|
Object.defineProperty(Node.prototype, "innerText", {
|
|
14
20
|
get: function () {
|
|
@@ -19,10 +25,10 @@ var Node = /** @class */ (function () {
|
|
|
19
25
|
});
|
|
20
26
|
Object.defineProperty(Node.prototype, "textContent", {
|
|
21
27
|
get: function () {
|
|
22
|
-
return he_1.decode(this.rawText);
|
|
28
|
+
return (0, he_1.decode)(this.rawText);
|
|
23
29
|
},
|
|
24
30
|
set: function (val) {
|
|
25
|
-
this.rawText = he_1.encode(val);
|
|
31
|
+
this.rawText = (0, he_1.encode)(val);
|
|
26
32
|
},
|
|
27
33
|
enumerable: false,
|
|
28
34
|
configurable: true
|
package/dist/nodes/text.d.ts
CHANGED
|
@@ -6,7 +6,7 @@ import NodeType from './type';
|
|
|
6
6
|
* @param {string} value [description]
|
|
7
7
|
*/
|
|
8
8
|
export default class TextNode extends Node {
|
|
9
|
-
constructor(rawText: string, parentNode: HTMLElement);
|
|
9
|
+
constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
|
|
10
10
|
/**
|
|
11
11
|
* Node Type declaration.
|
|
12
12
|
* @type {Number}
|
package/dist/nodes/text.js
CHANGED
|
@@ -27,8 +27,8 @@ var type_1 = __importDefault(require("./type"));
|
|
|
27
27
|
*/
|
|
28
28
|
var TextNode = /** @class */ (function (_super) {
|
|
29
29
|
__extends(TextNode, _super);
|
|
30
|
-
function TextNode(rawText, parentNode) {
|
|
31
|
-
var _this = _super.call(this, parentNode) || this;
|
|
30
|
+
function TextNode(rawText, parentNode, range) {
|
|
31
|
+
var _this = _super.call(this, parentNode, range) || this;
|
|
32
32
|
/**
|
|
33
33
|
* Node Type declaration.
|
|
34
34
|
* @type {Number}
|
|
@@ -84,7 +84,7 @@ var TextNode = /** @class */ (function (_super) {
|
|
|
84
84
|
* @return {string} text content
|
|
85
85
|
*/
|
|
86
86
|
get: function () {
|
|
87
|
-
return he_1.decode(this.rawText);
|
|
87
|
+
return (0, he_1.decode)(this.rawText);
|
|
88
88
|
},
|
|
89
89
|
enumerable: false,
|
|
90
90
|
configurable: true
|
package/dist/valid.js
CHANGED
|
@@ -7,7 +7,7 @@ var html_1 = require("./nodes/html");
|
|
|
7
7
|
*/
|
|
8
8
|
function valid(data, options) {
|
|
9
9
|
if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
|
|
10
|
-
var stack = html_1.base_parse(data, options);
|
|
10
|
+
var stack = (0, html_1.base_parse)(data, options);
|
|
11
11
|
return Boolean(stack.length === 1);
|
|
12
12
|
}
|
|
13
13
|
exports.default = valid;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "node-html-parser",
|
|
3
|
-
"version": "4.0.0",
|
|
3
|
+
"version": "4.1.3",
|
|
4
4
|
"description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/esm/index.js",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
"clean": "del-cli ./dist/",
|
|
12
12
|
"ts:cjs": "tsc -m commonjs",
|
|
13
13
|
"ts:amd": "tsc -t es5 -m amd -d false --outFile ./dist/main.js",
|
|
14
|
-
"ts:esm": "tsc -t
|
|
14
|
+
"ts:esm": "tsc -t es2019 -m esnext -d false --outDir ./dist/esm/",
|
|
15
15
|
"build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
|
|
16
16
|
"dev": "tsc -w & mocha -w ./test/*.js",
|
|
17
17
|
"pretest": "tsc -m commonjs",
|
|
@@ -54,6 +54,7 @@
|
|
|
54
54
|
"htmlparser-benchmark": "^1.1.3",
|
|
55
55
|
"htmlparser2": "^6.0.0",
|
|
56
56
|
"mocha": "latest",
|
|
57
|
+
"mocha-each": "^2.0.1",
|
|
57
58
|
"np": "latest",
|
|
58
59
|
"parse5": "^6.0.1",
|
|
59
60
|
"should": "latest",
|