node-html-parser 5.2.0 → 5.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -199,6 +199,10 @@ Returns true if the classname is already in the classList.
199
199
 
200
200
  Get class names.
201
201
 
202
+ #### Node#clone()
203
+
204
+ Clone a node.
205
+
202
206
  ## HTMLElement Properties
203
207
 
204
208
  ### HTMLElement#text
@@ -247,6 +251,14 @@ Returns a reference to the next child node of the current element's parent.
247
251
 
248
252
  Returns a reference to the next child element of the current element's parent.
249
253
 
254
+ ### HTMLElement#previousSibling
255
+
256
+ Returns a reference to the previous child node of the current element's parent.
257
+
258
+ ### HTMLElement#previousElementSibling
259
+
260
+ Returns a reference to the previous child element of the current element's parent.
261
+
250
262
  ### HTMLElement#textContent
251
263
 
252
264
  Get or Set textContent of current element, more efficient than [set_content](#htmlelementset_contentcontent-string--node--node).
package/dist/main.js CHANGED
@@ -1,3 +1,6 @@
1
+ var __importDefault = (this && this.__importDefault) || function (mod) {
2
+ return (mod && mod.__esModule) ? mod : { "default": mod };
3
+ };
1
4
  var __extends = (this && this.__extends) || (function () {
2
5
  var extendStatics = function (d, b) {
3
6
  extendStatics = Object.setPrototypeOf ||
@@ -13,9 +16,6 @@ var __extends = (this && this.__extends) || (function () {
13
16
  d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
14
17
  };
15
18
  })();
16
- var __importDefault = (this && this.__importDefault) || function (mod) {
17
- return (mod && mod.__esModule) ? mod : { "default": mod };
18
- };
19
19
  var __assign = (this && this.__assign) || function () {
20
20
  __assign = Object.assign || function(t) {
21
21
  for (var s, i = 1, n = arguments.length; i < n; i++) {
@@ -55,134 +55,51 @@ define("nodes/type", ["require", "exports"], function (require, exports) {
55
55
  })(NodeType || (NodeType = {}));
56
56
  exports.default = NodeType;
57
57
  });
58
- define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], function (require, exports, he_1, node_1, type_1) {
58
+ define("nodes/node", ["require", "exports", "he"], function (require, exports, he_1) {
59
59
  "use strict";
60
60
  Object.defineProperty(exports, "__esModule", { value: true });
61
- node_1 = __importDefault(node_1);
62
- type_1 = __importDefault(type_1);
63
61
  /**
64
- * TextNode to contain a text element in DOM tree.
65
- * @param {string} value [description]
62
+ * Node Class as base class for TextNode and HTMLElement.
66
63
  */
67
- var TextNode = /** @class */ (function (_super) {
68
- __extends(TextNode, _super);
69
- function TextNode(rawText, parentNode, range) {
70
- var _this = _super.call(this, parentNode, range) || this;
71
- /**
72
- * Node Type declaration.
73
- * @type {Number}
74
- */
75
- _this.nodeType = type_1.default.TEXT_NODE;
76
- _this._rawText = rawText;
77
- return _this;
64
+ var Node = /** @class */ (function () {
65
+ function Node(parentNode, range) {
66
+ if (parentNode === void 0) { parentNode = null; }
67
+ this.parentNode = parentNode;
68
+ this.childNodes = [];
69
+ Object.defineProperty(this, 'range', {
70
+ enumerable: false,
71
+ writable: true,
72
+ configurable: true,
73
+ value: range !== null && range !== void 0 ? range : [-1, -1]
74
+ });
78
75
  }
79
- Object.defineProperty(TextNode.prototype, "rawText", {
80
- get: function () {
81
- return this._rawText;
82
- },
83
- /**
84
- * Set rawText and invalidate trimmed caches
85
- */
86
- set: function (text) {
87
- this._rawText = text;
88
- this._trimmedRawText = void 0;
89
- this._trimmedText = void 0;
90
- },
91
- enumerable: false,
92
- configurable: true
93
- });
94
- Object.defineProperty(TextNode.prototype, "trimmedRawText", {
95
- /**
96
- * Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
97
- */
98
- get: function () {
99
- if (this._trimmedRawText !== undefined)
100
- return this._trimmedRawText;
101
- this._trimmedRawText = trimText(this.rawText);
102
- return this._trimmedRawText;
103
- },
104
- enumerable: false,
105
- configurable: true
106
- });
107
- Object.defineProperty(TextNode.prototype, "trimmedText", {
108
- /**
109
- * Returns text with all whitespace trimmed except single leading/trailing non-breaking space
110
- */
76
+ Object.defineProperty(Node.prototype, "innerText", {
111
77
  get: function () {
112
- if (this._trimmedText !== undefined)
113
- return this._trimmedText;
114
- this._trimmedText = trimText(this.text);
115
- return this._trimmedText;
78
+ return this.rawText;
116
79
  },
117
80
  enumerable: false,
118
81
  configurable: true
119
82
  });
120
- Object.defineProperty(TextNode.prototype, "text", {
121
- /**
122
- * Get unescaped text value of current node and its children.
123
- * @return {string} text content
124
- */
83
+ Object.defineProperty(Node.prototype, "textContent", {
125
84
  get: function () {
126
85
  return (0, he_1.decode)(this.rawText);
127
86
  },
128
- enumerable: false,
129
- configurable: true
130
- });
131
- Object.defineProperty(TextNode.prototype, "isWhitespace", {
132
- /**
133
- * Detect if the node contains only white space.
134
- * @return {boolean}
135
- */
136
- get: function () {
137
- return /^(\s|&nbsp;)*$/.test(this.rawText);
87
+ set: function (val) {
88
+ this.rawText = (0, he_1.encode)(val);
138
89
  },
139
90
  enumerable: false,
140
91
  configurable: true
141
92
  });
142
- TextNode.prototype.toString = function () {
143
- return this.rawText;
144
- };
145
- return TextNode;
146
- }(node_1.default));
147
- exports.default = TextNode;
148
- /**
149
- * Trim whitespace except single leading/trailing non-breaking space
150
- */
151
- function trimText(text) {
152
- var i = 0;
153
- var startPos;
154
- var endPos;
155
- while (i >= 0 && i < text.length) {
156
- if (/\S/.test(text[i])) {
157
- if (startPos === undefined) {
158
- startPos = i;
159
- i = text.length;
160
- }
161
- else {
162
- endPos = i;
163
- i = void 0;
164
- }
165
- }
166
- if (startPos === undefined)
167
- i++;
168
- else
169
- i--;
170
- }
171
- if (startPos === undefined)
172
- startPos = 0;
173
- if (endPos === undefined)
174
- endPos = text.length - 1;
175
- var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
176
- var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
177
- return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
178
- }
93
+ return Node;
94
+ }());
95
+ exports.default = Node;
179
96
  });
180
- define("matcher", ["require", "exports", "nodes/type"], function (require, exports, type_2) {
97
+ define("matcher", ["require", "exports", "nodes/type"], function (require, exports, type_1) {
181
98
  "use strict";
182
99
  Object.defineProperty(exports, "__esModule", { value: true });
183
- type_2 = __importDefault(type_2);
100
+ type_1 = __importDefault(type_1);
184
101
  function isTag(node) {
185
- return node && node.nodeType === type_2.default.ELEMENT_NODE;
102
+ return node && node.nodeType === type_1.default.ELEMENT_NODE;
186
103
  }
187
104
  function getAttributeValue(elem, name) {
188
105
  return isTag(elem) ? elem.getAttribute(name) : undefined;
@@ -282,21 +199,146 @@ define("matcher", ["require", "exports", "nodes/type"], function (require, expor
282
199
  findAll: findAll
283
200
  };
284
201
  });
285
- define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "nodes/type", "nodes/text", "matcher", "back", "nodes/comment"], function (require, exports, he_2, css_select_1, node_2, type_3, text_1, matcher_1, back_1, comment_1) {
202
+ define("nodes/text", ["require", "exports", "he", "nodes/node", "nodes/type"], function (require, exports, he_2, node_1, type_2) {
203
+ "use strict";
204
+ Object.defineProperty(exports, "__esModule", { value: true });
205
+ node_1 = __importDefault(node_1);
206
+ type_2 = __importDefault(type_2);
207
+ /**
208
+ * TextNode to contain a text element in DOM tree.
209
+ * @param {string} value [description]
210
+ */
211
+ var TextNode = /** @class */ (function (_super) {
212
+ __extends(TextNode, _super);
213
+ function TextNode(rawText, parentNode, range) {
214
+ var _this = _super.call(this, parentNode, range) || this;
215
+ /**
216
+ * Node Type declaration.
217
+ * @type {Number}
218
+ */
219
+ _this.nodeType = type_2.default.TEXT_NODE;
220
+ _this._rawText = rawText;
221
+ return _this;
222
+ }
223
+ TextNode.prototype.clone = function () {
224
+ return new TextNode(this._rawText, null);
225
+ };
226
+ Object.defineProperty(TextNode.prototype, "rawText", {
227
+ get: function () {
228
+ return this._rawText;
229
+ },
230
+ /**
231
+ * Set rawText and invalidate trimmed caches
232
+ */
233
+ set: function (text) {
234
+ this._rawText = text;
235
+ this._trimmedRawText = void 0;
236
+ this._trimmedText = void 0;
237
+ },
238
+ enumerable: false,
239
+ configurable: true
240
+ });
241
+ Object.defineProperty(TextNode.prototype, "trimmedRawText", {
242
+ /**
243
+ * Returns raw text with all whitespace trimmed except single leading/trailing non-breaking space
244
+ */
245
+ get: function () {
246
+ if (this._trimmedRawText !== undefined)
247
+ return this._trimmedRawText;
248
+ this._trimmedRawText = trimText(this.rawText);
249
+ return this._trimmedRawText;
250
+ },
251
+ enumerable: false,
252
+ configurable: true
253
+ });
254
+ Object.defineProperty(TextNode.prototype, "trimmedText", {
255
+ /**
256
+ * Returns text with all whitespace trimmed except single leading/trailing non-breaking space
257
+ */
258
+ get: function () {
259
+ if (this._trimmedText !== undefined)
260
+ return this._trimmedText;
261
+ this._trimmedText = trimText(this.text);
262
+ return this._trimmedText;
263
+ },
264
+ enumerable: false,
265
+ configurable: true
266
+ });
267
+ Object.defineProperty(TextNode.prototype, "text", {
268
+ /**
269
+ * Get unescaped text value of current node and its children.
270
+ * @return {string} text content
271
+ */
272
+ get: function () {
273
+ return (0, he_2.decode)(this.rawText);
274
+ },
275
+ enumerable: false,
276
+ configurable: true
277
+ });
278
+ Object.defineProperty(TextNode.prototype, "isWhitespace", {
279
+ /**
280
+ * Detect if the node contains only white space.
281
+ * @return {boolean}
282
+ */
283
+ get: function () {
284
+ return /^(\s|&nbsp;)*$/.test(this.rawText);
285
+ },
286
+ enumerable: false,
287
+ configurable: true
288
+ });
289
+ TextNode.prototype.toString = function () {
290
+ return this.rawText;
291
+ };
292
+ return TextNode;
293
+ }(node_1.default));
294
+ exports.default = TextNode;
295
+ /**
296
+ * Trim whitespace except single leading/trailing non-breaking space
297
+ */
298
+ function trimText(text) {
299
+ var i = 0;
300
+ var startPos;
301
+ var endPos;
302
+ while (i >= 0 && i < text.length) {
303
+ if (/\S/.test(text[i])) {
304
+ if (startPos === undefined) {
305
+ startPos = i;
306
+ i = text.length;
307
+ }
308
+ else {
309
+ endPos = i;
310
+ i = void 0;
311
+ }
312
+ }
313
+ if (startPos === undefined)
314
+ i++;
315
+ else
316
+ i--;
317
+ }
318
+ if (startPos === undefined)
319
+ startPos = 0;
320
+ if (endPos === undefined)
321
+ endPos = text.length - 1;
322
+ var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
323
+ var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
324
+ return (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
325
+ }
326
+ });
327
+ define("nodes/html", ["require", "exports", "css-select", "he", "back", "matcher", "nodes/comment", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, css_select_1, he_3, back_1, matcher_1, comment_1, node_2, text_1, type_3) {
286
328
  "use strict";
287
329
  Object.defineProperty(exports, "__esModule", { value: true });
288
330
  exports.parse = exports.base_parse = void 0;
289
- he_2 = __importDefault(he_2);
290
- node_2 = __importDefault(node_2);
291
- type_3 = __importDefault(type_3);
292
- text_1 = __importDefault(text_1);
293
- matcher_1 = __importDefault(matcher_1);
331
+ he_3 = __importDefault(he_3);
294
332
  back_1 = __importDefault(back_1);
333
+ matcher_1 = __importDefault(matcher_1);
295
334
  comment_1 = __importDefault(comment_1);
335
+ node_2 = __importDefault(node_2);
336
+ text_1 = __importDefault(text_1);
337
+ type_3 = __importDefault(type_3);
296
338
  var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
297
339
  function decode(val) {
298
340
  // clone string
299
- return JSON.parse(JSON.stringify(he_2.default.decode(val)));
341
+ return JSON.parse(JSON.stringify(he_3.default.decode(val)));
300
342
  }
301
343
  // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
302
344
  var Htags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup'];
@@ -332,7 +374,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
332
374
  }
333
375
  DOMTokenList.prototype._validate = function (c) {
334
376
  if (/\s/.test(c)) {
335
- throw new Error("DOMException in DOMTokenList.add: The token '" + c + "' contains HTML space characters, which are not valid in tokens.");
377
+ throw new Error("DOMException in DOMTokenList.add: The token '".concat(c, "' contains HTML space characters, which are not valid in tokens."));
336
378
  }
337
379
  };
338
380
  DOMTokenList.prototype.add = function (c) {
@@ -416,14 +458,14 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
416
458
  );
417
459
  if (keyAttrs.id) {
418
460
  if (!rawAttrs) {
419
- _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
461
+ _this.rawAttrs = "id=\"".concat(keyAttrs.id, "\"");
420
462
  }
421
463
  }
422
464
  if (keyAttrs.class) {
423
465
  if (!rawAttrs) {
424
- var cls = "class=\"" + _this.classList.toString() + "\"";
466
+ var cls = "class=\"".concat(_this.classList.toString(), "\"");
425
467
  if (_this.rawAttrs) {
426
- _this.rawAttrs += " " + cls;
468
+ _this.rawAttrs += " ".concat(cls);
427
469
  }
428
470
  else {
429
471
  _this.rawAttrs = cls;
@@ -568,7 +610,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
568
610
  else {
569
611
  var text = node.trimmedText;
570
612
  if (currentBlock.prependWhitespace) {
571
- text = " " + text;
613
+ text = " ".concat(text);
572
614
  currentBlock.prependWhitespace = false;
573
615
  }
574
616
  currentBlock.push(text);
@@ -589,8 +631,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
589
631
  HTMLElement.prototype.toString = function () {
590
632
  var tag = this.rawTagName;
591
633
  if (tag) {
592
- var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
593
- return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
634
+ var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
635
+ return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
594
636
  }
595
637
  return this.innerHTML;
596
638
  };
@@ -686,9 +728,9 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
686
728
  res.push(' '.repeat(indention) + str);
687
729
  }
688
730
  function dfs(node) {
689
- var idStr = node.id ? "#" + node.id : '';
690
- var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
691
- write("" + node.rawTagName + idStr + classStr);
731
+ var idStr = node.id ? "#".concat(node.id) : '';
732
+ var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
733
+ write("".concat(node.rawTagName).concat(idStr).concat(classStr));
692
734
  indention++;
693
735
  node.childNodes.forEach(function (childNode) {
694
736
  if (childNode.nodeType === type_3.default.ELEMENT_NODE) {
@@ -947,7 +989,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
947
989
  if (val === undefined || val === 'null') {
948
990
  return name;
949
991
  }
950
- return name + "=" + val;
992
+ return "".concat(name, "=").concat(val);
951
993
  })
952
994
  .join(' ');
953
995
  // Update this.id
@@ -994,7 +1036,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
994
1036
  var val = _this.quoteAttribute(attrs[name]);
995
1037
  if (val === 'null' || val === '""')
996
1038
  return name;
997
- return name + "=" + val;
1039
+ return "".concat(name, "=").concat(val);
998
1040
  })
999
1041
  .join(' ');
1000
1042
  // Update this.id
@@ -1022,7 +1064,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1022
1064
  var val = attributes[name];
1023
1065
  if (val === 'null' || val === '""')
1024
1066
  return name;
1025
- return name + "=" + _this.quoteAttribute(String(val));
1067
+ return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
1026
1068
  })
1027
1069
  .join(' ');
1028
1070
  };
@@ -1064,7 +1106,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1064
1106
  });
1065
1107
  }
1066
1108
  else {
1067
- throw new Error("The value provided ('" + where + "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'");
1109
+ throw new Error("The value provided ('".concat(where, "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'"));
1068
1110
  }
1069
1111
  // if (!where || html === undefined || html === null) {
1070
1112
  // return;
@@ -1109,6 +1151,45 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1109
1151
  enumerable: false,
1110
1152
  configurable: true
1111
1153
  });
1154
+ Object.defineProperty(HTMLElement.prototype, "previousSibling", {
1155
+ get: function () {
1156
+ if (this.parentNode) {
1157
+ var children = this.parentNode.childNodes;
1158
+ var i = children.length;
1159
+ while (i > 0) {
1160
+ var child = children[--i];
1161
+ if (this === child)
1162
+ return children[i - 1] || null;
1163
+ }
1164
+ return null;
1165
+ }
1166
+ },
1167
+ enumerable: false,
1168
+ configurable: true
1169
+ });
1170
+ Object.defineProperty(HTMLElement.prototype, "previousElementSibling", {
1171
+ get: function () {
1172
+ if (this.parentNode) {
1173
+ var children = this.parentNode.childNodes;
1174
+ var i = children.length;
1175
+ var find = false;
1176
+ while (i > 0) {
1177
+ var child = children[--i];
1178
+ if (find) {
1179
+ if (child instanceof HTMLElement) {
1180
+ return child || null;
1181
+ }
1182
+ }
1183
+ else if (this === child) {
1184
+ find = true;
1185
+ }
1186
+ }
1187
+ return null;
1188
+ }
1189
+ },
1190
+ enumerable: false,
1191
+ configurable: true
1192
+ });
1112
1193
  Object.defineProperty(HTMLElement.prototype, "classNames", {
1113
1194
  get: function () {
1114
1195
  return this.classList.toString();
@@ -1116,6 +1197,12 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1116
1197
  enumerable: false,
1117
1198
  configurable: true
1118
1199
  });
1200
+ /**
1201
+ * Clone this Node
1202
+ */
1203
+ HTMLElement.prototype.clone = function () {
1204
+ return parse(this.toString()).firstChild;
1205
+ };
1119
1206
  return HTMLElement;
1120
1207
  }(node_2.default));
1121
1208
  exports.default = HTMLElement;
@@ -1208,8 +1295,8 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1208
1295
  pre: true,
1209
1296
  };
1210
1297
  var element_names = Object.keys(elements);
1211
- var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1212
- var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1298
+ var kBlockTextElements = element_names.map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
1299
+ var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
1213
1300
  function element_should_be_ignore(tag) {
1214
1301
  return kIgnoreElements.some(function (it) { return it.test(tag); });
1215
1302
  }
@@ -1224,7 +1311,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1224
1311
  var noNestedTagIndex = undefined;
1225
1312
  var match;
1226
1313
  // https://github.com/taoqf/node-html-parser/issues/38
1227
- data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
1314
+ data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
1228
1315
  var lowerCaseTagName = options.lowerCaseTagName;
1229
1316
  var dataEndPos = data.length - (frameflag.length + 2);
1230
1317
  var frameFlagOffset = frameflag.length + 2;
@@ -1292,7 +1379,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1292
1379
  stack.push(currentParent);
1293
1380
  if (is_block_text_element(tagName)) {
1294
1381
  // Find closing tag
1295
- var closeMarkup = "</" + tagName + ">";
1382
+ var closeMarkup = "</".concat(tagName, ">");
1296
1383
  var closeIndex = lowerCaseTagName
1297
1384
  ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
1298
1385
  : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
@@ -1359,18 +1446,25 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1359
1446
  if (last.parentNode && last.parentNode.parentNode) {
1360
1447
  if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
1361
1448
  // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
1362
- oneBefore.removeChild(last);
1363
- last.childNodes.forEach(function (child) {
1364
- oneBefore.parentNode.appendChild(child);
1365
- });
1366
- stack.pop();
1449
+ // This is wrong, because it moves the H3 out of its correct position, which should be inside the current HTML element; see issue 152 for more info
1450
+ if (options.parseNoneClosedTags !== true) {
1451
+ oneBefore.removeChild(last);
1452
+ last.childNodes.forEach(function (child) {
1453
+ oneBefore.parentNode.appendChild(child);
1454
+ });
1455
+ stack.pop();
1456
+ }
1367
1457
  }
1368
1458
  else {
1369
1459
  // Single error <div> <h3> </div> handle: Just removes <h3>
1370
- oneBefore.removeChild(last);
1371
- last.childNodes.forEach(function (child) {
1372
- oneBefore.appendChild(child);
1373
- });
1460
+ // Why remove? This is already an HTMLElement, and the missing <H3> has already been added in this case; see issue 152 for more info
1461
+ // eslint-disable-next-line no-lonely-if
1462
+ if (options.parseNoneClosedTags !== true) {
1463
+ oneBefore.removeChild(last);
1464
+ last.childNodes.forEach(function (child) {
1465
+ oneBefore.appendChild(child);
1466
+ });
1467
+ }
1374
1468
  }
1375
1469
  }
1376
1470
  else {
@@ -1389,45 +1483,6 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1389
1483
  }
1390
1484
  exports.parse = parse;
1391
1485
  });
1392
- define("nodes/node", ["require", "exports", "he"], function (require, exports, he_3) {
1393
- "use strict";
1394
- Object.defineProperty(exports, "__esModule", { value: true });
1395
- /**
1396
- * Node Class as base class for TextNode and HTMLElement.
1397
- */
1398
- var Node = /** @class */ (function () {
1399
- function Node(parentNode, range) {
1400
- if (parentNode === void 0) { parentNode = null; }
1401
- this.parentNode = parentNode;
1402
- this.childNodes = [];
1403
- Object.defineProperty(this, 'range', {
1404
- enumerable: false,
1405
- writable: true,
1406
- configurable: true,
1407
- value: range !== null && range !== void 0 ? range : [-1, -1]
1408
- });
1409
- }
1410
- Object.defineProperty(Node.prototype, "innerText", {
1411
- get: function () {
1412
- return this.rawText;
1413
- },
1414
- enumerable: false,
1415
- configurable: true
1416
- });
1417
- Object.defineProperty(Node.prototype, "textContent", {
1418
- get: function () {
1419
- return (0, he_3.decode)(this.rawText);
1420
- },
1421
- set: function (val) {
1422
- this.rawText = (0, he_3.encode)(val);
1423
- },
1424
- enumerable: false,
1425
- configurable: true
1426
- });
1427
- return Node;
1428
- }());
1429
- exports.default = Node;
1430
- });
1431
1486
  define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], function (require, exports, node_3, type_4) {
1432
1487
  "use strict";
1433
1488
  Object.defineProperty(exports, "__esModule", { value: true });
@@ -1445,6 +1500,9 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
1445
1500
  _this.nodeType = type_4.default.COMMENT_NODE;
1446
1501
  return _this;
1447
1502
  }
1503
+ CommentNode.prototype.clone = function () {
1504
+ return new CommentNode(this.rawText, null);
1505
+ };
1448
1506
  Object.defineProperty(CommentNode.prototype, "text", {
1449
1507
  /**
1450
1508
  * Get unescaped text value of current node and its children.
@@ -1457,7 +1515,7 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
1457
1515
  configurable: true
1458
1516
  });
1459
1517
  CommentNode.prototype.toString = function () {
1460
- return "<!--" + this.rawText + "-->";
1518
+ return "<!--".concat(this.rawText, "-->");
1461
1519
  };
1462
1520
  return CommentNode;
1463
1521
  }(node_3.default));
@@ -1,8 +1,9 @@
1
+ import HTMLElement from './html';
1
2
  import Node from './node';
2
3
  import NodeType from './type';
3
- import HTMLElement from './html';
4
4
  export default class CommentNode extends Node {
5
5
  rawText: string;
6
+ clone(): CommentNode;
6
7
  constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
7
8
  /**
8
9
  * Node Type declaration.
@@ -32,6 +32,9 @@ var CommentNode = /** @class */ (function (_super) {
32
32
  _this.nodeType = type_1.default.COMMENT_NODE;
33
33
  return _this;
34
34
  }
35
+ CommentNode.prototype.clone = function () {
36
+ return new CommentNode(this.rawText, null);
37
+ };
35
38
  Object.defineProperty(CommentNode.prototype, "text", {
36
39
  /**
37
40
  * Get unescaped text value of current node and its children.
@@ -44,7 +47,7 @@ var CommentNode = /** @class */ (function (_super) {
44
47
  configurable: true
45
48
  });
46
49
  CommentNode.prototype.toString = function () {
47
- return "<!--" + this.rawText + "-->";
50
+ return "<!--".concat(this.rawText, "-->");
48
51
  };
49
52
  return CommentNode;
50
53
  }(node_1.default));
@@ -189,11 +189,18 @@ export default class HTMLElement extends Node {
189
189
  insertAdjacentHTML(where: InsertPosition, html: string): void;
190
190
  get nextSibling(): Node;
191
191
  get nextElementSibling(): HTMLElement;
192
+ get previousSibling(): Node;
193
+ get previousElementSibling(): HTMLElement;
192
194
  get classNames(): string;
195
+ /**
196
+ * Clone this Node
197
+ */
198
+ clone(): Node;
193
199
  }
194
200
  export interface Options {
195
201
  lowerCaseTagName: boolean;
196
202
  comment: boolean;
203
+ parseNoneClosedTags?: boolean;
197
204
  blockTextElements: {
198
205
  [tag: string]: boolean;
199
206
  };
@@ -39,14 +39,14 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
39
39
  };
40
40
  Object.defineProperty(exports, "__esModule", { value: true });
41
41
  exports.parse = exports.base_parse = void 0;
42
- var he_1 = __importDefault(require("he"));
43
42
  var css_select_1 = require("css-select");
44
- var node_1 = __importDefault(require("./node"));
45
- var type_1 = __importDefault(require("./type"));
46
- var text_1 = __importDefault(require("./text"));
47
- var matcher_1 = __importDefault(require("../matcher"));
43
+ var he_1 = __importDefault(require("he"));
48
44
  var back_1 = __importDefault(require("../back"));
45
+ var matcher_1 = __importDefault(require("../matcher"));
49
46
  var comment_1 = __importDefault(require("./comment"));
47
+ var node_1 = __importDefault(require("./node"));
48
+ var text_1 = __importDefault(require("./text"));
49
+ var type_1 = __importDefault(require("./type"));
50
50
  var voidTags = new Set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr']);
51
51
  function decode(val) {
52
52
  // clone string
@@ -86,7 +86,7 @@ var DOMTokenList = /** @class */ (function () {
86
86
  }
87
87
  DOMTokenList.prototype._validate = function (c) {
88
88
  if (/\s/.test(c)) {
89
- throw new Error("DOMException in DOMTokenList.add: The token '" + c + "' contains HTML space characters, which are not valid in tokens.");
89
+ throw new Error("DOMException in DOMTokenList.add: The token '".concat(c, "' contains HTML space characters, which are not valid in tokens."));
90
90
  }
91
91
  };
92
92
  DOMTokenList.prototype.add = function (c) {
@@ -170,14 +170,14 @@ var HTMLElement = /** @class */ (function (_super) {
170
170
  );
171
171
  if (keyAttrs.id) {
172
172
  if (!rawAttrs) {
173
- _this.rawAttrs = "id=\"" + keyAttrs.id + "\"";
173
+ _this.rawAttrs = "id=\"".concat(keyAttrs.id, "\"");
174
174
  }
175
175
  }
176
176
  if (keyAttrs.class) {
177
177
  if (!rawAttrs) {
178
- var cls = "class=\"" + _this.classList.toString() + "\"";
178
+ var cls = "class=\"".concat(_this.classList.toString(), "\"");
179
179
  if (_this.rawAttrs) {
180
- _this.rawAttrs += " " + cls;
180
+ _this.rawAttrs += " ".concat(cls);
181
181
  }
182
182
  else {
183
183
  _this.rawAttrs = cls;
@@ -322,7 +322,7 @@ var HTMLElement = /** @class */ (function (_super) {
322
322
  else {
323
323
  var text = node.trimmedText;
324
324
  if (currentBlock.prependWhitespace) {
325
- text = " " + text;
325
+ text = " ".concat(text);
326
326
  currentBlock.prependWhitespace = false;
327
327
  }
328
328
  currentBlock.push(text);
@@ -343,8 +343,8 @@ var HTMLElement = /** @class */ (function (_super) {
343
343
  HTMLElement.prototype.toString = function () {
344
344
  var tag = this.rawTagName;
345
345
  if (tag) {
346
- var attrs = this.rawAttrs ? " " + this.rawAttrs : '';
347
- return this.isVoidElement ? "<" + tag + attrs + ">" : "<" + tag + attrs + ">" + this.innerHTML + "</" + tag + ">";
346
+ var attrs = this.rawAttrs ? " ".concat(this.rawAttrs) : '';
347
+ return this.isVoidElement ? "<".concat(tag).concat(attrs, ">") : "<".concat(tag).concat(attrs, ">").concat(this.innerHTML, "</").concat(tag, ">");
348
348
  }
349
349
  return this.innerHTML;
350
350
  };
@@ -440,9 +440,9 @@ var HTMLElement = /** @class */ (function (_super) {
440
440
  res.push(' '.repeat(indention) + str);
441
441
  }
442
442
  function dfs(node) {
443
- var idStr = node.id ? "#" + node.id : '';
444
- var classStr = node.classList.length ? "." + node.classList.value.join('.') : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
445
- write("" + node.rawTagName + idStr + classStr);
443
+ var idStr = node.id ? "#".concat(node.id) : '';
444
+ var classStr = node.classList.length ? ".".concat(node.classList.value.join('.')) : ''; // eslint-disable-line @typescript-eslint/no-unsafe-member-access, @typescript-eslint/no-unsafe-member-access, @typescript-eslint/restrict-template-expressions, @typescript-eslint/no-unsafe-call
445
+ write("".concat(node.rawTagName).concat(idStr).concat(classStr));
446
446
  indention++;
447
447
  node.childNodes.forEach(function (childNode) {
448
448
  if (childNode.nodeType === type_1.default.ELEMENT_NODE) {
@@ -701,7 +701,7 @@ var HTMLElement = /** @class */ (function (_super) {
701
701
  if (val === undefined || val === 'null') {
702
702
  return name;
703
703
  }
704
- return name + "=" + val;
704
+ return "".concat(name, "=").concat(val);
705
705
  })
706
706
  .join(' ');
707
707
  // Update this.id
@@ -748,7 +748,7 @@ var HTMLElement = /** @class */ (function (_super) {
748
748
  var val = _this.quoteAttribute(attrs[name]);
749
749
  if (val === 'null' || val === '""')
750
750
  return name;
751
- return name + "=" + val;
751
+ return "".concat(name, "=").concat(val);
752
752
  })
753
753
  .join(' ');
754
754
  // Update this.id
@@ -776,7 +776,7 @@ var HTMLElement = /** @class */ (function (_super) {
776
776
  var val = attributes[name];
777
777
  if (val === 'null' || val === '""')
778
778
  return name;
779
- return name + "=" + _this.quoteAttribute(String(val));
779
+ return "".concat(name, "=").concat(_this.quoteAttribute(String(val)));
780
780
  })
781
781
  .join(' ');
782
782
  };
@@ -818,7 +818,7 @@ var HTMLElement = /** @class */ (function (_super) {
818
818
  });
819
819
  }
820
820
  else {
821
- throw new Error("The value provided ('" + where + "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'");
821
+ throw new Error("The value provided ('".concat(where, "') is not one of 'beforebegin', 'afterbegin', 'beforeend', or 'afterend'"));
822
822
  }
823
823
  // if (!where || html === undefined || html === null) {
824
824
  // return;
@@ -863,6 +863,45 @@ var HTMLElement = /** @class */ (function (_super) {
863
863
  enumerable: false,
864
864
  configurable: true
865
865
  });
866
+ Object.defineProperty(HTMLElement.prototype, "previousSibling", {
867
+ get: function () {
868
+ if (this.parentNode) {
869
+ var children = this.parentNode.childNodes;
870
+ var i = children.length;
871
+ while (i > 0) {
872
+ var child = children[--i];
873
+ if (this === child)
874
+ return children[i - 1] || null;
875
+ }
876
+ return null;
877
+ }
878
+ },
879
+ enumerable: false,
880
+ configurable: true
881
+ });
882
+ Object.defineProperty(HTMLElement.prototype, "previousElementSibling", {
883
+ get: function () {
884
+ if (this.parentNode) {
885
+ var children = this.parentNode.childNodes;
886
+ var i = children.length;
887
+ var find = false;
888
+ while (i > 0) {
889
+ var child = children[--i];
890
+ if (find) {
891
+ if (child instanceof HTMLElement) {
892
+ return child || null;
893
+ }
894
+ }
895
+ else if (this === child) {
896
+ find = true;
897
+ }
898
+ }
899
+ return null;
900
+ }
901
+ },
902
+ enumerable: false,
903
+ configurable: true
904
+ });
866
905
  Object.defineProperty(HTMLElement.prototype, "classNames", {
867
906
  get: function () {
868
907
  return this.classList.toString();
@@ -870,6 +909,12 @@ var HTMLElement = /** @class */ (function (_super) {
870
909
  enumerable: false,
871
910
  configurable: true
872
911
  });
912
+ /**
913
+ * Clone this Node
914
+ */
915
+ HTMLElement.prototype.clone = function () {
916
+ return parse(this.toString()).firstChild;
917
+ };
873
918
  return HTMLElement;
874
919
  }(node_1.default));
875
920
  exports.default = HTMLElement;
@@ -962,8 +1007,8 @@ function base_parse(data, options) {
962
1007
  pre: true,
963
1008
  };
964
1009
  var element_names = Object.keys(elements);
965
- var kBlockTextElements = element_names.map(function (it) { return new RegExp("^" + it + "$", 'i'); });
966
- var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^" + it + "$", 'i'); });
1010
+ var kBlockTextElements = element_names.map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
1011
+ var kIgnoreElements = element_names.filter(function (it) { return elements[it]; }).map(function (it) { return new RegExp("^".concat(it, "$"), 'i'); });
967
1012
  function element_should_be_ignore(tag) {
968
1013
  return kIgnoreElements.some(function (it) { return it.test(tag); });
969
1014
  }
@@ -978,7 +1023,7 @@ function base_parse(data, options) {
978
1023
  var noNestedTagIndex = undefined;
979
1024
  var match;
980
1025
  // https://github.com/taoqf/node-html-parser/issues/38
981
- data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
1026
+ data = "<".concat(frameflag, ">").concat(data, "</").concat(frameflag, ">");
982
1027
  var lowerCaseTagName = options.lowerCaseTagName;
983
1028
  var dataEndPos = data.length - (frameflag.length + 2);
984
1029
  var frameFlagOffset = frameflag.length + 2;
@@ -1046,7 +1091,7 @@ function base_parse(data, options) {
1046
1091
  stack.push(currentParent);
1047
1092
  if (is_block_text_element(tagName)) {
1048
1093
  // Find closing tag
1049
- var closeMarkup = "</" + tagName + ">";
1094
+ var closeMarkup = "</".concat(tagName, ">");
1050
1095
  var closeIndex = lowerCaseTagName
1051
1096
  ? data.toLocaleLowerCase().indexOf(closeMarkup, kMarkupPattern.lastIndex)
1052
1097
  : data.indexOf(closeMarkup, kMarkupPattern.lastIndex);
@@ -1113,18 +1158,25 @@ function parse(data, options) {
1113
1158
  if (last.parentNode && last.parentNode.parentNode) {
1114
1159
  if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
1115
1160
  // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
1116
- oneBefore.removeChild(last);
1117
- last.childNodes.forEach(function (child) {
1118
- oneBefore.parentNode.appendChild(child);
1119
- });
1120
- stack.pop();
1161
+ // this is wrong, becouse this will put the H3 outside the current right position which should be inside the current Html Element, see issue 152 for more info
1162
+ if (options.parseNoneClosedTags !== true) {
1163
+ oneBefore.removeChild(last);
1164
+ last.childNodes.forEach(function (child) {
1165
+ oneBefore.parentNode.appendChild(child);
1166
+ });
1167
+ stack.pop();
1168
+ }
1121
1169
  }
1122
1170
  else {
1123
1171
  // Single error <div> <h3> </div> handle: Just removes <h3>
1124
- oneBefore.removeChild(last);
1125
- last.childNodes.forEach(function (child) {
1126
- oneBefore.appendChild(child);
1127
- });
1172
+ // Why remove? this is already a HtmlElement and the missing <H3> is already added in this case. see issue 152 for more info
1173
+ // eslint-disable-next-line no-lonely-if
1174
+ if (options.parseNoneClosedTags !== true) {
1175
+ oneBefore.removeChild(last);
1176
+ last.childNodes.forEach(function (child) {
1177
+ oneBefore.appendChild(child);
1178
+ });
1179
+ }
1128
1180
  }
1129
1181
  }
1130
1182
  else {
@@ -11,6 +11,7 @@ export default abstract class Node {
11
11
  abstract text: string;
12
12
  abstract rawText: string;
13
13
  abstract toString(): string;
14
+ abstract clone(): Node;
14
15
  constructor(parentNode?: HTMLElement, range?: [number, number]);
15
16
  get innerText(): string;
16
17
  get textContent(): string;
@@ -6,6 +6,7 @@ import NodeType from './type';
6
6
  * @param {string} value [description]
7
7
  */
8
8
  export default class TextNode extends Node {
9
+ clone(): TextNode;
9
10
  constructor(rawText: string, parentNode: HTMLElement, range?: [number, number]);
10
11
  /**
11
12
  * Node Type declaration.
@@ -37,6 +37,9 @@ var TextNode = /** @class */ (function (_super) {
37
37
  _this._rawText = rawText;
38
38
  return _this;
39
39
  }
40
+ TextNode.prototype.clone = function () {
41
+ return new TextNode(this._rawText, null);
42
+ };
40
43
  Object.defineProperty(TextNode.prototype, "rawText", {
41
44
  get: function () {
42
45
  return this._rawText;
package/package.json CHANGED
@@ -1,9 +1,14 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "5.2.0",
3
+ "version": "5.2.6",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
+ "exports": {
8
+ "require": "./dist/index.js",
9
+ "import": "./esm/index.js",
10
+ "types": "./dist/index.d.ts"
11
+ },
7
12
  "scripts": {
8
13
  "compile": "tsc",
9
14
  "build": "npm run lint && npm run clean && npm run compile:cjs && npm run compile:amd",
@@ -48,7 +53,7 @@
48
53
  "registry": "https://registry.npmjs.org"
49
54
  },
50
55
  "dependencies": {
51
- "css-select": "^4.1.3",
56
+ "css-select": "^4.2.1",
52
57
  "he": "1.2.0"
53
58
  },
54
59
  "devDependencies": {
@@ -67,7 +72,6 @@
67
72
  "high5": "^1.0.0",
68
73
  "html-dom-parser": "^1.0.4",
69
74
  "html-parser": "^0.11.0",
70
- "html5": "^1.0.5",
71
75
  "html5parser": "^2.0.2",
72
76
  "htmljs-parser": "^2.11.1",
73
77
  "htmlparser": "^1.7.7",
@@ -109,9 +113,5 @@
109
113
  "url": "https://github.com/taoqf/node-fast-html-parser/issues"
110
114
  },
111
115
  "homepage": "https://github.com/taoqf/node-fast-html-parser",
112
- "sideEffects": false,
113
- "exports": {
114
- "require": "./dist/index.js",
115
- "import": "./esm/index.js"
116
- }
116
+ "sideEffects": false
117
117
  }