node-html-parser 3.1.4 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Fast HTML Parser [![NPM version](https://badge.fury.io/js/node-html-parser.png)](http://badge.fury.io/js/node-html-parser) [![Build Status](https://travis-ci.org/taoqf/node-html-parser.svg?branch=master)](https://travis-ci.org/taoqf/node-html-parser)
2
2
 
3
3
  Fast HTML Parser is a _very fast_ HTML parser that generates a simplified
4
- DOM tree, with basic element query support.
4
+ DOM tree, with element query support.
5
5
 
6
6
  By design, it aims to parse massive HTML files at the lowest cost, so
7
7
  performance is the top priority. For this reason, some malformed HTML may not
@@ -112,6 +112,10 @@ Note: Full css3 selector supported since v3.0.0.
112
112
 
113
113
  Query CSS Selector to find matching node.
114
114
 
115
+ ### HTMLElement#closest(selector)
116
+
117
+ Query the closest matching element (this element or its nearest ancestor) by CSS selector.
118
+
115
119
  ### HTMLElement#appendChild(node)
116
120
 
117
121
  Append a child node to childNodes
@@ -6,7 +6,6 @@ import TextNode from './text';
6
6
  import Matcher from '../matcher';
7
7
  import arr_back from '../back';
8
8
  import CommentNode from './comment';
9
- import parse from '../parse';
10
9
  // const { decode } = he;
11
10
  function decode(val) {
12
11
  // clone string
@@ -479,6 +478,61 @@ export default class HTMLElement extends Node {
479
478
  // }
480
479
  // return null;
481
480
  }
481
+ /**
482
+ * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
483
+ * @param selector a DOMString containing a selector list
484
+ */
485
+ closest(selector) {
486
+ const mapChild = new Map();
487
+ let el = this;
488
+ let old = null;
489
+ function findOne(test, elems) {
490
+ let elem = null;
491
+ for (let i = 0, l = elems.length; i < l && !elem; i++) {
492
+ const el = elems[i];
493
+ if (test(el)) {
494
+ elem = el;
495
+ }
496
+ else {
497
+ const child = mapChild.get(el);
498
+ if (child) {
499
+ elem = findOne(test, [child]);
500
+ }
501
+ }
502
+ }
503
+ return elem;
504
+ }
505
+ while (el) {
506
+ mapChild.set(el, old);
507
+ old = el;
508
+ el = el.parentNode;
509
+ }
510
+ el = this;
511
+ while (el) {
512
+ const e = selectOne(selector, el, {
513
+ xmlMode: true,
514
+ adapter: {
515
+ ...Matcher,
516
+ getChildren(node) {
517
+ const child = mapChild.get(node);
518
+ return child && [child];
519
+ },
520
+ getSiblings(node) {
521
+ return [node];
522
+ },
523
+ findOne,
524
+ findAll() {
525
+ return [];
526
+ }
527
+ }
528
+ });
529
+ if (e) {
530
+ return e;
531
+ }
532
+ el = el.parentNode;
533
+ }
534
+ return null;
535
+ }
482
536
  /**
483
537
  * Append a child node to childNodes
484
538
  * @param {Node} node node to append
@@ -540,7 +594,7 @@ export default class HTMLElement extends Node {
540
594
  }
541
595
  const attrs = {};
542
596
  if (this.rawAttrs) {
543
- const re = /\b([a-z][a-z0-9-_]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
597
+ const re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
544
598
  let match;
545
599
  while ((match = re.exec(this.rawAttrs))) {
546
600
  attrs[match[1]] = match[2] || match[3] || match[4] || null;
@@ -929,3 +983,42 @@ export function base_parse(data, options = { lowerCaseTagName: false, comment: f
929
983
  }
930
984
  return stack;
931
985
  }
986
+ /**
987
+ * Parses HTML and returns a root element
988
+ * Parse a chunk of HTML source.
989
+ */
990
+ export function parse(data, options = { lowerCaseTagName: false, comment: false }) {
991
+ const stack = base_parse(data, options);
992
+ const [root] = stack;
993
+ while (stack.length > 1) {
994
+ // Handle each error elements.
995
+ const last = stack.pop();
996
+ const oneBefore = arr_back(stack);
997
+ if (last.parentNode && last.parentNode.parentNode) {
998
+ if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
999
+ // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
1000
+ oneBefore.removeChild(last);
1001
+ last.childNodes.forEach((child) => {
1002
+ oneBefore.parentNode.appendChild(child);
1003
+ });
1004
+ stack.pop();
1005
+ }
1006
+ else {
1007
+ // Single error <div> <h3> </div> handle: Just removes <h3>
1008
+ oneBefore.removeChild(last);
1009
+ last.childNodes.forEach((child) => {
1010
+ oneBefore.appendChild(child);
1011
+ });
1012
+ }
1013
+ }
1014
+ else {
1015
+ // If it's final element just skip.
1016
+ }
1017
+ }
1018
+ // response.childNodes.forEach((node) => {
1019
+ // if (node instanceof HTMLElement) {
1020
+ // node.parentNode = null;
1021
+ // }
1022
+ // });
1023
+ return root;
1024
+ }
package/dist/esm/parse.js CHANGED
@@ -1,41 +1 @@
1
- import arr_back from './back';
2
- import { base_parse } from './nodes/html';
3
- /**
4
- * Parses HTML and returns a root element
5
- * Parse a chuck of HTML source.
6
- */
7
- export default function parse(data, options = { lowerCaseTagName: false, comment: false }) {
8
- const stack = base_parse(data, options);
9
- const [root] = stack;
10
- while (stack.length > 1) {
11
- // Handle each error elements.
12
- const last = stack.pop();
13
- const oneBefore = arr_back(stack);
14
- if (last.parentNode && last.parentNode.parentNode) {
15
- if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
16
- // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
17
- oneBefore.removeChild(last);
18
- last.childNodes.forEach((child) => {
19
- oneBefore.parentNode.appendChild(child);
20
- });
21
- stack.pop();
22
- }
23
- else {
24
- // Single error <div> <h3> </div> handle: Just removes <h3>
25
- oneBefore.removeChild(last);
26
- last.childNodes.forEach((child) => {
27
- oneBefore.appendChild(child);
28
- });
29
- }
30
- }
31
- else {
32
- // If it's final element just skip.
33
- }
34
- }
35
- // response.childNodes.forEach((node) => {
36
- // if (node instanceof HTMLElement) {
37
- // node.parentNode = null;
38
- // }
39
- // });
40
- return root;
41
- }
1
+ export { parse as default } from './nodes/html';
package/dist/main.js CHANGED
@@ -16,6 +16,17 @@ var __extends = (this && this.__extends) || (function () {
16
16
  var __importDefault = (this && this.__importDefault) || function (mod) {
17
17
  return (mod && mod.__esModule) ? mod : { "default": mod };
18
18
  };
19
+ var __assign = (this && this.__assign) || function () {
20
+ __assign = Object.assign || function(t) {
21
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
22
+ s = arguments[i];
23
+ for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
24
+ t[p] = s[p];
25
+ }
26
+ return t;
27
+ };
28
+ return __assign.apply(this, arguments);
29
+ };
19
30
  var __spreadArray = (this && this.__spreadArray) || function (to, from) {
20
31
  for (var i = 0, il = from.length, j = to.length; i < il; i++, j++)
21
32
  to[j] = from[i];
@@ -195,67 +206,17 @@ define("matcher", ["require", "exports", "nodes/type"], function (require, expor
195
206
  findAll: findAll
196
207
  };
197
208
  });
198
- define("parse", ["require", "exports", "back", "nodes/html"], function (require, exports, back_1, html_1) {
199
- "use strict";
200
- Object.defineProperty(exports, "__esModule", { value: true });
201
- back_1 = __importDefault(back_1);
202
- /**
203
- * Parses HTML and returns a root element
204
- * Parse a chuck of HTML source.
205
- */
206
- function parse(data, options) {
207
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
208
- var stack = html_1.base_parse(data, options);
209
- var root = stack[0];
210
- var _loop_1 = function () {
211
- // Handle each error elements.
212
- var last = stack.pop();
213
- var oneBefore = back_1.default(stack);
214
- if (last.parentNode && last.parentNode.parentNode) {
215
- if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
216
- // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
217
- oneBefore.removeChild(last);
218
- last.childNodes.forEach(function (child) {
219
- oneBefore.parentNode.appendChild(child);
220
- });
221
- stack.pop();
222
- }
223
- else {
224
- // Single error <div> <h3> </div> handle: Just removes <h3>
225
- oneBefore.removeChild(last);
226
- last.childNodes.forEach(function (child) {
227
- oneBefore.appendChild(child);
228
- });
229
- }
230
- }
231
- else {
232
- // If it's final element just skip.
233
- }
234
- };
235
- while (stack.length > 1) {
236
- _loop_1();
237
- }
238
- // response.childNodes.forEach((node) => {
239
- // if (node instanceof HTMLElement) {
240
- // node.parentNode = null;
241
- // }
242
- // });
243
- return root;
244
- }
245
- exports.default = parse;
246
- });
247
- define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "nodes/type", "nodes/text", "matcher", "back", "nodes/comment", "parse"], function (require, exports, he_1, css_select_1, node_2, type_3, text_1, matcher_1, back_2, comment_1, parse_1) {
209
+ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "nodes/type", "nodes/text", "matcher", "back", "nodes/comment"], function (require, exports, he_1, css_select_1, node_2, type_3, text_1, matcher_1, back_1, comment_1) {
248
210
  "use strict";
249
211
  Object.defineProperty(exports, "__esModule", { value: true });
250
- exports.base_parse = void 0;
212
+ exports.parse = exports.base_parse = void 0;
251
213
  he_1 = __importDefault(he_1);
252
214
  node_2 = __importDefault(node_2);
253
215
  type_3 = __importDefault(type_3);
254
216
  text_1 = __importDefault(text_1);
255
217
  matcher_1 = __importDefault(matcher_1);
256
- back_2 = __importDefault(back_2);
218
+ back_1 = __importDefault(back_1);
257
219
  comment_1 = __importDefault(comment_1);
258
- parse_1 = __importDefault(parse_1);
259
220
  // const { decode } = he;
260
221
  function decode(val) {
261
222
  // clone string
@@ -542,7 +503,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
542
503
  },
543
504
  set: function (content) {
544
505
  //const r = parse(content, global.options); // TODO global.options ?
545
- var r = parse_1.default(content);
506
+ var r = parse(content);
546
507
  this.childNodes = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
547
508
  },
548
509
  enumerable: false,
@@ -554,7 +515,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
554
515
  content = [content];
555
516
  }
556
517
  else if (typeof content == 'string') {
557
- var r = parse_1.default(content, options);
518
+ var r = parse(content, options);
558
519
  content = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
559
520
  }
560
521
  this.childNodes = content;
@@ -571,7 +532,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
571
532
  }
572
533
  else if (typeof node == 'string') {
573
534
  // const r = parse(content, global.options); // TODO global.options ?
574
- var r = parse_1.default(node);
535
+ var r = parse(node);
575
536
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
576
537
  }
577
538
  return [];
@@ -782,6 +743,58 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
782
743
  // }
783
744
  // return null;
784
745
  };
746
+ /**
747
+ * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
748
+ * @param selector a DOMString containing a selector list
749
+ */
750
+ HTMLElement.prototype.closest = function (selector) {
751
+ var mapChild = new Map();
752
+ var el = this;
753
+ var old = null;
754
+ function findOne(test, elems) {
755
+ var elem = null;
756
+ for (var i = 0, l = elems.length; i < l && !elem; i++) {
757
+ var el_1 = elems[i];
758
+ if (test(el_1)) {
759
+ elem = el_1;
760
+ }
761
+ else {
762
+ var child = mapChild.get(el_1);
763
+ if (child) {
764
+ elem = findOne(test, [child]);
765
+ }
766
+ }
767
+ }
768
+ return elem;
769
+ }
770
+ while (el) {
771
+ mapChild.set(el, old);
772
+ old = el;
773
+ el = el.parentNode;
774
+ }
775
+ el = this;
776
+ while (el) {
777
+ var e = css_select_1.selectOne(selector, el, {
778
+ xmlMode: true,
779
+ adapter: __assign(__assign({}, matcher_1.default), { getChildren: function (node) {
780
+ var child = mapChild.get(node);
781
+ return child && [child];
782
+ },
783
+ getSiblings: function (node) {
784
+ return [node];
785
+ },
786
+ findOne: findOne,
787
+ findAll: function () {
788
+ return [];
789
+ } })
790
+ });
791
+ if (e) {
792
+ return e;
793
+ }
794
+ el = el.parentNode;
795
+ }
796
+ return null;
797
+ };
785
798
  /**
786
799
  * Append a child node to childNodes
787
800
  * @param {Node} node node to append
@@ -810,7 +823,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
810
823
  * @return {Node} last child node
811
824
  */
812
825
  get: function () {
813
- return back_2.default(this.childNodes);
826
+ return back_1.default(this.childNodes);
814
827
  },
815
828
  enumerable: false,
816
829
  configurable: true
@@ -860,7 +873,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
860
873
  }
861
874
  var attrs = {};
862
875
  if (this.rawAttrs) {
863
- var re = /\b([a-z][a-z0-9-_]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
876
+ var re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
864
877
  var match = void 0;
865
878
  while ((match = re.exec(this.rawAttrs))) {
866
879
  attrs[match[1]] = match[2] || match[3] || match[4] || null;
@@ -965,7 +978,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
965
978
  if (arguments.length < 2) {
966
979
  throw new Error('2 arguments required');
967
980
  }
968
- var p = parse_1.default(html);
981
+ var p = parse(html);
969
982
  if (where === 'afterend') {
970
983
  var idx = this.parentNode.childNodes.findIndex(function (child) {
971
984
  return child === _this;
@@ -1171,7 +1184,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1171
1184
  var match;
1172
1185
  // https://github.com/taoqf/node-html-parser/issues/38
1173
1186
  data = "<" + frameflag + ">" + data + "</" + frameflag + ">";
1174
- var _loop_2 = function () {
1187
+ var _loop_1 = function () {
1175
1188
  if (lastTextPos > -1) {
1176
1189
  if (lastTextPos + match[0].length < kMarkupPattern.lastIndex) {
1177
1190
  // if has content
@@ -1205,7 +1218,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1205
1218
  if (!match[4] && kElementsClosedByOpening[tagName]) {
1206
1219
  if (kElementsClosedByOpening[tagName][match[2]]) {
1207
1220
  stack.pop();
1208
- currentParent = back_2.default(stack);
1221
+ currentParent = back_1.default(stack);
1209
1222
  }
1210
1223
  }
1211
1224
  // ignore container tag we add above
@@ -1248,7 +1261,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1248
1261
  while (true) {
1249
1262
  if (currentParent.rawTagName === match[2]) {
1250
1263
  stack.pop();
1251
- currentParent = back_2.default(stack);
1264
+ currentParent = back_1.default(stack);
1252
1265
  break;
1253
1266
  }
1254
1267
  else {
@@ -1257,7 +1270,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1257
1270
  if (kElementsClosedByClosing[tagName]) {
1258
1271
  if (kElementsClosedByClosing[tagName][match[2]]) {
1259
1272
  stack.pop();
1260
- currentParent = back_2.default(stack);
1273
+ currentParent = back_1.default(stack);
1261
1274
  continue;
1262
1275
  }
1263
1276
  }
@@ -1268,11 +1281,55 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
1268
1281
  }
1269
1282
  };
1270
1283
  while ((match = kMarkupPattern.exec(data))) {
1271
- _loop_2();
1284
+ _loop_1();
1272
1285
  }
1273
1286
  return stack;
1274
1287
  }
1275
1288
  exports.base_parse = base_parse;
1289
+ /**
1290
+ * Parses HTML and returns a root element
1291
+ * Parse a chunk of HTML source.
1292
+ */
1293
+ function parse(data, options) {
1294
+ if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1295
+ var stack = base_parse(data, options);
1296
+ var root = stack[0];
1297
+ var _loop_2 = function () {
1298
+ // Handle each error elements.
1299
+ var last = stack.pop();
1300
+ var oneBefore = back_1.default(stack);
1301
+ if (last.parentNode && last.parentNode.parentNode) {
1302
+ if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
1303
+ // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
1304
+ oneBefore.removeChild(last);
1305
+ last.childNodes.forEach(function (child) {
1306
+ oneBefore.parentNode.appendChild(child);
1307
+ });
1308
+ stack.pop();
1309
+ }
1310
+ else {
1311
+ // Single error <div> <h3> </div> handle: Just removes <h3>
1312
+ oneBefore.removeChild(last);
1313
+ last.childNodes.forEach(function (child) {
1314
+ oneBefore.appendChild(child);
1315
+ });
1316
+ }
1317
+ }
1318
+ else {
1319
+ // If it's final element just skip.
1320
+ }
1321
+ };
1322
+ while (stack.length > 1) {
1323
+ _loop_2();
1324
+ }
1325
+ // response.childNodes.forEach((node) => {
1326
+ // if (node instanceof HTMLElement) {
1327
+ // node.parentNode = null;
1328
+ // }
1329
+ // });
1330
+ return root;
1331
+ }
1332
+ exports.parse = parse;
1276
1333
  });
1277
1334
  define("nodes/node", ["require", "exports"], function (require, exports) {
1278
1335
  "use strict";
@@ -1342,6 +1399,12 @@ define("nodes/comment", ["require", "exports", "nodes/node", "nodes/type"], func
1342
1399
  }(node_3.default));
1343
1400
  exports.default = CommentNode;
1344
1401
  });
1402
+ define("parse", ["require", "exports", "nodes/html"], function (require, exports, html_1) {
1403
+ "use strict";
1404
+ Object.defineProperty(exports, "__esModule", { value: true });
1405
+ exports.default = void 0;
1406
+ Object.defineProperty(exports, "default", { enumerable: true, get: function () { return html_1.parse; } });
1407
+ });
1345
1408
  define("valid", ["require", "exports", "nodes/html"], function (require, exports, html_2) {
1346
1409
  "use strict";
1347
1410
  Object.defineProperty(exports, "__esModule", { value: true });
@@ -1356,14 +1419,14 @@ define("valid", ["require", "exports", "nodes/html"], function (require, exports
1356
1419
  }
1357
1420
  exports.default = valid;
1358
1421
  });
1359
- define("index", ["require", "exports", "nodes/comment", "nodes/html", "parse", "valid", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, comment_2, html_3, parse_2, valid_1, node_4, text_2, type_5) {
1422
+ define("index", ["require", "exports", "nodes/comment", "nodes/html", "parse", "valid", "nodes/node", "nodes/text", "nodes/type"], function (require, exports, comment_2, html_3, parse_1, valid_1, node_4, text_2, type_5) {
1360
1423
  "use strict";
1361
1424
  Object.defineProperty(exports, "__esModule", { value: true });
1362
1425
  exports.NodeType = exports.TextNode = exports.Node = exports.valid = exports.default = exports.parse = exports.HTMLElement = exports.CommentNode = void 0;
1363
1426
  Object.defineProperty(exports, "CommentNode", { enumerable: true, get: function () { return __importDefault(comment_2).default; } });
1364
1427
  Object.defineProperty(exports, "HTMLElement", { enumerable: true, get: function () { return __importDefault(html_3).default; } });
1365
- Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return __importDefault(parse_2).default; } });
1366
- Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(parse_2).default; } });
1428
+ Object.defineProperty(exports, "parse", { enumerable: true, get: function () { return __importDefault(parse_1).default; } });
1429
+ Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(parse_1).default; } });
1367
1430
  Object.defineProperty(exports, "valid", { enumerable: true, get: function () { return __importDefault(valid_1).default; } });
1368
1431
  Object.defineProperty(exports, "Node", { enumerable: true, get: function () { return __importDefault(node_4).default; } });
1369
1432
  Object.defineProperty(exports, "TextNode", { enumerable: true, get: function () { return __importDefault(text_2).default; } });
@@ -122,6 +122,11 @@ export default class HTMLElement extends Node {
122
122
  * @return {HTMLElement} matching node
123
123
  */
124
124
  querySelector(selector: string): HTMLElement;
125
+ /**
126
+ * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
127
+ * @param selector a DOMString containing a selector list
128
+ */
129
+ closest(selector: string): Node;
125
130
  /**
126
131
  * Append a child node to childNodes
127
132
  * @param {Node} node node to append
@@ -187,4 +192,9 @@ export interface Options {
187
192
  * @return {HTMLElement} root element
188
193
  */
189
194
  export declare function base_parse(data: string, options?: Partial<Options>): HTMLElement[];
195
+ /**
196
+ * Parses HTML and returns a root element
197
+ * Parse a chunk of HTML source.
198
+ */
199
+ export declare function parse(data: string, options?: Partial<Options>): HTMLElement;
190
200
  export {};
@@ -14,6 +14,17 @@ var __extends = (this && this.__extends) || (function () {
14
14
  d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());
15
15
  };
16
16
  })();
17
+ var __assign = (this && this.__assign) || function () {
18
+ __assign = Object.assign || function(t) {
19
+ for (var s, i = 1, n = arguments.length; i < n; i++) {
20
+ s = arguments[i];
21
+ for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p))
22
+ t[p] = s[p];
23
+ }
24
+ return t;
25
+ };
26
+ return __assign.apply(this, arguments);
27
+ };
17
28
  var __spreadArray = (this && this.__spreadArray) || function (to, from) {
18
29
  for (var i = 0, il = from.length, j = to.length; i < il; i++, j++)
19
30
  to[j] = from[i];
@@ -23,7 +34,7 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
23
34
  return (mod && mod.__esModule) ? mod : { "default": mod };
24
35
  };
25
36
  Object.defineProperty(exports, "__esModule", { value: true });
26
- exports.base_parse = void 0;
37
+ exports.parse = exports.base_parse = void 0;
27
38
  var he_1 = __importDefault(require("he"));
28
39
  var css_select_1 = require("css-select");
29
40
  var node_1 = __importDefault(require("./node"));
@@ -32,7 +43,6 @@ var text_1 = __importDefault(require("./text"));
32
43
  var matcher_1 = __importDefault(require("../matcher"));
33
44
  var back_1 = __importDefault(require("../back"));
34
45
  var comment_1 = __importDefault(require("./comment"));
35
- var parse_1 = __importDefault(require("../parse"));
36
46
  // const { decode } = he;
37
47
  function decode(val) {
38
48
  // clone string
@@ -319,7 +329,7 @@ var HTMLElement = /** @class */ (function (_super) {
319
329
  },
320
330
  set: function (content) {
321
331
  //const r = parse(content, global.options); // TODO global.options ?
322
- var r = parse_1.default(content);
332
+ var r = parse(content);
323
333
  this.childNodes = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
324
334
  },
325
335
  enumerable: false,
@@ -331,7 +341,7 @@ var HTMLElement = /** @class */ (function (_super) {
331
341
  content = [content];
332
342
  }
333
343
  else if (typeof content == 'string') {
334
- var r = parse_1.default(content, options);
344
+ var r = parse(content, options);
335
345
  content = r.childNodes.length ? r.childNodes : [new text_1.default(content, this)];
336
346
  }
337
347
  this.childNodes = content;
@@ -348,7 +358,7 @@ var HTMLElement = /** @class */ (function (_super) {
348
358
  }
349
359
  else if (typeof node == 'string') {
350
360
  // const r = parse(content, global.options); // TODO global.options ?
351
- var r = parse_1.default(node);
361
+ var r = parse(node);
352
362
  return r.childNodes.length ? r.childNodes : [new text_1.default(node, _this)];
353
363
  }
354
364
  return [];
@@ -559,6 +569,58 @@ var HTMLElement = /** @class */ (function (_super) {
559
569
  // }
560
570
  // return null;
561
571
  };
572
+ /**
573
+ * traverses the Element and its parents (heading toward the document root) until it finds a node that matches the provided selector string. Will return itself or the matching ancestor. If no such element exists, it returns null.
574
+ * @param selector a DOMString containing a selector list
575
+ */
576
+ HTMLElement.prototype.closest = function (selector) {
577
+ var mapChild = new Map();
578
+ var el = this;
579
+ var old = null;
580
+ function findOne(test, elems) {
581
+ var elem = null;
582
+ for (var i = 0, l = elems.length; i < l && !elem; i++) {
583
+ var el_1 = elems[i];
584
+ if (test(el_1)) {
585
+ elem = el_1;
586
+ }
587
+ else {
588
+ var child = mapChild.get(el_1);
589
+ if (child) {
590
+ elem = findOne(test, [child]);
591
+ }
592
+ }
593
+ }
594
+ return elem;
595
+ }
596
+ while (el) {
597
+ mapChild.set(el, old);
598
+ old = el;
599
+ el = el.parentNode;
600
+ }
601
+ el = this;
602
+ while (el) {
603
+ var e = css_select_1.selectOne(selector, el, {
604
+ xmlMode: true,
605
+ adapter: __assign(__assign({}, matcher_1.default), { getChildren: function (node) {
606
+ var child = mapChild.get(node);
607
+ return child && [child];
608
+ },
609
+ getSiblings: function (node) {
610
+ return [node];
611
+ },
612
+ findOne: findOne,
613
+ findAll: function () {
614
+ return [];
615
+ } })
616
+ });
617
+ if (e) {
618
+ return e;
619
+ }
620
+ el = el.parentNode;
621
+ }
622
+ return null;
623
+ };
562
624
  /**
563
625
  * Append a child node to childNodes
564
626
  * @param {Node} node node to append
@@ -637,7 +699,7 @@ var HTMLElement = /** @class */ (function (_super) {
637
699
  }
638
700
  var attrs = {};
639
701
  if (this.rawAttrs) {
640
- var re = /\b([a-z][a-z0-9-_]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
702
+ var re = /\b([a-z][a-z0-9-_:]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/ig;
641
703
  var match = void 0;
642
704
  while ((match = re.exec(this.rawAttrs))) {
643
705
  attrs[match[1]] = match[2] || match[3] || match[4] || null;
@@ -742,7 +804,7 @@ var HTMLElement = /** @class */ (function (_super) {
742
804
  if (arguments.length < 2) {
743
805
  throw new Error('2 arguments required');
744
806
  }
745
- var p = parse_1.default(html);
807
+ var p = parse(html);
746
808
  if (where === 'afterend') {
747
809
  var idx = this.parentNode.childNodes.findIndex(function (child) {
748
810
  return child === _this;
@@ -1050,3 +1112,47 @@ function base_parse(data, options) {
1050
1112
  return stack;
1051
1113
  }
1052
1114
  exports.base_parse = base_parse;
1115
+ /**
1116
+ * Parses HTML and returns a root element
1117
+ * Parse a chuck of HTML source.
1118
+ */
1119
+ function parse(data, options) {
1120
+ if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
1121
+ var stack = base_parse(data, options);
1122
+ var root = stack[0];
1123
+ var _loop_2 = function () {
1124
+ // Handle each error elements.
1125
+ var last = stack.pop();
1126
+ var oneBefore = back_1.default(stack);
1127
+ if (last.parentNode && last.parentNode.parentNode) {
1128
+ if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
1129
+ // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
1130
+ oneBefore.removeChild(last);
1131
+ last.childNodes.forEach(function (child) {
1132
+ oneBefore.parentNode.appendChild(child);
1133
+ });
1134
+ stack.pop();
1135
+ }
1136
+ else {
1137
+ // Single error <div> <h3> </div> handle: Just removes <h3>
1138
+ oneBefore.removeChild(last);
1139
+ last.childNodes.forEach(function (child) {
1140
+ oneBefore.appendChild(child);
1141
+ });
1142
+ }
1143
+ }
1144
+ else {
1145
+ // If it's final element just skip.
1146
+ }
1147
+ };
1148
+ while (stack.length > 1) {
1149
+ _loop_2();
1150
+ }
1151
+ // response.childNodes.forEach((node) => {
1152
+ // if (node instanceof HTMLElement) {
1153
+ // node.parentNode = null;
1154
+ // }
1155
+ // });
1156
+ return root;
1157
+ }
1158
+ exports.parse = parse;
package/dist/parse.d.ts CHANGED
@@ -1,6 +1 @@
1
- import { Options } from './nodes/html';
2
- /**
3
- * Parses HTML and returns a root element
4
- * Parse a chuck of HTML source.
5
- */
6
- export default function parse(data: string, options?: Partial<Options>): import("./nodes/html").default;
1
+ export { parse as default } from './nodes/html';
package/dist/parse.js CHANGED
@@ -1,51 +1,5 @@
1
1
  "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
2
  Object.defineProperty(exports, "__esModule", { value: true });
6
- var back_1 = __importDefault(require("./back"));
3
+ exports.default = void 0;
7
4
  var html_1 = require("./nodes/html");
8
- /**
9
- * Parses HTML and returns a root element
10
- * Parse a chuck of HTML source.
11
- */
12
- function parse(data, options) {
13
- if (options === void 0) { options = { lowerCaseTagName: false, comment: false }; }
14
- var stack = html_1.base_parse(data, options);
15
- var root = stack[0];
16
- var _loop_1 = function () {
17
- // Handle each error elements.
18
- var last = stack.pop();
19
- var oneBefore = back_1.default(stack);
20
- if (last.parentNode && last.parentNode.parentNode) {
21
- if (last.parentNode === oneBefore && last.tagName === oneBefore.tagName) {
22
- // Pair error case <h3> <h3> handle : Fixes to <h3> </h3>
23
- oneBefore.removeChild(last);
24
- last.childNodes.forEach(function (child) {
25
- oneBefore.parentNode.appendChild(child);
26
- });
27
- stack.pop();
28
- }
29
- else {
30
- // Single error <div> <h3> </div> handle: Just removes <h3>
31
- oneBefore.removeChild(last);
32
- last.childNodes.forEach(function (child) {
33
- oneBefore.appendChild(child);
34
- });
35
- }
36
- }
37
- else {
38
- // If it's final element just skip.
39
- }
40
- };
41
- while (stack.length > 1) {
42
- _loop_1();
43
- }
44
- // response.childNodes.forEach((node) => {
45
- // if (node instanceof HTMLElement) {
46
- // node.parentNode = null;
47
- // }
48
- // });
49
- return root;
50
- }
51
- exports.default = parse;
5
+ Object.defineProperty(exports, "default", { enumerable: true, get: function () { return html_1.parse; } });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "3.1.4",
3
+ "version": "3.3.0",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -15,7 +15,7 @@
15
15
  "build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
16
16
  "dev": "tsc -w & mocha -w ./test/*.js",
17
17
  "pretest": "tsc -m commonjs",
18
- "release": "np"
18
+ "release": "yarn build && np"
19
19
  },
20
20
  "keywords": [
21
21
  "parser",
@@ -81,5 +81,6 @@
81
81
  "bugs": {
82
82
  "url": "https://github.com/taoqf/node-fast-html-parser/issues"
83
83
  },
84
- "homepage": "https://github.com/taoqf/node-fast-html-parser"
84
+ "homepage": "https://github.com/taoqf/node-fast-html-parser",
85
+ "sideEffects": false
85
86
  }