node-html-parser 3.3.2 → 3.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,23 +11,80 @@ function decode(val) {
11
11
  // clone string
12
12
  return JSON.parse(JSON.stringify(he.decode(val)));
13
13
  }
14
- const kBlockElements = new Map();
15
- kBlockElements.set('DIV', true);
16
- kBlockElements.set('div', true);
17
- kBlockElements.set('P', true);
18
- kBlockElements.set('p', true);
19
- // ul: true,
20
- // ol: true,
21
- kBlockElements.set('LI', true);
22
- kBlockElements.set('li', true);
23
- // table: true,
24
- // tr: true,
25
- kBlockElements.set('TD', true);
26
- kBlockElements.set('td', true);
27
- kBlockElements.set('SECTION', true);
28
- kBlockElements.set('section', true);
29
- kBlockElements.set('BR', true);
30
- kBlockElements.set('br', true);
14
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
15
+ const kBlockElements = new Set();
16
+ kBlockElements.add('address');
17
+ kBlockElements.add('ADDRESS');
18
+ kBlockElements.add('article');
19
+ kBlockElements.add('ARTICLE');
20
+ kBlockElements.add('aside');
21
+ kBlockElements.add('ASIDE');
22
+ kBlockElements.add('blockquote');
23
+ kBlockElements.add('BLOCKQUOTE');
24
+ kBlockElements.add('br');
25
+ kBlockElements.add('BR');
26
+ kBlockElements.add('details');
27
+ kBlockElements.add('DETAILS');
28
+ kBlockElements.add('dialog');
29
+ kBlockElements.add('DIALOG');
30
+ kBlockElements.add('dd');
31
+ kBlockElements.add('DD');
32
+ kBlockElements.add('div');
33
+ kBlockElements.add('DIV');
34
+ kBlockElements.add('dl');
35
+ kBlockElements.add('DL');
36
+ kBlockElements.add('dt');
37
+ kBlockElements.add('DT');
38
+ kBlockElements.add('fieldset');
39
+ kBlockElements.add('FIELDSET');
40
+ kBlockElements.add('figcaption');
41
+ kBlockElements.add('FIGCAPTION');
42
+ kBlockElements.add('figure');
43
+ kBlockElements.add('FIGURE');
44
+ kBlockElements.add('footer');
45
+ kBlockElements.add('FOOTER');
46
+ kBlockElements.add('form');
47
+ kBlockElements.add('FORM');
48
+ kBlockElements.add('h1');
49
+ kBlockElements.add('H1');
50
+ kBlockElements.add('h2');
51
+ kBlockElements.add('H2');
52
+ kBlockElements.add('h3');
53
+ kBlockElements.add('H3');
54
+ kBlockElements.add('h4');
55
+ kBlockElements.add('H4');
56
+ kBlockElements.add('h5');
57
+ kBlockElements.add('H5');
58
+ kBlockElements.add('h6');
59
+ kBlockElements.add('H6');
60
+ kBlockElements.add('header');
61
+ kBlockElements.add('HEADER');
62
+ kBlockElements.add('hgroup');
63
+ kBlockElements.add('HGROUP');
64
+ kBlockElements.add('hr');
65
+ kBlockElements.add('HR');
66
+ kBlockElements.add('li');
67
+ kBlockElements.add('LI');
68
+ kBlockElements.add('main');
69
+ kBlockElements.add('MAIN');
70
+ kBlockElements.add('nav');
71
+ kBlockElements.add('NAV');
72
+ kBlockElements.add('ol');
73
+ kBlockElements.add('OL');
74
+ kBlockElements.add('p');
75
+ kBlockElements.add('P');
76
+ kBlockElements.add('pre');
77
+ kBlockElements.add('PRE');
78
+ kBlockElements.add('section');
79
+ kBlockElements.add('SECTION');
80
+ kBlockElements.add('table');
81
+ kBlockElements.add('TABLE');
82
+ kBlockElements.add('td');
83
+ kBlockElements.add('TD');
84
+ kBlockElements.add('tr');
85
+ kBlockElements.add('TR');
86
+ kBlockElements.add('ul');
87
+ kBlockElements.add('UL');
31
88
  class DOMTokenList {
32
89
  constructor(valuesInit = [], afterUpdate = (() => null)) {
33
90
  this._set = new Set(valuesInit);
@@ -207,7 +264,7 @@ export default class HTMLElement extends Node {
207
264
  const blocks = [currentBlock];
208
265
  function dfs(node) {
209
266
  if (node.nodeType === NodeType.ELEMENT_NODE) {
210
- if (kBlockElements.get(node.rawTagName)) {
267
+ if (kBlockElements.has(node.rawTagName)) {
211
268
  if (currentBlock.length > 0) {
212
269
  blocks.push(currentBlock = []);
213
270
  }
@@ -226,7 +283,7 @@ export default class HTMLElement extends Node {
226
283
  currentBlock.prependWhitespace = true;
227
284
  }
228
285
  else {
229
- let text = node.text;
286
+ let text = node.trimmedText;
230
287
  if (currentBlock.prependWhitespace) {
231
288
  text = ` ${text}`;
232
289
  currentBlock.prependWhitespace = false;
@@ -363,7 +420,7 @@ export default class HTMLElement extends Node {
363
420
  if (node.isWhitespace) {
364
421
  return;
365
422
  }
366
- node.rawText = node.rawText.trim();
423
+ node.rawText = node.trimmedText;
367
424
  }
368
425
  else if (node.nodeType === NodeType.ELEMENT_NODE) {
369
426
  node.removeWhitespace();
@@ -14,6 +14,41 @@ export default class TextNode extends Node {
14
14
  */
15
15
  this.nodeType = NodeType.TEXT_NODE;
16
16
  }
17
+ /**
18
+ * Returns rawText with surrounding whitespace trimmed, keeping one plain space on a side when the whitespace character directly next to the text on that side is not \r or \n. The result is cached in _trimmedText. NOTE(review): nothing in this getter refreshes the cache if rawText is changed afterwards - confirm against the rest of the class.
19
+ */
20
+ get trimmedText() {
21
+ if (this._trimmedText !== undefined)
22
+ return this._trimmedText; // cached by an earlier call
23
+ const text = this.rawText;
24
+ let i = 0;
25
+ let startPos;
26
+ let endPos;
27
+ while (i >= 0 && i < text.length) {
28
+ if (/\S/.test(text[i])) {
29
+ if (startPos === undefined) {
30
+ startPos = i; // first non-whitespace character
31
+ i = text.length; // the decrement below restarts the scan at the last character, moving backwards
32
+ }
33
+ else {
34
+ endPos = i; // last non-whitespace character
35
+ i = void 0; // becomes NaN after the decrement below, which fails the loop condition and ends the scan
36
+ }
37
+ }
38
+ if (startPos === undefined)
39
+ i++;
40
+ else
41
+ i--;
42
+ }
43
+ if (startPos === undefined)
44
+ startPos = 0; // no non-whitespace character found: keep the text from its start
45
+ if (endPos === undefined)
46
+ endPos = text.length - 1; // ...through its end
47
+ const hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
48
+ const hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
49
+ this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
50
+ return this._trimmedText;
51
+ }
17
52
  /**
18
53
  * Get unescaped text value of current node and its children.
19
54
  * @return {string} text content
package/dist/main.js CHANGED
@@ -72,6 +72,45 @@ define("nodes/text", ["require", "exports", "nodes/type", "nodes/node"], functio
72
72
  _this.nodeType = type_1.default.TEXT_NODE;
73
73
  return _this;
74
74
  }
75
+ Object.defineProperty(TextNode.prototype, "trimmedText", {
76
+ /**
77
+ * Returns rawText with surrounding whitespace trimmed, keeping one plain space on a side when the whitespace character directly next to the text on that side is not \r or \n. The result is cached in _trimmedText. NOTE(review): nothing in this getter refreshes the cache if rawText is changed afterwards - confirm against the rest of the class.
78
+ */
79
+ get: function () {
80
+ if (this._trimmedText !== undefined)
81
+ return this._trimmedText; // cached by an earlier call
82
+ var text = this.rawText;
83
+ var i = 0;
84
+ var startPos;
85
+ var endPos;
86
+ while (i >= 0 && i < text.length) {
87
+ if (/\S/.test(text[i])) {
88
+ if (startPos === undefined) {
89
+ startPos = i; // first non-whitespace character
90
+ i = text.length; // the decrement below restarts the scan at the last character, moving backwards
91
+ }
92
+ else {
93
+ endPos = i; // last non-whitespace character
94
+ i = void 0; // becomes NaN after the decrement below, which fails the loop condition and ends the scan
95
+ }
96
+ }
97
+ if (startPos === undefined)
98
+ i++;
99
+ else
100
+ i--;
101
+ }
102
+ if (startPos === undefined)
103
+ startPos = 0; // no non-whitespace character found: keep the text from its start
104
+ if (endPos === undefined)
105
+ endPos = text.length - 1; // ...through its end
106
+ var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
107
+ var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
108
+ this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
109
+ return this._trimmedText;
110
+ },
111
+ enumerable: false,
112
+ configurable: true
113
+ });
75
114
  Object.defineProperty(TextNode.prototype, "text", {
76
115
  /**
77
116
  * Get unescaped text value of current node and its children.
@@ -222,23 +261,80 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
222
261
  // clone string
223
262
  return JSON.parse(JSON.stringify(he_1.default.decode(val)));
224
263
  }
225
- var kBlockElements = new Map();
226
- kBlockElements.set('DIV', true);
227
- kBlockElements.set('div', true);
228
- kBlockElements.set('P', true);
229
- kBlockElements.set('p', true);
230
- // ul: true,
231
- // ol: true,
232
- kBlockElements.set('LI', true);
233
- kBlockElements.set('li', true);
234
- // table: true,
235
- // tr: true,
236
- kBlockElements.set('TD', true);
237
- kBlockElements.set('td', true);
238
- kBlockElements.set('SECTION', true);
239
- kBlockElements.set('section', true);
240
- kBlockElements.set('BR', true);
241
- kBlockElements.set('br', true);
264
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
265
+ var kBlockElements = new Set();
266
+ kBlockElements.add('address');
267
+ kBlockElements.add('ADDRESS');
268
+ kBlockElements.add('article');
269
+ kBlockElements.add('ARTICLE');
270
+ kBlockElements.add('aside');
271
+ kBlockElements.add('ASIDE');
272
+ kBlockElements.add('blockquote');
273
+ kBlockElements.add('BLOCKQUOTE');
274
+ kBlockElements.add('br');
275
+ kBlockElements.add('BR');
276
+ kBlockElements.add('details');
277
+ kBlockElements.add('DETAILS');
278
+ kBlockElements.add('dialog');
279
+ kBlockElements.add('DIALOG');
280
+ kBlockElements.add('dd');
281
+ kBlockElements.add('DD');
282
+ kBlockElements.add('div');
283
+ kBlockElements.add('DIV');
284
+ kBlockElements.add('dl');
285
+ kBlockElements.add('DL');
286
+ kBlockElements.add('dt');
287
+ kBlockElements.add('DT');
288
+ kBlockElements.add('fieldset');
289
+ kBlockElements.add('FIELDSET');
290
+ kBlockElements.add('figcaption');
291
+ kBlockElements.add('FIGCAPTION');
292
+ kBlockElements.add('figure');
293
+ kBlockElements.add('FIGURE');
294
+ kBlockElements.add('footer');
295
+ kBlockElements.add('FOOTER');
296
+ kBlockElements.add('form');
297
+ kBlockElements.add('FORM');
298
+ kBlockElements.add('h1');
299
+ kBlockElements.add('H1');
300
+ kBlockElements.add('h2');
301
+ kBlockElements.add('H2');
302
+ kBlockElements.add('h3');
303
+ kBlockElements.add('H3');
304
+ kBlockElements.add('h4');
305
+ kBlockElements.add('H4');
306
+ kBlockElements.add('h5');
307
+ kBlockElements.add('H5');
308
+ kBlockElements.add('h6');
309
+ kBlockElements.add('H6');
310
+ kBlockElements.add('header');
311
+ kBlockElements.add('HEADER');
312
+ kBlockElements.add('hgroup');
313
+ kBlockElements.add('HGROUP');
314
+ kBlockElements.add('hr');
315
+ kBlockElements.add('HR');
316
+ kBlockElements.add('li');
317
+ kBlockElements.add('LI');
318
+ kBlockElements.add('main');
319
+ kBlockElements.add('MAIN');
320
+ kBlockElements.add('nav');
321
+ kBlockElements.add('NAV');
322
+ kBlockElements.add('ol');
323
+ kBlockElements.add('OL');
324
+ kBlockElements.add('p');
325
+ kBlockElements.add('P');
326
+ kBlockElements.add('pre');
327
+ kBlockElements.add('PRE');
328
+ kBlockElements.add('section');
329
+ kBlockElements.add('SECTION');
330
+ kBlockElements.add('table');
331
+ kBlockElements.add('TABLE');
332
+ kBlockElements.add('td');
333
+ kBlockElements.add('TD');
334
+ kBlockElements.add('tr');
335
+ kBlockElements.add('TR');
336
+ kBlockElements.add('ul');
337
+ kBlockElements.add('UL');
242
338
  var DOMTokenList = /** @class */ (function () {
243
339
  function DOMTokenList(valuesInit, afterUpdate) {
244
340
  if (valuesInit === void 0) { valuesInit = []; }
@@ -454,7 +550,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
454
550
  var blocks = [currentBlock];
455
551
  function dfs(node) {
456
552
  if (node.nodeType === type_3.default.ELEMENT_NODE) {
457
- if (kBlockElements.get(node.rawTagName)) {
553
+ if (kBlockElements.has(node.rawTagName)) {
458
554
  if (currentBlock.length > 0) {
459
555
  blocks.push(currentBlock = []);
460
556
  }
@@ -473,7 +569,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
473
569
  currentBlock.prependWhitespace = true;
474
570
  }
475
571
  else {
476
- var text = node.text;
572
+ var text = node.trimmedText;
477
573
  if (currentBlock.prependWhitespace) {
478
574
  text = " " + text;
479
575
  currentBlock.prependWhitespace = false;
@@ -628,7 +724,7 @@ define("nodes/html", ["require", "exports", "he", "css-select", "nodes/node", "n
628
724
  if (node.isWhitespace) {
629
725
  return;
630
726
  }
631
- node.rawText = node.rawText.trim();
727
+ node.rawText = node.trimmedText;
632
728
  }
633
729
  else if (node.nodeType === type_3.default.ELEMENT_NODE) {
634
730
  node.removeWhitespace();
@@ -181,7 +181,7 @@ export default class HTMLElement extends Node {
181
181
  setAttributes(attributes: Attributes): void;
182
182
  insertAdjacentHTML(where: InsertPosition, html: string): void;
183
183
  get nextSibling(): Node;
184
- get nextElementSibling(): Node;
184
+ get nextElementSibling(): HTMLElement;
185
185
  get classNames(): string;
186
186
  }
187
187
  export interface Options {
@@ -48,23 +48,80 @@ function decode(val) {
48
48
  // clone string
49
49
  return JSON.parse(JSON.stringify(he_1.default.decode(val)));
50
50
  }
51
- var kBlockElements = new Map();
52
- kBlockElements.set('DIV', true);
53
- kBlockElements.set('div', true);
54
- kBlockElements.set('P', true);
55
- kBlockElements.set('p', true);
56
- // ul: true,
57
- // ol: true,
58
- kBlockElements.set('LI', true);
59
- kBlockElements.set('li', true);
60
- // table: true,
61
- // tr: true,
62
- kBlockElements.set('TD', true);
63
- kBlockElements.set('td', true);
64
- kBlockElements.set('SECTION', true);
65
- kBlockElements.set('section', true);
66
- kBlockElements.set('BR', true);
67
- kBlockElements.set('br', true);
51
+ // https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
52
+ var kBlockElements = new Set();
53
+ kBlockElements.add('address');
54
+ kBlockElements.add('ADDRESS');
55
+ kBlockElements.add('article');
56
+ kBlockElements.add('ARTICLE');
57
+ kBlockElements.add('aside');
58
+ kBlockElements.add('ASIDE');
59
+ kBlockElements.add('blockquote');
60
+ kBlockElements.add('BLOCKQUOTE');
61
+ kBlockElements.add('br');
62
+ kBlockElements.add('BR');
63
+ kBlockElements.add('details');
64
+ kBlockElements.add('DETAILS');
65
+ kBlockElements.add('dialog');
66
+ kBlockElements.add('DIALOG');
67
+ kBlockElements.add('dd');
68
+ kBlockElements.add('DD');
69
+ kBlockElements.add('div');
70
+ kBlockElements.add('DIV');
71
+ kBlockElements.add('dl');
72
+ kBlockElements.add('DL');
73
+ kBlockElements.add('dt');
74
+ kBlockElements.add('DT');
75
+ kBlockElements.add('fieldset');
76
+ kBlockElements.add('FIELDSET');
77
+ kBlockElements.add('figcaption');
78
+ kBlockElements.add('FIGCAPTION');
79
+ kBlockElements.add('figure');
80
+ kBlockElements.add('FIGURE');
81
+ kBlockElements.add('footer');
82
+ kBlockElements.add('FOOTER');
83
+ kBlockElements.add('form');
84
+ kBlockElements.add('FORM');
85
+ kBlockElements.add('h1');
86
+ kBlockElements.add('H1');
87
+ kBlockElements.add('h2');
88
+ kBlockElements.add('H2');
89
+ kBlockElements.add('h3');
90
+ kBlockElements.add('H3');
91
+ kBlockElements.add('h4');
92
+ kBlockElements.add('H4');
93
+ kBlockElements.add('h5');
94
+ kBlockElements.add('H5');
95
+ kBlockElements.add('h6');
96
+ kBlockElements.add('H6');
97
+ kBlockElements.add('header');
98
+ kBlockElements.add('HEADER');
99
+ kBlockElements.add('hgroup');
100
+ kBlockElements.add('HGROUP');
101
+ kBlockElements.add('hr');
102
+ kBlockElements.add('HR');
103
+ kBlockElements.add('li');
104
+ kBlockElements.add('LI');
105
+ kBlockElements.add('main');
106
+ kBlockElements.add('MAIN');
107
+ kBlockElements.add('nav');
108
+ kBlockElements.add('NAV');
109
+ kBlockElements.add('ol');
110
+ kBlockElements.add('OL');
111
+ kBlockElements.add('p');
112
+ kBlockElements.add('P');
113
+ kBlockElements.add('pre');
114
+ kBlockElements.add('PRE');
115
+ kBlockElements.add('section');
116
+ kBlockElements.add('SECTION');
117
+ kBlockElements.add('table');
118
+ kBlockElements.add('TABLE');
119
+ kBlockElements.add('td');
120
+ kBlockElements.add('TD');
121
+ kBlockElements.add('tr');
122
+ kBlockElements.add('TR');
123
+ kBlockElements.add('ul');
124
+ kBlockElements.add('UL');
68
125
  var DOMTokenList = /** @class */ (function () {
69
126
  function DOMTokenList(valuesInit, afterUpdate) {
70
127
  if (valuesInit === void 0) { valuesInit = []; }
@@ -280,7 +337,7 @@ var HTMLElement = /** @class */ (function (_super) {
280
337
  var blocks = [currentBlock];
281
338
  function dfs(node) {
282
339
  if (node.nodeType === type_1.default.ELEMENT_NODE) {
283
- if (kBlockElements.get(node.rawTagName)) {
340
+ if (kBlockElements.has(node.rawTagName)) {
284
341
  if (currentBlock.length > 0) {
285
342
  blocks.push(currentBlock = []);
286
343
  }
@@ -299,7 +356,7 @@ var HTMLElement = /** @class */ (function (_super) {
299
356
  currentBlock.prependWhitespace = true;
300
357
  }
301
358
  else {
302
- var text = node.text;
359
+ var text = node.trimmedText;
303
360
  if (currentBlock.prependWhitespace) {
304
361
  text = " " + text;
305
362
  currentBlock.prependWhitespace = false;
@@ -454,7 +511,7 @@ var HTMLElement = /** @class */ (function (_super) {
454
511
  if (node.isWhitespace) {
455
512
  return;
456
513
  }
457
- node.rawText = node.rawText.trim();
514
+ node.rawText = node.trimmedText;
458
515
  }
459
516
  else if (node.nodeType === type_1.default.ELEMENT_NODE) {
460
517
  node.removeWhitespace();
@@ -13,6 +13,11 @@ export default class TextNode extends Node {
13
13
  * @type {Number}
14
14
  */
15
15
  nodeType: NodeType;
16
+ private _trimmedText?;
17
+ /**
18
+ * Returns the node's raw text with surrounding whitespace trimmed, keeping one plain space on a side when the whitespace character directly next to the text on that side is not \r or \n.
19
+ */
20
+ get trimmedText(): string;
16
21
  /**
17
22
  * Get unescaped text value of current node and its children.
18
23
  * @return {string} text content
@@ -36,6 +36,45 @@ var TextNode = /** @class */ (function (_super) {
36
36
  _this.nodeType = type_1.default.TEXT_NODE;
37
37
  return _this;
38
38
  }
39
+ Object.defineProperty(TextNode.prototype, "trimmedText", {
40
+ /**
41
+ * Returns rawText with surrounding whitespace trimmed, keeping one plain space on a side when the whitespace character directly next to the text on that side is not \r or \n. The result is cached in _trimmedText. NOTE(review): nothing in this getter refreshes the cache if rawText is changed afterwards - confirm against the rest of the class.
42
+ */
43
+ get: function () {
44
+ if (this._trimmedText !== undefined)
45
+ return this._trimmedText; // cached by an earlier call
46
+ var text = this.rawText;
47
+ var i = 0;
48
+ var startPos;
49
+ var endPos;
50
+ while (i >= 0 && i < text.length) {
51
+ if (/\S/.test(text[i])) {
52
+ if (startPos === undefined) {
53
+ startPos = i; // first non-whitespace character
54
+ i = text.length; // the decrement below restarts the scan at the last character, moving backwards
55
+ }
56
+ else {
57
+ endPos = i; // last non-whitespace character
58
+ i = void 0; // becomes NaN after the decrement below, which fails the loop condition and ends the scan
59
+ }
60
+ }
61
+ if (startPos === undefined)
62
+ i++;
63
+ else
64
+ i--;
65
+ }
66
+ if (startPos === undefined)
67
+ startPos = 0; // no non-whitespace character found: keep the text from its start
68
+ if (endPos === undefined)
69
+ endPos = text.length - 1; // ...through its end
70
+ var hasLeadingSpace = startPos > 0 && /[^\S\r\n]/.test(text[startPos - 1]);
71
+ var hasTrailingSpace = endPos < (text.length - 1) && /[^\S\r\n]/.test(text[endPos + 1]);
72
+ this._trimmedText = (hasLeadingSpace ? ' ' : '') + text.slice(startPos, endPos + 1) + (hasTrailingSpace ? ' ' : '');
73
+ return this._trimmedText;
74
+ },
75
+ enumerable: false,
76
+ configurable: true
77
+ });
39
78
  Object.defineProperty(TextNode.prototype, "text", {
40
79
  /**
41
80
  * Get unescaped text value of current node and its children.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-html-parser",
3
- "version": "3.3.2",
3
+ "version": "3.3.6",
4
4
  "description": "A very fast HTML parser, generating a simplified DOM, with basic element query support.",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/esm/index.js",
@@ -15,7 +15,8 @@
15
15
  "build": "npm run lint && npm run clean && npm run ts:cjs && npm run ts:amd && npm run ts:esm",
16
16
  "dev": "tsc -w & mocha -w ./test/*.js",
17
17
  "pretest": "tsc -m commonjs",
18
- "release": "yarn build && np"
18
+ "release": "yarn build && np",
19
+ "prepare": "npm run build"
19
20
  },
20
21
  "keywords": [
21
22
  "parser",
@@ -32,7 +33,7 @@
32
33
  "registry": "https://registry.npmjs.org"
33
34
  },
34
35
  "dependencies": {
35
- "css-select": "^3.1.2",
36
+ "css-select": "^4.1.3",
36
37
  "he": "1.2.0"
37
38
  },
38
39
  "devDependencies": {